Commit 72257494 authored by Vishnu Banna

classification task fix

parent 842cdd4d
@@ -51,6 +51,8 @@ def build_classification_model(
norm_activation_config=norm_activation_config,
l2_regularizer=l2_regularizer)
print(backbone)
model = classification_model.ClassificationModel(
backbone=backbone,
num_classes=model_config.num_classes,
@@ -29,13 +29,14 @@ from official.vision.beta.projects.yolo.configs import backbones
@dataclasses.dataclass
class ImageClassificationModel(hyperparams.Config):
num_classes: int = 0
input_size: List[int] = dataclasses.field(default_factory=list)
input_size: List[int] = dataclasses.field(default_factory=lambda: [224, 224])
backbone: backbones.Backbone = backbones.Backbone(
type='darknet', darknet=backbones.Darknet())
dropout_rate: float = 0.0
norm_activation: common.NormActivation = common.NormActivation()
# Adds a Batch Normalization layer pre-GlobalAveragePooling in classification.
add_head_batch_norm: bool = False
kernel_initializer: str = 'VarianceScaling'
@dataclasses.dataclass
@@ -56,7 +57,6 @@ class ImageClassificationTask(cfg.TaskConfig):
gradient_clip_norm: float = 0.0
logging_dir: Optional[str] = None
@exp_factory.register_config_factory('darknet_classification')
def darknet_classification() -> cfg.ExperimentConfig:
"""Image classification general."""
@@ -67,3 +67,4 @@ def darknet_classification() -> cfg.ExperimentConfig:
'task.train_data.is_training != None',
'task.validation_data.is_training != None'
])
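For reference, a factory registered this way is looked up by name through exp_factory; a minimal sketch of pulling the config and overriding fields (this assumes the module above has been imported so the registration has run, and the override values are illustrative only):

from official.core import exp_factory

# Looks up the experiment registered above by its factory name.
config = exp_factory.get_exp_config('darknet_classification')

# Fields declared on the config classes above can be overridden in place;
# these values are illustrative, not defaults.
config.task.model.num_classes = 1001
config.task.model.input_size = [256, 256, 3]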
"""Classification parser."""
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Import libraries
"""Classification decoder and parser."""
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_addons as tfa
from official.vision.beta.dataloaders import parser
from official.vision.beta.dataloaders import classification_input
from official.vision.beta.ops import preprocess_ops
from official.vision.beta.ops import augment
class Parser(parser.Parser):
class Parser(classification_input.Parser):
"""Parser to parse an image and its annotations into a dictionary of tensors."""
def __init__(self,
output_size,
aug_policy,
scale=[128, 448],
dtype='float32'):
"""Initializes parameters for parsing annotations in the dataset.
Args:
output_size: `Tensor` or `list` for [height, width] of output image. The
output_size should be divided by the largest feature stride 2^max_level.
num_classes: `float`, number of classes.
aug_policy: An optional Augmentation object to choose from AutoAugment and
RandAugment.
scale: A `List[int]`, minimum and maximum image shape range.
dtype: `str`, cast output image in dtype. It can be 'float32', 'float16',
or 'bfloat16'.
"""
self._output_size = output_size
if aug_policy:
if aug_policy == 'autoaug':
self._augmenter = augment.AutoAugment()
elif aug_policy == 'randaug':
self._augmenter = augment.RandAugment(num_layers=2, magnitude=20)
else:
raise ValueError(
'Augmentation policy {} not supported.'.format(aug_policy))
else:
self._augmenter = None
self._scale = scale
if dtype == 'float32':
self._dtype = tf.float32
elif dtype == 'float16':
self._dtype = tf.float16
elif dtype == 'bfloat16':
self._dtype = tf.bfloat16
else:
raise ValueError('dtype {!r} is not supported!'.format(dtype))
def _parse_train_data(self, decoded_tensors):
"""Generates images and labels that are usable for model training.
Args:
decoded_tensors: a dict of Tensors produced by the decoder.
Returns:
images: the image tensor.
labels: a dict of Tensors that contains labels.
"""
image = tf.io.decode_image(decoded_tensors['image/encoded'])
image.set_shape((None, None, 3))
image = tf.image.resize_with_pad(
image,
target_width=self._output_size[0],
target_height=self._output_size[1])
scale = tf.random.uniform([],
minval=self._scale[0],
maxval=self._scale[1],
dtype=tf.int32)
if scale > self._output_size[0]:
image = tf.image.resize_with_crop_or_pad(
image, target_height=scale, target_width=scale)
def _parse_train_image(self, decoded_tensors):
"""Parses image data for training."""
image_bytes = decoded_tensors[self._image_field_key]
if self._decode_jpeg_only:
image_shape = tf.image.extract_jpeg_shape(image_bytes)
# Crops image.
cropped_image = preprocess_ops.random_crop_image_v2(
image_bytes, image_shape)
image = tf.cond(
tf.reduce_all(tf.equal(tf.shape(cropped_image), image_shape)),
lambda: preprocess_ops.center_crop_image_v2(image_bytes, image_shape),
lambda: cropped_image)
else:
image = tf.image.random_crop(image, (scale, scale, 3))
# Decodes image.
image = tf.io.decode_image(image_bytes, channels=3)
image.set_shape([None, None, 3])
# Crops image.
cropped_image = preprocess_ops.random_crop_image(image)
image = tf.cond(
tf.reduce_all(tf.equal(tf.shape(cropped_image), tf.shape(image))),
lambda: preprocess_ops.center_crop_image(image),
lambda: cropped_image)
if self._aug_rand_hflip:
image = tf.image.random_flip_left_right(image)
# Resizes image.
image = tf.image.resize(
image, self._output_size, method=tf.image.ResizeMethod.BILINEAR)
image.set_shape([self._output_size[0], self._output_size[1], 3])
# Apply autoaug or randaug.
if self._augmenter is not None:
image = self._augmenter.distort(image)
image = tf.image.random_flip_left_right(image)
image = tf.cast(image, tf.float32) / 255
image = tf.image.resize(image, (self._output_size[0], self._output_size[1]))
label = decoded_tensors['image/class/label']
return image, label
def _parse_eval_data(self, decoded_tensors):
"""Generates images and labels that are usable for model evaluation.
Args:
decoded_tensors: a dict of Tensors produced by the decoder.
Returns:
images: the image tensor.
labels: a dict of Tensors that contains labels.
"""
image = tf.io.decode_image(decoded_tensors['image/encoded'])
image.set_shape((None, None, 3))
image = tf.cast(image, tf.float32)
image = tf.image.resize_with_pad(
image,
target_width=self._output_size[0],
target_height=self._output_size[1]) # Final Output Shape
image = image / 255. # Normalize
#label = tf.one_hot(decoded_tensors['image/class/label'], self._num_classes)
label = decoded_tensors['image/class/label']
return image, label
# Convert image to self._dtype.
image = tf.image.convert_image_dtype(image, self._dtype)
image = image / 255.0
return image
def _parse_eval_image(self, decoded_tensors):
"""Parses image data for evaluation."""
image_bytes = decoded_tensors[self._image_field_key]
if self._decode_jpeg_only:
image_shape = tf.image.extract_jpeg_shape(image_bytes)
# Center crops.
image = preprocess_ops.center_crop_image_v2(image_bytes, image_shape)
else:
# Decodes image.
image = tf.io.decode_image(image_bytes, channels=3)
image.set_shape([None, None, 3])
# Center crops.
image = preprocess_ops.center_crop_image(image)
image = tf.image.resize(
image, self._output_size, method=tf.image.ResizeMethod.BILINEAR)
image.set_shape([self._output_size[0], self._output_size[1], 3])
# Convert image to self._dtype.
image = tf.image.convert_image_dtype(image, self._dtype)
image = image / 255.0
return image
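As a rough usage sketch, mirroring the arguments that the task's build_inputs passes further below (all values illustrative; parse_fn(is_training) comes from the dataloader base class):

# Illustrative only: construct the parser and obtain tf.data map functions.
parser_obj = Parser(
    output_size=[256, 256],
    num_classes=1001,
    image_field_key='image/encoded',
    label_field_key='image/class/label',
    decode_jpeg_only=True,
    aug_rand_hflip=True,
    dtype='float16')

train_parse_fn = parser_obj.parse_fn(is_training=True)   # routes to _parse_train_image
eval_parse_fn = parser_obj.parse_fn(is_training=False)   # routes to _parse_eval_image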
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -11,19 +12,22 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
# ==============================================================================
"""Image classification task definition."""
import tensorflow as tf
from official.core import input_reader
from official.core import task_factory
from official.vision.beta.dataloaders import classification_input
from official.vision.beta.projects.yolo.configs import darknet_classification as exp_cfg
from official.vision.beta.projects.yolo.dataloaders import classification_tfds_decoder as cli
from official.common import dataset_fn
from official.vision.beta.dataloaders import input_reader_factory
from official.vision.beta.dataloaders import tfds_factory
from official.vision.beta.dataloaders import classification_input as classification_input_base
from official.vision.beta.projects.yolo.dataloaders import classification_input
from official.vision.beta.tasks import image_classification
@task_factory.register_task_cls(exp_cfg.ImageClassificationTask)
class ImageClassificationTask(image_classification.ImageClassificationTask):
"""A task for image classification."""
@@ -33,82 +37,33 @@ class ImageClassificationTask(image_classification.ImageClassificationTask):
num_classes = self.task_config.model.num_classes
input_size = self.task_config.model.input_size
image_field_key = self.task_config.train_data.image_field_key
label_field_key = self.task_config.train_data.label_field_key
is_multilabel = self.task_config.train_data.is_multilabel
if params.tfds_name:
decoder = cli.Decoder()
decoder = tfds_factory.get_classification_decoder(params.tfds_name)
else:
decoder = classification_input.Decoder()
decoder = classification_input_base.Decoder(
image_field_key=image_field_key, label_field_key=label_field_key,
is_multilabel=is_multilabel)
parser = classification_input.Parser(
output_size=input_size[:2],
num_classes=num_classes,
image_field_key=image_field_key,
label_field_key=label_field_key,
decode_jpeg_only=params.decode_jpeg_only,
aug_rand_hflip=params.aug_rand_hflip,
aug_type=params.aug_type,
is_multilabel=is_multilabel,
dtype=params.dtype)
reader = input_reader.InputReader(
reader = input_reader_factory.input_reader_generator(
params,
dataset_fn=tf.data.TFRecordDataset,
dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
decoder_fn=decoder.decode,
parser_fn=parser.parse_fn(params.is_training))
dataset = reader.read(input_context=input_context)
return dataset
def train_step(self, inputs, model, optimizer, metrics=None):
"""Does forward and backward.
Args:
inputs: a dictionary of input tensors.
model: the model, forward pass definition.
optimizer: the optimizer for this training step.
metrics: a nested structure of metrics objects.
Returns:
A dictionary of logs.
"""
features, labels = inputs
if self.task_config.losses.one_hot:
labels = tf.one_hot(labels, self.task_config.model.num_classes)
num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
with tf.GradientTape() as tape:
outputs = model(features, training=True)
# Casting the output layer to float32 is necessary when mixed precision
# (mixed_float16 or mixed_bfloat16) is enabled, so that the loss is
# computed in float32.
outputs = tf.nest.map_structure(
lambda x: tf.cast(x, tf.float32), outputs)
# Computes per-replica loss.
loss = self.build_losses(
model_outputs=outputs, labels=labels, aux_losses=model.losses)
# Scales the loss, since the default gradient allreduce performs a sum
# inside the optimizer.
scaled_loss = loss / num_replicas
# For mixed_precision policy, when LossScaleOptimizer is used, loss is
# scaled for numerical stability.
if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
scaled_loss = optimizer.get_scaled_loss(scaled_loss)
tvars = model.trainable_variables
grads = tape.gradient(scaled_loss, tvars)
# Scales back gradient before apply_gradients when LossScaleOptimizer is
# used.
if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
grads = optimizer.get_unscaled_gradients(grads)
# Apply gradient clipping.
if self.task_config.gradient_clip_norm > 0:
grads, _ = tf.clip_by_global_norm(
grads, self.task_config.gradient_clip_norm)
optimizer.apply_gradients(list(zip(grads, tvars)))
logs = {self.loss: loss}
if metrics:
self.process_metrics(metrics, labels, outputs)
logs.update({m.name: m.result() for m in metrics})
elif model.compiled_metrics:
self.process_compiled_metrics(model.compiled_metrics, labels, outputs)
logs.update({m.name: m.result() for m in model.metrics})
return logs
return dataset
\ No newline at end of file
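With the custom train_step deleted, training and evaluation steps now come from the parent image_classification.ImageClassificationTask. A minimal sketch of driving the task end to end (hypothetical driver code, assuming the usual Model Garden entry points and that the registering modules have been imported):

from official.core import exp_factory, task_factory

# Hypothetical driver: resolve the registered task and build its train data.
config = exp_factory.get_exp_config('darknet_classification')
task = task_factory.get_task(config.task, logging_dir='/tmp/darknet_cls')
train_dataset = task.build_inputs(config.task.train_data)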
runtime:
all_reduce_alg: null
batchnorm_spatial_persistent: false
dataset_num_private_threads: null
default_shard_dim: -1
distribution_strategy: mirrored
enable_xla: false
gpu_thread_mode: null
loss_scale: dynamic
mixed_precision_dtype: float16
num_cores_per_replica: 1
num_gpus: 2
num_packs: 1
per_gpu_thread_count: 0
run_eagerly: false
task_index: -1
tpu: null
tpu_enable_xla_dynamic_padder: null
worker_hosts: null
task:
evaluation:
top_k: 5
gradient_clip_norm: 0.0
init_checkpoint: ''
logging_dir: null
losses:
l2_weight_decay: 0.0005
label_smoothing: 0.0
one_hot: true
model:
add_head_batch_norm: false
backbone:
darknet:
depth_scale: 1.0
dilate: false
max_level: 5
min_level: 3
model_id: darknet53
use_reorg_input: false
use_separable_conv: false
width_scale: 1.0
type: darknet
dropout_rate: 0.0
input_size: [256, 256, 3]
kernel_initializer: VarianceScaling
norm_activation:
activation: mish
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: true
num_classes: 1001
name: null
train_data:
aug_policy: null
aug_rand_hflip: true
aug_type: null
block_length: 1
cache: false
color_jitter: 0.0
cycle_length: 10
decode_jpeg_only: true
decoder:
simple_decoder:
mask_binarize_threshold: null
regenerate_source_id: false
type: simple_decoder
deterministic: null
drop_remainder: true
dtype: float16
enable_tf_data_service: false
file_type: tfrecord
global_batch_size: 16
image_field_key: image/encoded
input_path: ''
is_multilabel: false
is_training: true
label_field_key: image/class/label
mixup_and_cutmix: null
randaug_magnitude: 10
random_erasing: null
seed: null
sharding: true
shuffle_buffer_size: 100
tf_data_service_address: null
tf_data_service_job_name: null
tfds_as_supervised: false
tfds_data_dir: ~/tensorflow_datasets/
tfds_name: imagenet2012
tfds_skip_decoding_feature: ''
tfds_split: train
validation_data:
aug_policy: null
aug_rand_hflip: true
aug_type: null
block_length: 1
cache: false
color_jitter: 0.0
cycle_length: 10
decode_jpeg_only: true
decoder:
simple_decoder:
mask_binarize_threshold: null
regenerate_source_id: false
type: simple_decoder
deterministic: null
drop_remainder: false
dtype: float16
enable_tf_data_service: false
file_type: tfrecord
global_batch_size: 16
image_field_key: image/encoded
input_path: ''
is_multilabel: false
is_training: false
label_field_key: image/class/label
mixup_and_cutmix: null
randaug_magnitude: 10
random_erasing: null
seed: null
sharding: true
shuffle_buffer_size: 100
tf_data_service_address: null
tf_data_service_job_name: null
tfds_as_supervised: false
tfds_data_dir: ~/tensorflow_datasets/
tfds_name: imagenet2012
tfds_skip_decoding_feature: ''
tfds_split: validation
trainer:
allow_tpu_summary: false
best_checkpoint_eval_metric: ''
best_checkpoint_export_subdir: ''
best_checkpoint_metric_comp: higher
checkpoint_interval: 10000
continuous_eval_timeout: 3600
eval_tf_function: true
eval_tf_while_loop: false
loss_upper_bound: 1000000.0
max_to_keep: 5
optimizer_config:
ema: null
learning_rate:
polynomial:
cycle: false
decay_steps: 6392000
end_learning_rate: 1.25e-05
initial_learning_rate: 0.0125
name: PolynomialDecay
offset: 0
power: 4.0
type: polynomial
optimizer:
sgd:
clipnorm: null
clipvalue: null
decay: 0.0
global_clipnorm: null
momentum: 0.9
name: SGD
nesterov: false
type: sgd
warmup:
linear:
name: linear
warmup_learning_rate: 0
warmup_steps: 8000
type: linear
recovery_begin_steps: 0
recovery_max_trials: 0
steps_per_loop: 10000
summary_interval: 10000
train_steps: 6400000
train_tf_function: true
train_tf_while_loop: true
validation_interval: 10000
validation_steps: 3200
validation_summary_subdir: validation
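This YAML is applied as an override on top of the registered experiment config; a minimal sketch of merging it in Python (the file path is a placeholder; override_params_dict is the standard Model Garden helper used by the train driver):

from official.core import exp_factory
from official.modeling import hyperparams

config = exp_factory.get_exp_config('darknet_classification')
config = hyperparams.override_params_dict(
    config, 'experiments/darknet53_classification.yaml', is_strict=True)

On the command line, the same file is typically passed to the vision train driver via its --config_file flag.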