Commit 72257494 authored by Vishnu Banna's avatar Vishnu Banna
Browse files

classification task fix

parent 842cdd4d
...@@ -51,6 +51,8 @@ def build_classification_model( ...@@ -51,6 +51,8 @@ def build_classification_model(
norm_activation_config=norm_activation_config, norm_activation_config=norm_activation_config,
l2_regularizer=l2_regularizer) l2_regularizer=l2_regularizer)
print(backbone)
model = classification_model.ClassificationModel( model = classification_model.ClassificationModel(
backbone=backbone, backbone=backbone,
num_classes=model_config.num_classes, num_classes=model_config.num_classes,
......
...@@ -29,13 +29,14 @@ from official.vision.beta.projects.yolo.configs import backbones ...@@ -29,13 +29,14 @@ from official.vision.beta.projects.yolo.configs import backbones
@dataclasses.dataclass
class ImageClassificationModel(hyperparams.Config):
  """Image classification model config.

  Attributes:
    num_classes: Number of output classes; 0 means not yet configured.
    input_size: [height, width] of the model input. Defaults to [224, 224].
    backbone: Backbone config; defaults to a darknet backbone.
    dropout_rate: Dropout rate applied in the classification head.
    norm_activation: Normalization and activation config.
    add_head_batch_norm: If True, adds a Batch Normalization layer
      pre-GlobalAveragePooling in classification.
    kernel_initializer: Name of the kernel initializer for the head.
  """
  num_classes: int = 0
  # Mutable default value must be supplied via default_factory.
  input_size: List[int] = dataclasses.field(default_factory=lambda: [224, 224])
  backbone: backbones.Backbone = backbones.Backbone(
      type='darknet', darknet=backbones.Darknet())
  dropout_rate: float = 0.0
  norm_activation: common.NormActivation = common.NormActivation()
  # Adds a Batch Normalization layer pre-GlobalAveragePooling in classification.
  add_head_batch_norm: bool = False
  kernel_initializer: str = 'VarianceScaling'
@dataclasses.dataclass @dataclasses.dataclass
...@@ -56,7 +57,6 @@ class ImageClassificationTask(cfg.TaskConfig): ...@@ -56,7 +57,6 @@ class ImageClassificationTask(cfg.TaskConfig):
gradient_clip_norm: float = 0.0 gradient_clip_norm: float = 0.0
logging_dir: Optional[str] = None logging_dir: Optional[str] = None
@exp_factory.register_config_factory('darknet_classification') @exp_factory.register_config_factory('darknet_classification')
def darknet_classification() -> cfg.ExperimentConfig: def darknet_classification() -> cfg.ExperimentConfig:
"""Image classification general.""" """Image classification general."""
...@@ -67,3 +67,4 @@ def darknet_classification() -> cfg.ExperimentConfig: ...@@ -67,3 +67,4 @@ def darknet_classification() -> cfg.ExperimentConfig:
'task.train_data.is_training != None', 'task.train_data.is_training != None',
'task.validation_data.is_training != None' 'task.validation_data.is_training != None'
]) ])
"""Classification parser.""" # Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Import libraries """Classification decoder and parser."""
import tensorflow as tf import tensorflow as tf
import tensorflow_datasets as tfds from official.vision.beta.dataloaders import classification_input
import tensorflow_addons as tfa
from official.vision.beta.dataloaders import parser
from official.vision.beta.ops import preprocess_ops from official.vision.beta.ops import preprocess_ops
from official.vision.beta.ops import augment
class Parser(classification_input.Parser):
  """Parser to parse an image and its annotations into a dictionary of tensors.

  Extends the base classification parser; images are additionally scaled
  into [0, 1] after dtype conversion (see note below).
  """

  def _parse_train_image(self, decoded_tensors):
    """Parses and augments one image for training.

    Args:
      decoded_tensors: a dict of Tensors produced by the decoder; must
        contain the encoded image bytes under `self._image_field_key`.

    Returns:
      A [output_height, output_width, 3] image tensor of `self._dtype`.
    """
    image_bytes = decoded_tensors[self._image_field_key]

    if self._decode_jpeg_only:
      image_shape = tf.image.extract_jpeg_shape(image_bytes)

      # Random crop straight from the JPEG bytes; when the random crop
      # degenerates to the full image, fall back to a center crop.
      cropped_image = preprocess_ops.random_crop_image_v2(
          image_bytes, image_shape)
      image = tf.cond(
          tf.reduce_all(tf.equal(tf.shape(cropped_image), image_shape)),
          lambda: preprocess_ops.center_crop_image_v2(image_bytes, image_shape),
          lambda: cropped_image)
    else:
      # Decodes image.
      image = tf.io.decode_image(image_bytes, channels=3)
      image.set_shape([None, None, 3])

      # Random crop with the same center-crop fallback as above.
      cropped_image = preprocess_ops.random_crop_image(image)
      image = tf.cond(
          tf.reduce_all(tf.equal(tf.shape(cropped_image), tf.shape(image))),
          lambda: preprocess_ops.center_crop_image(image),
          lambda: cropped_image)

    if self._aug_rand_hflip:
      image = tf.image.random_flip_left_right(image)

    # Resizes image. tf.image.resize returns float32 with values still in
    # the original [0, 255] range.
    image = tf.image.resize(
        image, self._output_size, method=tf.image.ResizeMethod.BILINEAR)
    image.set_shape([self._output_size[0], self._output_size[1], 3])

    # Apply autoaug or randaug.
    if self._augmenter is not None:
      image = self._augmenter.distort(image)

    # convert_image_dtype does not rescale float inputs, so after the float32
    # resize above it only casts to self._dtype; the explicit divide is what
    # normalizes pixels into [0, 1].
    image = tf.image.convert_image_dtype(image, self._dtype)
    image = image / 255.0
    return image

  def _parse_eval_image(self, decoded_tensors):
    """Parses one image for evaluation (deterministic center crop).

    Args:
      decoded_tensors: a dict of Tensors produced by the decoder; must
        contain the encoded image bytes under `self._image_field_key`.

    Returns:
      A [output_height, output_width, 3] image tensor of `self._dtype`.
    """
    image_bytes = decoded_tensors[self._image_field_key]

    if self._decode_jpeg_only:
      image_shape = tf.image.extract_jpeg_shape(image_bytes)
      # Center crops.
      image = preprocess_ops.center_crop_image_v2(image_bytes, image_shape)
    else:
      # Decodes image.
      image = tf.io.decode_image(image_bytes, channels=3)
      image.set_shape([None, None, 3])
      # Center crops.
      image = preprocess_ops.center_crop_image(image)

    # Final output shape.
    image = tf.image.resize(
        image, self._output_size, method=tf.image.ResizeMethod.BILINEAR)
    image.set_shape([self._output_size[0], self._output_size[1], 3])

    # Same cast-then-normalize as the train path; see note there.
    image = tf.image.convert_image_dtype(image, self._dtype)
    image = image / 255.0
    return image
# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -11,19 +12,22 @@ ...@@ -11,19 +12,22 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# ==============================================================================
# Lint as: python3
"""Image classification task definition.""" """Image classification task definition."""
import tensorflow as tf
from official.core import input_reader
from official.core import task_factory from official.core import task_factory
from official.vision.beta.dataloaders import classification_input
from official.vision.beta.projects.yolo.configs import darknet_classification as exp_cfg from official.vision.beta.projects.yolo.configs import darknet_classification as exp_cfg
from official.vision.beta.projects.yolo.dataloaders import classification_tfds_decoder as cli
from official.common import dataset_fn
from official.vision.beta.dataloaders import input_reader_factory
from official.vision.beta.dataloaders import tfds_factory
from official.vision.beta.dataloaders import classification_input as classification_input_base
from official.vision.beta.projects.yolo.dataloaders import classification_input
from official.vision.beta.tasks import image_classification from official.vision.beta.tasks import image_classification
@task_factory.register_task_cls(exp_cfg.ImageClassificationTask) @task_factory.register_task_cls(exp_cfg.ImageClassificationTask)
class ImageClassificationTask(image_classification.ImageClassificationTask): class ImageClassificationTask(image_classification.ImageClassificationTask):
"""A task for image classification.""" """A task for image classification."""
...@@ -33,82 +37,33 @@ class ImageClassificationTask(image_classification.ImageClassificationTask): ...@@ -33,82 +37,33 @@ class ImageClassificationTask(image_classification.ImageClassificationTask):
num_classes = self.task_config.model.num_classes num_classes = self.task_config.model.num_classes
input_size = self.task_config.model.input_size input_size = self.task_config.model.input_size
image_field_key = self.task_config.train_data.image_field_key
label_field_key = self.task_config.train_data.label_field_key
is_multilabel = self.task_config.train_data.is_multilabel
if params.tfds_name: if params.tfds_name:
decoder = cli.Decoder() decoder = tfds_factory.get_classification_decoder(params.tfds_name)
else: else:
decoder = classification_input.Decoder() decoder = classification_input_base.Decoder(
image_field_key=image_field_key, label_field_key=label_field_key,
is_multilabel=is_multilabel)
parser = classification_input.Parser( parser = classification_input.Parser(
output_size=input_size[:2], output_size=input_size[:2],
num_classes=num_classes, num_classes=num_classes,
image_field_key=image_field_key,
label_field_key=label_field_key,
decode_jpeg_only=params.decode_jpeg_only,
aug_rand_hflip=params.aug_rand_hflip,
aug_type=params.aug_type,
is_multilabel=is_multilabel,
dtype=params.dtype) dtype=params.dtype)
reader = input_reader.InputReader( reader = input_reader_factory.input_reader_generator(
params, params,
dataset_fn=tf.data.TFRecordDataset, dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
decoder_fn=decoder.decode, decoder_fn=decoder.decode,
parser_fn=parser.parse_fn(params.is_training)) parser_fn=parser.parse_fn(params.is_training))
dataset = reader.read(input_context=input_context) dataset = reader.read(input_context=input_context)
return dataset return dataset
\ No newline at end of file
def train_step(self, inputs, model, optimizer, metrics=None):
  """Does forward and backward.

  Args:
    inputs: a dictionary of input tensors.
    model: the model, forward pass definition.
    optimizer: the optimizer for this training step.
    metrics: a nested structure of metrics objects.

  Returns:
    A dictionary of logs keyed by loss/metric name.
  """
  features, labels = inputs
  if self.task_config.losses.one_hot:
    labels = tf.one_hot(labels, self.task_config.model.num_classes)

  num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
  with tf.GradientTape() as tape:
    outputs = model(features, training=True)
    # Casting output layer as float32 is necessary when mixed_precision is
    # mixed_float16 or mixed_bfloat16 to ensure output is casted as float32.
    outputs = tf.nest.map_structure(
        lambda x: tf.cast(x, tf.float32), outputs)

    # Computes per-replica loss.
    loss = self.build_losses(
        model_outputs=outputs, labels=labels, aux_losses=model.losses)
    # Scales loss as the default gradients allreduce performs sum inside the
    # optimizer.
    scaled_loss = loss / num_replicas

    # For mixed_precision policy, when LossScaleOptimizer is used, loss is
    # scaled for numerical stability; this must happen inside the tape.
    if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
      scaled_loss = optimizer.get_scaled_loss(scaled_loss)

  tvars = model.trainable_variables
  grads = tape.gradient(scaled_loss, tvars)
  # Scales back gradient before apply_gradients when LossScaleOptimizer is
  # used.
  if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
    grads = optimizer.get_unscaled_gradients(grads)

  # Apply gradient clipping.
  if self.task_config.gradient_clip_norm > 0:
    grads, _ = tf.clip_by_global_norm(
        grads, self.task_config.gradient_clip_norm)
  optimizer.apply_gradients(list(zip(grads, tvars)))

  logs = {self.loss: loss}
  if metrics:
    self.process_metrics(metrics, labels, outputs)
    logs.update({m.name: m.result() for m in metrics})
  elif model.compiled_metrics:
    self.process_compiled_metrics(model.compiled_metrics, labels, outputs)
    logs.update({m.name: m.result() for m in model.metrics})
  return logs
# Darknet-53 ImageNet classification experiment config
# (indentation reconstructed from the config schema and key ordering).
runtime:
  all_reduce_alg: null
  batchnorm_spatial_persistent: false
  dataset_num_private_threads: null
  default_shard_dim: -1
  distribution_strategy: mirrored
  enable_xla: false
  gpu_thread_mode: null
  # Dynamic loss scaling pairs with mixed_precision_dtype: float16 below.
  loss_scale: dynamic
  mixed_precision_dtype: float16
  num_cores_per_replica: 1
  num_gpus: 2
  num_packs: 1
  per_gpu_thread_count: 0
  run_eagerly: false
  task_index: -1
  tpu: null
  tpu_enable_xla_dynamic_padder: null
  worker_hosts: null
task:
  evaluation:
    top_k: 5
  gradient_clip_norm: 0.0
  init_checkpoint: ''
  logging_dir: null
  losses:
    l2_weight_decay: 0.0005
    label_smoothing: 0.0
    one_hot: true
  model:
    add_head_batch_norm: false
    backbone:
      darknet:
        depth_scale: 1.0
        dilate: false
        max_level: 5
        min_level: 3
        model_id: darknet53
        use_reorg_input: false
        use_separable_conv: false
        width_scale: 1.0
      type: darknet
    dropout_rate: 0.0
    input_size: [256, 256, 3]
    kernel_initializer: VarianceScaling
    norm_activation:
      activation: mish
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
    # 1001 = 1000 ImageNet classes + background class.
    num_classes: 1001
  name: null
  train_data:
    aug_policy: null
    aug_rand_hflip: true
    aug_type: null
    block_length: 1
    cache: false
    color_jitter: 0.0
    cycle_length: 10
    decode_jpeg_only: true
    decoder:
      simple_decoder:
        mask_binarize_threshold: null
        regenerate_source_id: false
      type: simple_decoder
    deterministic: null
    drop_remainder: true
    dtype: float16
    enable_tf_data_service: false
    file_type: tfrecord
    global_batch_size: 16
    image_field_key: image/encoded
    input_path: ''
    is_multilabel: false
    is_training: true
    label_field_key: image/class/label
    mixup_and_cutmix: null
    randaug_magnitude: 10
    random_erasing: null
    seed: null
    sharding: true
    shuffle_buffer_size: 100
    tf_data_service_address: null
    tf_data_service_job_name: null
    tfds_as_supervised: false
    tfds_data_dir: ~/tensorflow_datasets/
    tfds_name: imagenet2012
    tfds_skip_decoding_feature: ''
    tfds_split: train
  validation_data:
    aug_policy: null
    aug_rand_hflip: true
    aug_type: null
    block_length: 1
    cache: false
    color_jitter: 0.0
    cycle_length: 10
    decode_jpeg_only: true
    decoder:
      simple_decoder:
        mask_binarize_threshold: null
        regenerate_source_id: false
      type: simple_decoder
    deterministic: null
    drop_remainder: false
    dtype: float16
    enable_tf_data_service: false
    file_type: tfrecord
    global_batch_size: 16
    image_field_key: image/encoded
    input_path: ''
    is_multilabel: false
    # NOTE(review): is_training: true on validation_data looks wrong
    # (enables shuffling/augmentation for eval) — confirm against the task.
    is_training: true
    label_field_key: image/class/label
    mixup_and_cutmix: null
    randaug_magnitude: 10
    random_erasing: null
    seed: null
    sharding: true
    shuffle_buffer_size: 100
    tf_data_service_address: null
    tf_data_service_job_name: null
    tfds_as_supervised: false
    tfds_data_dir: ~/tensorflow_datasets/
    tfds_name: imagenet2012
    tfds_skip_decoding_feature: ''
    tfds_split: validation
trainer:
  allow_tpu_summary: false
  best_checkpoint_eval_metric: ''
  best_checkpoint_export_subdir: ''
  best_checkpoint_metric_comp: higher
  checkpoint_interval: 10000
  continuous_eval_timeout: 3600
  eval_tf_function: true
  eval_tf_while_loop: false
  loss_upper_bound: 1000000.0
  max_to_keep: 5
  optimizer_config:
    ema: null
    learning_rate:
      polynomial:
        cycle: false
        decay_steps: 6392000
        end_learning_rate: 1.25e-05
        initial_learning_rate: 0.0125
        name: PolynomialDecay
        offset: 0
        power: 4.0
      type: polynomial
    optimizer:
      sgd:
        clipnorm: null
        clipvalue: null
        decay: 0.0
        global_clipnorm: null
        momentum: 0.9
        name: SGD
        nesterov: false
      type: sgd
    warmup:
      linear:
        name: linear
        warmup_learning_rate: 0
        warmup_steps: 8000
      type: linear
  recovery_begin_steps: 0
  recovery_max_trials: 0
  steps_per_loop: 10000
  summary_interval: 10000
  train_steps: 6400000
  train_tf_function: true
  train_tf_while_loop: true
  validation_interval: 10000
  validation_steps: 3200
  validation_summary_subdir: validation
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment