Commit a04d9e0e authored by Vishnu Banna's avatar Vishnu Banna
Browse files

merged

parents 64f16d61 bcbce005
......@@ -329,7 +329,7 @@ def load_eval_image(filename: Text, image_size: int = IMAGE_SIZE) -> tf.Tensor:
def build_eval_dataset(filenames: List[Text],
labels: List[int] = None,
labels: Optional[List[int]] = None,
image_size: int = IMAGE_SIZE,
batch_size: int = 1) -> tf.Tensor:
"""Builds a tf.data.Dataset from a list of filenames and labels.
......
......@@ -14,8 +14,10 @@
"""Defines exported symbols for the `orbit` package."""
from orbit import actions
from orbit import utils
from orbit.controller import Action
from orbit.controller import Controller
from orbit.runner import AbstractEvaluator
......
# Copyright 2021 The Orbit Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Defines an "action" abstraction for use with `orbit.Controller`.
"Actions" are simply arbitrary callables that are applied by the `Controller`
to the output of train steps (after each inner loop of `steps_per_loop` steps)
or an evaluation. This provides a hook mechanism, enabling things like reporting
metrics to Vizier, model exporting, additional logging, etc.
The basic `Action` abstraction (just a type alias) is defined in the
`controller` module. This `actions` module adds a `ConditionalAction` utility
class to make it easy to trigger actions conditionally based on reusable
predicates, as well as a small handful of predefined conditions/actions (in
particular, a `NewBestMetric` condition and an `ExportSavedModel` action).
One example of using actions to do metric-conditional export:
new_best_metric = orbit.actions.NewBestMetric('accuracy')
export_action = orbit.actions.ConditionalAction(
condition=lambda x: x['accuracy'] > 0.9 and new_best_metric(x),
action=orbit.actions.ExportSavedModel(
model,
orbit.actions.ExportFileManager(
base_name=f'{FLAGS.model_dir}/saved_model',
next_id_fn=trainer.global_step.numpy),
signatures=model.infer))
controller = orbit.Controller(
strategy=strategy,
trainer=trainer,
evaluator=evaluator,
eval_actions=[export_action],
global_step=trainer.global_step,
steps_per_loop=FLAGS.steps_per_loop,
checkpoint_manager=checkpoint_manager,
summary_interval=1000)
Note: In multi-client settings where each client runs its own `Controller`
instance, some care should be taken in deciding which clients should run certain
actions. Isolating actions to an individual client (say client 0) can be
achieved using `ConditionalAction` as follows:
client_0_actions = orbit.actions.ConditionalAction(
condition=lambda _: client_id() == 0,
action=[
...
])
In particular, the `NewBestMetric` condition may be used in multi-client
settings if all clients are guaranteed to compute the same metric (ensuring this
is up to client code, not Orbit). However, when saving metrics it may be helpful
to avoid unnecessary writes by setting the `write_value` parameter to `False`
for most clients.
"""
from orbit.actions.conditional_action import ConditionalAction
from orbit.actions.export_saved_model import ExportFileManager
from orbit.actions.export_saved_model import ExportSavedModel
from orbit.actions.new_best_metric import JSONPersistedValue
from orbit.actions.new_best_metric import NewBestMetric
# Copyright 2021 The Orbit Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Provides a `ConditionalAction` abstraction."""
from typing import Any, Callable, Sequence, Union
from orbit import controller
from orbit import runner
import tensorflow as tf
Condition = Callable[[runner.Output], Union[bool, tf.Tensor]]
def _as_sequence(maybe_sequence: Union[Any, Sequence[Any]]) -> Sequence[Any]:
if isinstance(maybe_sequence, Sequence):
return maybe_sequence
return [maybe_sequence]
class ConditionalAction:
  """Represents an action that is only taken when a given condition is met.

  This class is itself an `Action` (a callable that can be applied to train or
  eval outputs), but is intended to make it easier to write modular and
  reusable conditions by decoupling "when" something happens (the condition)
  from "what" happens (the action).
  """

  def __init__(
      self,
      condition: Condition,
      action: Union[controller.Action, Sequence[controller.Action]],
  ):
    """Initializes the instance.

    Args:
      condition: A callable accepting train or eval outputs and returning a
        bool.
      action: The action (or optionally sequence of actions) to perform when
        `condition` is met.
    """
    self.condition = condition
    self.action = action

  def __call__(self, output: runner.Output) -> None:
    # Do nothing at all unless the condition holds for this output.
    if not self.condition(output):
      return
    for wrapped in _as_sequence(self.action):
      wrapped(output)
# Copyright 2021 The Orbit Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for orbit.actions.conditional_action."""
from orbit import actions
import tensorflow as tf
class ConditionalActionTest(tf.test.TestCase):
  """Tests for `actions.ConditionalAction`."""

  def test_conditional_action(self):
    # Define a function to raise an AssertionError, since we can't in a lambda.
    def raise_assertion(arg):
      raise AssertionError(str(arg))

    conditional_action = actions.ConditionalAction(
        condition=lambda x: x['value'], action=raise_assertion)

    conditional_action({'value': False})  # Nothing is raised.
    with self.assertRaises(AssertionError) as ctx:
      conditional_action({'value': True})
    # Fix: `BaseException` has no `.message` attribute in Python 3 (accessing
    # it raises AttributeError); compare against `str(exception)` instead.
    self.assertEqual(str(ctx.exception), "{'value': True}")
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The Orbit Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Provides the `ExportSavedModel` action and associated helper classes."""
from typing import Callable, Optional
import tensorflow as tf
class _CounterIdFn:
"""Implements a counter-based ID function for `ExportFileManager`."""
def __init__(self, base_name: str):
filenames = tf.io.gfile.glob(f'{base_name}-*')
max_counter = -1
for filename in filenames:
try:
_, file_number = filename.rsplit('-', maxsplit=1)
max_counter = max(max_counter, int(file_number))
except ValueError:
continue
self.value = max_counter + 1
def __call__(self):
output = self.value
self.value += 1
return output
class ExportFileManager:
  """Utility class that manages a group of files with a shared base name.

  For actions like SavedModel exporting, there are potentially many different
  file naming and cleanup strategies that may be desirable. This class provides
  a basic interface allowing SavedModel export to be decoupled from these
  details, and a default implementation that should work for many basic
  scenarios. Users may subclass this class to alter behavior and define more
  customized naming and cleanup strategies.
  """

  def __init__(self,
               base_name: str,
               max_to_keep: int = 5,
               next_id_fn: Optional[Callable[[], int]] = None):
    """Initializes the instance.

    Args:
      base_name: A shared base name for file names generated by this class.
      max_to_keep: The maximum number of files matching `base_name` to keep
        after each call to `cleanup`. The most recent (as determined by file
        modification time) `max_to_keep` files are preserved; the rest are
        deleted. If < 0, all files are preserved.
      next_id_fn: An optional callable that returns integer IDs to append to
        base name (formatted as `'{base_name}-{id}'`). The order of integers is
        used to sort files to determine the oldest ones deleted by `clean_up`.
        If not supplied, a default ID based on an incrementing counter is used.
        One common alternative may be to use the current global step count,
        for instance passing `next_id_fn=global_step.numpy`.
    """
    self._base_name = base_name
    self._max_to_keep = max_to_keep
    if next_id_fn is None:
      next_id_fn = _CounterIdFn(base_name)
    self._next_id_fn = next_id_fn

  @property
  def managed_files(self):
    """Returns all files managed by this instance, in sorted order.

    Returns:
      The list of files matching the `base_name` provided when constructing
      this `ExportFileManager` instance, sorted in increasing integer order of
      the IDs returned by `next_id_fn`.
    """

    def extract_id(filename):
      # The numeric ID is whatever follows the final '-' in the file name.
      return int(filename.rpartition('-')[2])

    return sorted(tf.io.gfile.glob(f'{self._base_name}-*'), key=extract_id)

  def clean_up(self):
    """Cleans up old files matching `{base_name}-*`.

    The most recent `max_to_keep` files are preserved.
    """
    if self._max_to_keep < 0:
      return
    # Delete everything except the `max_to_keep` files with the highest IDs.
    for stale_file in self.managed_files[:-self._max_to_keep]:
      tf.io.gfile.rmtree(stale_file)

  def next_name(self) -> str:
    """Returns a new file name based on `base_name` and `next_id_fn()`."""
    return f'{self._base_name}-{self._next_id_fn()}'
class ExportSavedModel:
  """Action that exports the given model as a SavedModel."""

  def __init__(self,
               model: tf.Module,
               file_manager: ExportFileManager,
               signatures,
               options: Optional[tf.saved_model.SaveOptions] = None):
    """Initializes the instance.

    Args:
      model: The model to export.
      file_manager: An instance of `ExportFileManager` (or a subclass), that
        provides file naming and cleanup functionality.
      signatures: The signatures to forward to `tf.saved_model.save()`.
      options: Optional options to forward to `tf.saved_model.save()`.
    """
    self.model = model
    self.file_manager = file_manager
    self.signatures = signatures
    self.options = options

  def __call__(self, _):
    """Exports the SavedModel, then removes stale exports."""
    tf.saved_model.save(self.model, self.file_manager.next_name(),
                        self.signatures, self.options)
    self.file_manager.clean_up()
# Copyright 2021 The Orbit Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for orbit.actions.export_saved_model."""
import os
from orbit import actions
import tensorflow as tf
def _id_key(name):
_, id_num = name.rsplit('-', maxsplit=1)
return int(id_num)
def _id_sorted_file_base_names(dir_path):
  """Lists files in `dir_path` sorted by their trailing integer IDs."""
  return sorted(tf.io.gfile.listdir(dir_path), key=_id_key)
class TestModel(tf.Module):
  """Minimal stateful `tf.Module` used to exercise SavedModel export."""

  def __init__(self):
    # Mutable state so tests can distinguish exports made at different times.
    self.value = tf.Variable(0)

  @tf.function(input_signature=[])
  def __call__(self):
    # Traced with an empty input signature so the exported SavedModel exposes
    # a no-argument concrete function.
    return self.value
class ExportSavedModelTest(tf.test.TestCase):
  """Tests for `ExportFileManager` and the `ExportSavedModel` action."""

  def test_export_file_manager_default_ids(self):
    # With the default counter-based IDs, names are basename-0, -1, -2, ...
    directory = self.create_tempdir()
    base_name = os.path.join(directory.full_path, 'basename')
    manager = actions.ExportFileManager(base_name, max_to_keep=3)
    self.assertLen(tf.io.gfile.listdir(directory.full_path), 0)
    directory.create_file(manager.next_name())
    manager.clean_up()  # Shouldn't do anything...
    self.assertLen(tf.io.gfile.listdir(directory.full_path), 1)
    directory.create_file(manager.next_name())
    manager.clean_up()  # Shouldn't do anything...
    self.assertLen(tf.io.gfile.listdir(directory.full_path), 2)
    directory.create_file(manager.next_name())
    manager.clean_up()  # Shouldn't do anything...
    self.assertLen(tf.io.gfile.listdir(directory.full_path), 3)
    directory.create_file(manager.next_name())
    self.assertLen(tf.io.gfile.listdir(directory.full_path), 4)
    self.assertEqual(
        _id_sorted_file_base_names(directory.full_path),
        ['basename-0', 'basename-1', 'basename-2', 'basename-3'])
    manager.clean_up()  # Should delete file with lowest ID.
    self.assertEqual(
        _id_sorted_file_base_names(directory.full_path),
        ['basename-1', 'basename-2', 'basename-3'])
    # A fresh manager should resume counting from the highest existing ID.
    manager = actions.ExportFileManager(base_name, max_to_keep=3)
    self.assertEqual(os.path.basename(manager.next_name()), 'basename-4')

  def test_export_file_manager_custom_ids(self):
    directory = self.create_tempdir()
    base_name = os.path.join(directory.full_path, 'basename')

    # `next_id` reads `id_num` at call time, so reassigning it below controls
    # the ID used for each subsequent `next_name()` call.
    id_num = 0

    def next_id():
      return id_num

    manager = actions.ExportFileManager(
        base_name, max_to_keep=2, next_id_fn=next_id)
    self.assertLen(tf.io.gfile.listdir(directory.full_path), 0)
    id_num = 30
    directory.create_file(manager.next_name())
    self.assertLen(tf.io.gfile.listdir(directory.full_path), 1)
    manager.clean_up()  # Shouldn't do anything...
    self.assertEqual(
        _id_sorted_file_base_names(directory.full_path), ['basename-30'])
    id_num = 200
    directory.create_file(manager.next_name())
    self.assertLen(tf.io.gfile.listdir(directory.full_path), 2)
    manager.clean_up()  # Shouldn't do anything...
    self.assertEqual(
        _id_sorted_file_base_names(directory.full_path),
        ['basename-30', 'basename-200'])
    id_num = 1000
    directory.create_file(manager.next_name())
    self.assertLen(tf.io.gfile.listdir(directory.full_path), 3)
    self.assertEqual(
        _id_sorted_file_base_names(directory.full_path),
        ['basename-30', 'basename-200', 'basename-1000'])
    manager.clean_up()  # Should delete file with lowest ID.
    self.assertLen(tf.io.gfile.listdir(directory.full_path), 2)
    self.assertEqual(
        _id_sorted_file_base_names(directory.full_path),
        ['basename-200', 'basename-1000'])

  def test_export_saved_model(self):
    directory = self.create_tempdir()
    base_name = os.path.join(directory.full_path, 'basename')
    file_manager = actions.ExportFileManager(base_name, max_to_keep=2)
    model = TestModel()
    export_action = actions.ExportSavedModel(
        model, file_manager=file_manager, signatures=model.__call__)
    model.value.assign(3)
    self.assertEqual(model(), 3)
    self.assertEmpty(file_manager.managed_files)
    export_action({})
    self.assertLen(file_manager.managed_files, 1)
    # Each export should round-trip the current variable value.
    reloaded_model = tf.saved_model.load(file_manager.managed_files[-1])
    self.assertEqual(reloaded_model(), 3)
    model.value.assign(5)
    self.assertEqual(model(), 5)
    export_action({})
    self.assertLen(file_manager.managed_files, 2)
    reloaded_model = tf.saved_model.load(file_manager.managed_files[-1])
    self.assertEqual(reloaded_model(), 5)
    model.value.assign(7)
    self.assertEqual(model(), 7)
    export_action({})
    self.assertLen(file_manager.managed_files, 2)  # Still 2, due to clean up.
    reloaded_model = tf.saved_model.load(file_manager.managed_files[-1])
    self.assertEqual(reloaded_model(), 7)
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The Orbit Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Provides the `NewBestMetric` condition and associated helper classes."""
import json
import os
import sys
from typing import Any, Callable, Optional, Union
import uuid
from orbit import runner
from orbit import utils
import tensorflow as tf
MetricFn = Callable[[runner.Output], Union[float, tf.Tensor]]
class NewBestMetric:
  """Condition that is satisfied when a new best metric is achieved.

  This class keeps track of the best metric value seen so far, optionally in a
  persistent (preemption-safe) way.

  Two methods are provided, which each satisfy the `Action` protocol: `test`
  for only testing whether a new best metric is achieved by a given train/eval
  output, and `commit`, which both tests and records the new best metric value
  if it is achieved. These separate methods enable the same `NewBestMetric`
  instance to be reused as a condition multiple times, and can also provide
  additional preemption/failure safety. For example, to avoid updating the
  best metric if a model export fails or is pre-empted:

      new_best_metric = orbit.actions.NewBestMetric(
          'accuracy', filename='/model/dir/best_metric')
      action = orbit.actions.ConditionalAction(
          condition=new_best_metric.test,
          action=[
              orbit.actions.ExportSavedModel(...),
              new_best_metric.commit
          ])

  The default `__call__` implementation is equivalent to `commit`.

  This class is safe to use in multi-client settings if all clients can be
  guaranteed to compute the same metric. However when saving metrics it may be
  helpful to avoid unnecessary writes by setting the `write_value` parameter
  to `False` for most clients.

  Attributes:
    metric: The metric passed to __init__ (may be a string key or a callable
      that can be applied to train/eval output).
    higher_is_better: Whether higher metric values are better.
  """

  def __init__(self,
               metric: Union[str, MetricFn],
               higher_is_better: bool = True,
               filename: Optional[str] = None,
               write_metric: bool = True):
    """Initializes the instance.

    Args:
      metric: Either a string key name to use to look up a metric (assuming the
        train/eval output is a dictionary), or a callable that accepts the
        train/eval output and returns a metric value.
      higher_is_better: Whether higher metric values are better. If `True`, a
        new best metric is achieved when the metric value is strictly greater
        than the previous best metric. If `False`, a new best metric is
        achieved when the metric value is strictly less than the previous best
        metric.
      filename: A filename to use for storage of the best metric value seen so
        far, to allow persistence of the value across preemptions. If `None`
        (default), values aren't persisted.
      write_metric: If `filename` is set, this controls whether this instance
        will write new best metric values to the file, or just read from the
        file to obtain the initial value. Setting this to `False` for most
        clients in some multi-client setups can avoid unnecessary file writes.
        Has no effect if `filename` is `None`.
    """
    self.metric = metric
    self.higher_is_better = higher_is_better
    float_max = sys.float_info.max
    # Seed with the worst representable value so any real metric beats it.
    self._best_value = JSONPersistedValue(
        initial_value=-float_max if higher_is_better else float_max,
        filename=filename,
        write_value=write_metric)

  def __call__(self, output: runner.Output) -> bool:
    """Tests `output` and updates the current best value if necessary.

    This is equivalent to `commit` below.

    Args:
      output: The train or eval output to test.

    Returns:
      `True` if `output` contains a new best metric value, `False` otherwise.
    """
    return self.commit(output)

  def metric_value(self, output: runner.Output) -> float:
    """Computes the metric value for the given `output`."""
    if callable(self.metric):
      value = self.metric(output)
    else:
      value = output[self.metric]
    return float(utils.get_value(value))

  @property
  def best_value(self) -> float:
    """Returns the best metric value seen so far."""
    return self._best_value.read()

  def test(self, output: runner.Output) -> bool:
    """Tests `output` to see if it contains a new best metric value.

    If `output` does contain a new best metric value, this method does *not*
    save it (i.e., calling this method multiple times in a row with the same
    `output` will continue to return `True`).

    Args:
      output: The train or eval output to test.

    Returns:
      `True` if `output` contains a new best metric value, `False` otherwise.
    """
    metric_value = self.metric_value(output)
    if self.higher_is_better:
      if metric_value > self.best_value:
        return True
    else:  # Lower is better.
      if metric_value < self.best_value:
        return True
    return False

  def commit(self, output: runner.Output) -> bool:
    """Tests `output` and updates the current best value if necessary.

    Unlike `test` above, if `output` does contain a new best metric value, this
    method *does* save it (i.e., subsequent calls to this method with the same
    `output` will return `False`).

    Args:
      output: The train or eval output to test.

    Returns:
      `True` if `output` contains a new best metric value, `False` otherwise.
    """
    if self.test(output):
      self._best_value.write(self.metric_value(output))
      return True
    return False
class JSONPersistedValue:
  """Represents a value that is persisted via a file-based backing store.

  The value must be JSON-serializable. Each time the value is updated, it will
  be written to the backing file. It is only read from the file at
  initialization.
  """

  def __init__(self,
               initial_value: Any,
               filename: Optional[str],
               write_value: bool = True):
    """Initializes the instance.

    Args:
      initial_value: The initial value to use if no backing file exists or was
        given. This must be a JSON-serializable value (possibly nested
        combination of lists, dicts, and primitive values).
      filename: The path to use for persistent storage of the value. This may
        be `None`, in which case the value is not stable across preemptions.
      write_value: If `True`, new values will be written to `filename` on calls
        to `write()`. If `False`, `filename` is only read once to restore any
        persisted value, and new values will not be written to it. This can be
        useful in certain multi-client settings to avoid race conditions or
        excessive file writes. If `filename` is `None`, this parameter has no
        effect.
    """
    self._value = None
    self._filename = filename
    self._write_value = write_value
    if self._filename is not None:
      if tf.io.gfile.exists(self._filename):
        # Only attempt to parse a non-empty file; an empty file means no value
        # has been persisted yet.
        if tf.io.gfile.stat(self._filename).length > 0:
          with tf.io.gfile.GFile(self._filename, 'r') as f:
            self._value = json.load(f)
      elif self._write_value:
        tf.io.gfile.makedirs(os.path.dirname(self._filename))
    # Fall back to `initial_value` when nothing was restored from disk.
    if self._value is None:
      self.write(initial_value)

  def read(self):
    """Returns the value."""
    return self._value

  def write(self, value):
    """Writes the value, updating the backing store if one was provided."""
    self._value = value
    if self._filename is not None and self._write_value:
      # To achieve atomic writes, we first write to a temporary file, and then
      # rename it to `self._filename`.
      tmp_filename = f'{self._filename}.tmp.{uuid.uuid4().hex}'
      with tf.io.gfile.GFile(tmp_filename, 'w') as f:
        json.dump(self._value, f)
      tf.io.gfile.rename(tmp_filename, self._filename, overwrite=True)
# Copyright 2021 The Orbit Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for orbit.actions.new_best_metric."""
import os
from orbit import actions
import tensorflow as tf
class NewBestMetricTest(tf.test.TestCase):
  """Tests for `NewBestMetric` and `JSONPersistedValue`."""

  def test_new_best_metric_higher_is_better(self):
    new_best_metric = actions.NewBestMetric(
        lambda x: x['value'], higher_is_better=True)
    # `test` does not record the value, so it keeps returning True until
    # `commit` saves it.
    self.assertTrue(new_best_metric.test({'value': 0.0}))
    self.assertTrue(new_best_metric.commit({'value': 0.0}))
    self.assertFalse(new_best_metric.test({'value': 0.0}))
    self.assertTrue(new_best_metric.test({'value': 1.0}))

  def test_new_best_metric_lower_is_better(self):
    new_best_metric = actions.NewBestMetric('value', higher_is_better=False)
    self.assertTrue(new_best_metric.test({'value': 0.0}))
    self.assertTrue(new_best_metric.commit({'value': 0.0}))
    self.assertFalse(new_best_metric.test({'value': 0.0}))
    self.assertTrue(new_best_metric.test({'value': -1.0}))

  def test_new_best_metric_persistence(self):
    backing_file = self.create_tempfile()
    new_best_metric = actions.NewBestMetric(
        'value',
        higher_is_better=True,
        filename=backing_file.full_path,
        write_metric=False)
    self.assertTrue(new_best_metric.test({'value': 0.0}))
    self.assertTrue(new_best_metric.commit({'value': 0.0}))
    self.assertFalse(new_best_metric.test({'value': 0.0}))
    # write_metric=False above, so the 0.0 value was never persisted; a new
    # instance restores the (very negative) initial value instead.
    new_best_metric = actions.NewBestMetric(
        'value', higher_is_better=True, filename=backing_file.full_path)
    self.assertLess(new_best_metric.best_value, 0.0)
    self.assertTrue(new_best_metric.commit({'value': 5.0}))
    self.assertEqual(new_best_metric.best_value, 5.0)
    new_best_metric = actions.NewBestMetric(
        'value', higher_is_better=True, filename=backing_file.full_path)
    self.assertEqual(new_best_metric.best_value, 5.0)

  def test_json_persisted_value(self):
    tempfile = self.create_tempfile().full_path
    value = {'a': 1, 'b': 2}
    persisted_value = actions.JSONPersistedValue(value, tempfile)
    # The initial value is used since tempfile is empty.
    self.assertEqual(persisted_value.read(), value)
    persisted_value = actions.JSONPersistedValue('ignored', tempfile)
    # Initial value of 'ignored' is ignored, since there's a value in tempfile.
    self.assertEqual(persisted_value.read(), value)
    value = [1, 2, 3]
    persisted_value.write(value)
    # Now that a new value is written, it gets read on initialization.
    persisted_value = actions.JSONPersistedValue(['also ignored'], tempfile)
    self.assertEqual(persisted_value.read(), value)
    # Writes can be disabled.
    persisted_value = actions.JSONPersistedValue(
        'ignored', tempfile, write_value=False)
    self.assertEqual(persisted_value.read(), value)
    persisted_value.write("won't get persisted")
    persisted_value = actions.JSONPersistedValue(
        'ignored', tempfile, write_value=False)
    self.assertEqual(persisted_value.read(), value)

  def test_json_persisted_value_create_dirs(self):
    tempfile = os.path.join(self.create_tempdir().full_path, 'subdir/value')
    value = {'a': 1, 'b': 2}
    # The directory is not created if write_value=False.
    actions.JSONPersistedValue(value, tempfile, write_value=False)
    self.assertFalse(tf.io.gfile.exists(os.path.dirname(tempfile)))
    actions.JSONPersistedValue(value, tempfile)
    self.assertTrue(tf.io.gfile.exists(tempfile))
if __name__ == '__main__':
tf.test.main()
......@@ -17,7 +17,7 @@
import pprint
import time
from typing import Callable, Optional, Union
from typing import Callable, List, Optional, Union
from absl import logging
......@@ -46,6 +46,9 @@ def _format_output(output, indent=4):
return "\n" + "\n".join(lines)
Action = Callable[[runner.Output], None]
class Controller:
"""Class that controls the outer loop of model training and evaluation.
......@@ -53,10 +56,9 @@ class Controller:
loops are implemented by users in the form of `AbstractTrainer` and
`AbstractEvaluator` subclasses, and define how to run a given number of
training or evaluation steps. The outer loop is provided by this `Controller`,
and interleaves calls to the user provided inner loops with additional actions
such as saving checkpoints, running evaluations, and writing summaries
(depending on the arguments passed to `Controller.__init__` and the method
being called).
and interleaves calls to the user-provided inner loops with additional actions
such as saving checkpoints, running evaluations, writing summaries, as well as
(optionally) user provided `Action`s (see below).
There are four top-level "outer loops" provided:
......@@ -70,6 +72,15 @@ class Controller:
training and evaluation use cases, the internal details and method
implementations are also intended to be simple enough to make subclassing or
other custom outer loop implementations easy to achieve.
Some additional customization can be achieved by supplying `train_actions` or
`eval_actions` when constructing the `Controller`. These are just lists of
arbitrary callables that are applied by the `Controller` to the output of
train steps (after each inner loop of `steps_per_loop` steps) or an
evaluation. This provides a hook mechanism, enabling things like reporting
metrics to Vizier, model exporting, additional logging, etc. See the
`orbit.actions` package for a small handful of predefined actions and some
utility classes that may be useful in defining your own.
"""
def __init__(
......@@ -79,6 +90,9 @@ class Controller:
trainer: Optional[runner.AbstractTrainer] = None,
evaluator: Optional[runner.AbstractEvaluator] = None,
strategy: Optional[tf.distribute.Strategy] = None,
# Actions
train_actions: Optional[List[Action]] = None,
eval_actions: Optional[List[Action]] = None,
# Train related
steps_per_loop: Optional[int] = None,
checkpoint_manager: Optional[tf.train.CheckpointManager] = None,
......@@ -86,7 +100,8 @@ class Controller:
summary_interval: Optional[int] = None,
summary_dir: Optional[str] = None,
# Evaluation related
eval_summary_dir: Optional[str] = None):
eval_summary_dir: Optional[str] = None,
):
"""Initializes a `Controller` instance.
Note that if `checkpoint_manager` is provided and there are checkpoints in
......@@ -110,6 +125,12 @@ class Controller:
strategy: An instance of `tf.distribute.Strategy`. If not provided, the
strategy will be initialized from the current in-scope strategy using
`tf.distribute.get_strategy()`.
train_actions: An optional list of `orbit.Action`s to call after each
block of `steps_per_loop` training steps are run. These will be called
with the output of `trainer.train`.
eval_actions: An optional list of `orbit.Action`s to call after each
evaluation. These will be called with the output of
`evaluator.evaluate`.
steps_per_loop: The number of steps to run in each inner loop of training
(passed as the `num_steps` parameter of `trainer.train`).
checkpoint_manager: An instance of `tf.train.CheckpointManager`. If
......@@ -138,6 +159,7 @@ class Controller:
"""
if trainer is None and evaluator is None:
raise ValueError("`trainer` and `evaluator` should not both be `None`.")
if trainer is not None:
if steps_per_loop is None:
raise ValueError(
......@@ -163,6 +185,9 @@ class Controller:
self.strategy = strategy or tf.distribute.get_strategy()
self.train_actions = train_actions or []
self.eval_actions = eval_actions or []
self.global_step = global_step
self.checkpoint_manager = checkpoint_manager
......@@ -255,9 +280,13 @@ class Controller:
with self.eval_summary_manager.summary_writer().as_default():
steps_tensor = tf.convert_to_tensor(steps, dtype=tf.int32)
eval_output = self.evaluator.evaluate(steps_tensor)
eval_output = tf.nest.map_structure(utils.get_value, eval_output or {})
elapsed = time.time() - start
eval_output = eval_output or {}
for action in self.eval_actions:
action(eval_output)
eval_output = tf.nest.map_structure(utils.get_value, eval_output)
_log(f" eval | step: {current_step: 6d} | "
f"eval time: {elapsed: 6.1f} sec | "
f"output: {_format_output(eval_output)}")
......@@ -338,7 +367,7 @@ class Controller:
self.restore_checkpoint(checkpoint_path)
self.evaluate(steps)
def restore_checkpoint(self, checkpoint_path: str = None):
def restore_checkpoint(self, checkpoint_path: Optional[str] = None):
"""Restores the model from a checkpoint.
Args:
......@@ -408,7 +437,6 @@ class Controller:
with tf.summary.record_if(should_record):
num_steps_tensor = tf.convert_to_tensor(num_steps, dtype=tf.int32)
train_output = self.trainer.train(num_steps_tensor)
train_output = tf.nest.map_structure(utils.get_value, train_output or {})
# Verify that global_step was updated properly, then update current_step.
expected_step = current_step + num_steps
......@@ -420,6 +448,11 @@ class Controller:
logging.warning(message)
return
train_output = train_output or {}
for action in self.train_actions:
action(train_output)
train_output = tf.nest.map_structure(utils.get_value, train_output)
current_step = expected_step
steps_per_second = self.step_timer.steps_per_second()
_log(f"train | step: {current_step: 6d} | "
......
......@@ -583,7 +583,7 @@ class ControllerTest(tf.test.TestCase, parameterized.TestCase):
test_runner = TestRunner()
class EarlyStopController(controller.Controller):
"""A subclass of Controller supports early stopping."""
"""A subclass of Controller that supports early stopping."""
def train_and_evaluate(self,
train_steps: int = None,
......@@ -724,5 +724,52 @@ class ControllerTest(tf.test.TestCase, parameterized.TestCase):
summaries_with_matching_keyword(
"accuracy", os.path.join(self.model_dir, "dataset2")))
def test_actions(self):
test_runner = TestRunner()
checkpoint = tf.train.Checkpoint(
model=test_runner.model, optimizer=test_runner.optimizer)
checkpoint_manager = tf.train.CheckpointManager(
checkpoint,
self.model_dir,
max_to_keep=None,
step_counter=test_runner.global_step,
checkpoint_interval=10)
class OutputRecorderAction:
"""Simple `Action` that just saves the outputs passed to `__call__`."""
def __init__(self):
self.outputs = []
def __call__(self, output):
self.outputs.append(output)
train_output_recorder = OutputRecorderAction()
eval_output_recorder = OutputRecorderAction()
test_controller = controller.Controller(
trainer=test_runner,
evaluator=test_runner,
train_actions=[train_output_recorder],
eval_actions=[eval_output_recorder],
global_step=test_runner.global_step,
steps_per_loop=2,
summary_dir=os.path.join(self.model_dir, "summaries/train"),
checkpoint_manager=checkpoint_manager,
eval_summary_dir=os.path.join(self.model_dir, "summaries/eval"))
test_controller.train_and_evaluate(
train_steps=10, eval_steps=2, eval_interval=6)
self.assertLen(train_output_recorder.outputs, 5)
for output in train_output_recorder.outputs:
self.assertIn("loss", output)
self.assertGreaterEqual(output["loss"], 0)
self.assertLen(eval_output_recorder.outputs, 2)
for output in eval_output_recorder.outputs:
self.assertIn("eval_loss", output)
self.assertGreaterEqual(output["eval_loss"], 0)
# Standard TensorFlow test entry point: runs the test cases above when the
# file is executed as a script.
if __name__ == "__main__":
  tf.test.main()
......@@ -83,7 +83,9 @@ class StandardTrainer(runner.AbstractTrainer, metaclass=abc.ABCMeta):
`tf.function`, as determined by the `options` passed to `__init__`.
"""
def __init__(self, train_dataset, options: StandardTrainerOptions = None):
def __init__(self,
train_dataset,
options: Optional[StandardTrainerOptions] = None):
"""Initializes the `StandardTrainer` instance.
Args:
......@@ -256,7 +258,9 @@ class StandardEvaluator(runner.AbstractEvaluator, metaclass=abc.ABCMeta):
is recommended in this case.
"""
def __init__(self, eval_dataset, options: StandardEvaluatorOptions = None):
def __init__(self,
eval_dataset,
options: Optional[StandardEvaluatorOptions] = None):
"""Initializes the `StandardEvaluator` instance.
Args:
......@@ -403,7 +407,7 @@ class StandardEvaluator(runner.AbstractEvaluator, metaclass=abc.ABCMeta):
pass
def eval_reduce(self,
state: Any = None,
state: Optional[Any] = None,
step_outputs: Optional[runner.Output] = None) -> Any:
"""A function to perform per-step reduction on the evaluation outputs.
......
......@@ -170,8 +170,7 @@ the postprocessor can be run after inference.
If you don't need to use the released embeddings or YouTube-8M, then you could
skip postprocessing and use raw embeddings.
A [Colab](https://colab.research.google.com/)
showing how to download the model and calculate the embeddings on your
A Colab showing how to download the model and calculate the embeddings on your
own sound data is available here:
[AudioSet Embedding Colab](https://colab.research.google.com/drive/1TbX92UL9sYWbdwdGE0rJ9owmezB-Rl1C).
[VGGish Embedding Colab](https://colab.research.google.com/drive/1E3CaPAqCai9P9QhJ3WYPNCVmrJU4lAhF).
......@@ -35,6 +35,8 @@ class AttentionModel(tf.keras.Model):
Uses two [kernel_size x kernel_size] convolutions and softplus as activation
to compute an attention map with the same resolution as the featuremap.
Features are l2-normalized and aggregated using attention probabilities as weights.
The features (targets) to be aggregated can be the input featuremap, or a
different one with the same resolution.
"""
def __init__(self, kernel_size=1, decay=_DECAY, name='attention'):
......@@ -65,7 +67,7 @@ class AttentionModel(tf.keras.Model):
name='attn_conv2')
self.activation_layer = layers.Activation('softplus')
def call(self, inputs, training=True):
def call(self, inputs, targets=None, training=True):
x = self.conv1(inputs)
x = self.bn_conv1(x, training=training)
x = tf.nn.relu(x)
......@@ -73,9 +75,13 @@ class AttentionModel(tf.keras.Model):
score = self.conv2(x)
prob = self.activation_layer(score)
# Aggregate inputs if targets is None.
if targets is None:
targets = inputs
# L2-normalize the featuremap before pooling.
inputs = tf.nn.l2_normalize(inputs, axis=-1)
feat = tf.reduce_mean(tf.multiply(inputs, prob), [1, 2], keepdims=False)
targets = tf.nn.l2_normalize(targets, axis=-1)
feat = tf.reduce_mean(tf.multiply(targets, prob), [1, 2], keepdims=False)
return feat, prob, score
......@@ -208,7 +214,9 @@ class Delf(tf.keras.Model):
block3 = tf.stop_gradient(block3)
if self._use_dim_reduction:
(dim_expanded_features, dim_reduced_features) = self.autoencoder(block3)
attn_prelogits, attn_scores, _ = self.attention(dim_expanded_features,
attn_prelogits, attn_scores, _ = self.attention(
block3,
targets=dim_expanded_features,
training=training)
else:
attn_prelogits, attn_scores, _ = self.attention(block3, training=training)
......
# Lint as: python3
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for the DELG model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from absl.testing import parameterized
import tensorflow as tf
from delf.python.training.model import delg_model
class DelgTest(tf.test.TestCase, parameterized.TestCase):
  """Smoke tests for the DELG model's global and attention/local heads.

  Each test is parameterized over `block3_strides`; when enabled, the
  attention feature map's spatial resolution is halved (see
  `test_forward_pass` below).
  """

  @parameterized.named_parameters(
      ('block3_stridesTrue', True),
      ('block3_stridesFalse', False),
  )
  def test_forward_pass(self, block3_strides):
    """Checks the output shapes of a complete forward pass (`build_call`)."""
    image_size = 321
    num_classes = 1000
    batch_size = 2
    input_shape = (batch_size, image_size, image_size, 3)
    local_feature_dim = 64
    feature_map_size = image_size // 16 # reduction factor for resnet50.
    if block3_strides:
      # Extra stride halves the attention feature-map resolution.
      feature_map_size //= 2

    model = delg_model.Delg(block3_strides=block3_strides,
                            use_dim_reduction=True,
                            reduced_dimension=local_feature_dim)
    model.init_classifiers(num_classes)

    images = tf.random.uniform(input_shape, minval=-1.0, maxval=1.0, seed=0)

    # Run a complete forward pass of the model.
    global_feature, attn_scores, local_features = model.build_call(images)

    # Global descriptor is 2048-D; attention scores and local features share
    # the attention feature-map's spatial resolution.
    self.assertAllEqual(global_feature.shape, (batch_size, 2048))
    self.assertAllEqual(
        attn_scores.shape,
        (batch_size, feature_map_size, feature_map_size, 1))
    self.assertAllEqual(
        local_features.shape,
        (batch_size, feature_map_size, feature_map_size, local_feature_dim))

  @parameterized.named_parameters(
      ('block3_stridesTrue', True),
      ('block3_stridesFalse', False),
  )
  def test_build_model(self, block3_strides):
    """Runs the backbone and classifier heads piecewise and checks shapes."""
    image_size = 321
    num_classes = 1000
    batch_size = 2
    input_shape = (batch_size, image_size, image_size, 3)

    model = delg_model.Delg(
        block3_strides=block3_strides,
        use_dim_reduction=True)
    model.init_classifiers(num_classes)

    images = tf.random.uniform(input_shape, minval=-1.0, maxval=1.0, seed=0)
    # NOTE(review): `maxval` is exclusive in `tf.random.uniform`, so the last
    # class id (`num_classes - 1`) is never sampled here.
    labels = tf.random.uniform((batch_size,),
                               minval=0,
                               maxval=model.num_classes - 1,
                               dtype=tf.int64)

    # Collect intermediate backbone feature maps into `blocks`.
    blocks = {}
    desc_prelogits = model.backbone(
        images, intermediates_dict=blocks, training=False)
    desc_logits = model.desc_classification(desc_prelogits, labels)
    self.assertAllEqual(desc_prelogits.shape, (batch_size, 2048))
    self.assertAllEqual(desc_logits.shape, (batch_size, num_classes))

    features = blocks['block3']
    attn_prelogits, _, _ = model.attention(features)
    attn_logits = model.attn_classification(attn_prelogits)
    self.assertAllEqual(attn_prelogits.shape, (batch_size, 1024))
    self.assertAllEqual(attn_logits.shape, (batch_size, num_classes))

  @parameterized.named_parameters(
      ('block3_stridesTrue', True),
      ('block3_stridesFalse', False),
  )
  def test_train_step(self, block3_strides):
    """Exercises one optimizer step over the combined DELG losses."""
    image_size = 321
    num_classes = 1000
    batch_size = 2
    clip_val = 10.0
    input_shape = (batch_size, image_size, image_size, 3)

    model = delg_model.Delg(
        block3_strides=block3_strides,
        use_dim_reduction=True)
    model.init_classifiers(num_classes)

    optimizer = tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.9)

    images = tf.random.uniform(input_shape, minval=0.0, maxval=1.0, seed=0)
    # NOTE(review): `maxval` is exclusive, so the last class id is never drawn.
    labels = tf.random.uniform((batch_size,),
                               minval=0,
                               maxval=model.num_classes - 1,
                               dtype=tf.int64)

    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction=tf.keras.losses.Reduction.NONE)

    def compute_loss(labels, predictions):
      # Average per-example loss over the (global) batch size.
      per_example_loss = loss_object(labels, predictions)
      return tf.nn.compute_average_loss(
          per_example_loss, global_batch_size=batch_size)

    with tf.GradientTape() as gradient_tape:
      (desc_prelogits, attn_prelogits, _, backbone_blocks,
       dim_expanded_features, _) = model.global_and_local_forward_pass(images)
      # Calculate global loss by applying the descriptor classifier.
      desc_logits = model.desc_classification(desc_prelogits, labels)
      desc_loss = compute_loss(labels, desc_logits)
      # Calculate attention loss by applying the attention block classifier.
      attn_logits = model.attn_classification(attn_prelogits)
      attn_loss = compute_loss(labels, attn_logits)
      # Calculate reconstruction loss between the attention prelogits and the
      # backbone.
      block3 = tf.stop_gradient(backbone_blocks['block3'])
      reconstruction_loss = tf.math.reduce_mean(
          tf.keras.losses.MSE(block3, dim_expanded_features))
      # Cumulate global loss and attention loss and backpropagate through the
      # descriptor layer and attention layer together.
      total_loss = desc_loss + attn_loss + reconstruction_loss

    # Clip gradients by global norm before applying, bounding update size.
    gradients = gradient_tape.gradient(total_loss, model.trainable_weights)
    clipped, _ = tf.clip_by_global_norm(gradients, clip_norm=clip_val)
    optimizer.apply_gradients(zip(clipped, model.trainable_weights))
# Standard TensorFlow test entry point: runs the test cases above when the
# file is executed as a script.
if __name__ == '__main__':
  tf.test.main()
......@@ -926,11 +926,27 @@ def object_detection_proto_to_params(od_config):
losses_pb2.WeightedSigmoidClassificationLoss())
loss.localization_loss.CopyFrom(od_config.localization_loss)
_, localization_loss, _, _, _, _, _ = (losses_builder.build(loss))
if od_config.HasField('scale_head_params'):
scale_head_num_filters = list(od_config.scale_head_params.num_filters)
scale_head_kernel_sizes = list(od_config.scale_head_params.kernel_sizes)
else:
scale_head_num_filters = [256]
scale_head_kernel_sizes = [3]
if od_config.HasField('offset_head_params'):
offset_head_num_filters = list(od_config.offset_head_params.num_filters)
offset_head_kernel_sizes = list(od_config.offset_head_params.kernel_sizes)
else:
offset_head_num_filters = [256]
offset_head_kernel_sizes = [3]
return center_net_meta_arch.ObjectDetectionParams(
localization_loss=localization_loss,
scale_loss_weight=od_config.scale_loss_weight,
offset_loss_weight=od_config.offset_loss_weight,
task_loss_weight=od_config.task_loss_weight)
task_loss_weight=od_config.task_loss_weight,
scale_head_num_filters=scale_head_num_filters,
scale_head_kernel_sizes=scale_head_kernel_sizes,
offset_head_num_filters=offset_head_num_filters,
offset_head_kernel_sizes=offset_head_kernel_sizes)
def object_center_proto_to_params(oc_config):
......@@ -973,13 +989,21 @@ def mask_proto_to_params(mask_config):
losses_pb2.WeightedL2LocalizationLoss())
loss.classification_loss.CopyFrom(mask_config.classification_loss)
classification_loss, _, _, _, _, _, _ = (losses_builder.build(loss))
if mask_config.HasField('mask_head_params'):
mask_head_num_filters = list(mask_config.mask_head_params.num_filters)
mask_head_kernel_sizes = list(mask_config.mask_head_params.kernel_sizes)
else:
mask_head_num_filters = [256]
mask_head_kernel_sizes = [3]
return center_net_meta_arch.MaskParams(
classification_loss=classification_loss,
task_loss_weight=mask_config.task_loss_weight,
mask_height=mask_config.mask_height,
mask_width=mask_config.mask_width,
score_threshold=mask_config.score_threshold,
heatmap_bias_init=mask_config.heatmap_bias_init)
heatmap_bias_init=mask_config.heatmap_bias_init,
mask_head_num_filters=mask_head_num_filters,
mask_head_kernel_sizes=mask_head_kernel_sizes)
def densepose_proto_to_params(densepose_config):
......
......@@ -188,7 +188,7 @@ class ModelBuilderTF2Test(
return text_format.Merge(proto_txt,
center_net_pb2.CenterNet.ObjectCenterParams())
def get_fake_object_detection_proto(self):
def get_fake_object_detection_proto(self, customize_head_params=False):
proto_txt = """
task_loss_weight: 0.5
offset_loss_weight: 0.1
......@@ -198,10 +198,19 @@ class ModelBuilderTF2Test(
}
}
"""
if customize_head_params:
proto_txt += """
scale_head_params {
num_filters: 128
num_filters: 64
kernel_sizes: 5
kernel_sizes: 3
}
"""
return text_format.Merge(proto_txt,
center_net_pb2.CenterNet.ObjectDetection())
def get_fake_mask_proto(self):
def get_fake_mask_proto(self, customize_head_params=False):
proto_txt = """
task_loss_weight: 0.7
classification_loss {
......@@ -212,6 +221,15 @@ class ModelBuilderTF2Test(
score_threshold: 0.7
heatmap_bias_init: -2.0
"""
if customize_head_params:
proto_txt += """
mask_head_params {
num_filters: 128
num_filters: 64
kernel_sizes: 5
kernel_sizes: 3
}
"""
return text_format.Merge(proto_txt,
center_net_pb2.CenterNet.MaskEstimation())
......@@ -266,14 +284,16 @@ class ModelBuilderTF2Test(
self.get_fake_object_center_proto(
customize_head_params=customize_head_params))
config.center_net.object_detection_task.CopyFrom(
self.get_fake_object_detection_proto())
self.get_fake_object_detection_proto(
customize_head_params=customize_head_params))
config.center_net.keypoint_estimation_task.append(
self.get_fake_keypoint_proto(
customize_head_params=customize_head_params))
config.center_net.keypoint_label_map_path = (
self.get_fake_label_map_file_path())
config.center_net.mask_estimation_task.CopyFrom(
self.get_fake_mask_proto())
self.get_fake_mask_proto(
customize_head_params=customize_head_params))
config.center_net.densepose_estimation_task.CopyFrom(
self.get_fake_densepose_proto())
......@@ -303,6 +323,14 @@ class ModelBuilderTF2Test(
self.assertAlmostEqual(model._od_params.task_loss_weight, 0.5)
self.assertIsInstance(model._od_params.localization_loss,
losses.L1LocalizationLoss)
self.assertEqual(model._od_params.offset_head_num_filters, [256])
self.assertEqual(model._od_params.offset_head_kernel_sizes, [3])
if customize_head_params:
self.assertEqual(model._od_params.scale_head_num_filters, [128, 64])
self.assertEqual(model._od_params.scale_head_kernel_sizes, [5, 3])
else:
self.assertEqual(model._od_params.scale_head_num_filters, [256])
self.assertEqual(model._od_params.scale_head_kernel_sizes, [3])
# Check keypoint estimation related parameters.
kp_params = model._kp_params_dict['human_pose']
......@@ -352,6 +380,12 @@ class ModelBuilderTF2Test(
self.assertAlmostEqual(model._mask_params.score_threshold, 0.7)
self.assertAlmostEqual(
model._mask_params.heatmap_bias_init, -2.0, places=4)
if customize_head_params:
self.assertEqual(model._mask_params.mask_head_num_filters, [128, 64])
self.assertEqual(model._mask_params.mask_head_kernel_sizes, [5, 3])
else:
self.assertEqual(model._mask_params.mask_head_num_filters, [256])
self.assertEqual(model._mask_params.mask_head_kernel_sizes, [3])
# Check DensePose related parameters.
self.assertEqual(model._densepose_params.class_id, 0)
......
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "RD3uxzaJweYr"
},
"source": [
"##### Copyright 2021 The TensorFlow Authors."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cellView": "form",
"id": "C-vBUz5IhJs8"
},
"outputs": [],
"source": [
"#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
"# you may not use this file except in compliance with the License.\n",
"# You may obtain a copy of the License at\n",
"#\n",
"# https://www.apache.org/licenses/LICENSE-2.0\n",
"#\n",
"# Unless required by applicable law or agreed to in writing, software\n",
"# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
"# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
"# See the License for the specific language governing permissions and\n",
"# limitations under the License."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "pHTibyMehTvH"
},
"source": [
"# Tutorial: Convert models trained using TensorFlow Object Detection API to TensorFlow Lite\n",
"\n",
    "This tutorial demonstrates these steps:\n",
"* Convert TensorFlow models trained using the TensorFlow Object Detection API to [TensorFlow Lite](https://www.tensorflow.org/lite).\n",
"* Add the required metadata using [TFLite Metadata Writer API](https://www.tensorflow.org/lite/convert/metadata_writer_tutorial#object_detectors). This will make the TFLite model compatible with [TFLite Task Library](https://www.tensorflow.org/lite/inference_with_metadata/task_library/object_detector), so that the model can be integrated in mobile apps in 3 lines of code."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "QIR1IFpnLJJA"
},
"source": [
"\u003ctable align=\"left\"\u003e\u003ctd\u003e\n",
" \u003ca target=\"_blank\" href=\"https://colab.sandbox.google.com/github/tensorflow/models/blob/master/research/object_detection/colab_tutorials/convert_odt_model_to_TFLite.ipynb\"\u003e\n",
" \u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\n",
" \u003c/a\u003e\n",
"\u003c/td\u003e\u003ctd\u003e\n",
" \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/models/blob/master/research/object_detection/colab_tutorials/convert_odt_model_to_TFLite.ipynb\"\u003e\n",
" \u003cimg width=32px src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n",
"\u003c/td\u003e\u003c/table\u003e"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Ok_Rpv7XNaFJ"
},
"source": [
"## Preparation"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "t7CAW5C1cmel"
},
"source": [
"### Install the TFLite Support Library"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "DwtFa0jSnNU4"
},
"outputs": [],
"source": [
"!pip install -q tflite_support"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "XRfJR9QXctAR"
},
"source": [
"### Install the TensorFlow Object Detection API\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "7PP2P5XAqeI5"
},
"outputs": [],
"source": [
"import os\n",
"import pathlib\n",
"\n",
"# Clone the tensorflow models repository if it doesn't already exist\n",
"if \"models\" in pathlib.Path.cwd().parts:\n",
" while \"models\" in pathlib.Path.cwd().parts:\n",
" os.chdir('..')\n",
"elif not pathlib.Path('models').exists():\n",
" !git clone --depth 1 https://github.com/tensorflow/models"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "bP6SSh6zqi07"
},
"outputs": [],
"source": [
"%%bash\n",
"cd models/research/\n",
"protoc object_detection/protos/*.proto --python_out=.\n",
"cp object_detection/packages/tf2/setup.py .\n",
"pip install -q ."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "i0to7aXKc0O9"
},
"source": [
"### Import the necessary libraries"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "4M8CC1PgqnSf"
},
"outputs": [],
"source": [
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import os\n",
"import random\n",
"import io\n",
"import imageio\n",
"import glob\n",
"import scipy.misc\n",
"import numpy as np\n",
"from six import BytesIO\n",
"from PIL import Image, ImageDraw, ImageFont\n",
"from IPython.display import display, Javascript\n",
"from IPython.display import Image as IPyImage\n",
"\n",
"import tensorflow as tf\n",
"\n",
"from object_detection.utils import label_map_util\n",
"from object_detection.utils import config_util\n",
"from object_detection.utils import visualization_utils as viz_utils\n",
"from object_detection.utils import colab_utils\n",
"from object_detection.utils import config_util\n",
"from object_detection.builders import model_builder\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "s9WIOOMTNti5"
},
"source": [
"## Download a pretrained model from Model Zoo\n",
"\n",
"In this tutorial, we demonstrate converting a pretrained model `SSD MobileNet V2 FPNLite 640x640` in the [TensorFlow 2 Model Zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md). You can replace the model with your own model and the rest will work the same."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "TIY3cxDgsxuZ"
},
"outputs": [],
"source": [
"!wget http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.tar.gz\n",
"!tar -xf ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.tar.gz\n",
"!rm ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.tar.gz"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "0gV8vr6nN-z9"
},
"source": [
"## Generate TensorFlow Lite Model"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Z8FjeSmmxpXz"
},
"source": [
"### Step 1: Export TFLite inference graph\n",
"\n",
"First, we invoke `export_tflite_graph_tf2.py` to generate a TFLite-friendly intermediate SavedModel. This will then be passed to the TensorFlow Lite Converter for generating the final model.\n",
"\n",
"Use `--help` with the above script to get the full list of supported parameters.\n",
"These can fine-tune accuracy and speed for your model."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ChfN-tzBXqko"
},
"outputs": [],
"source": [
"!python models/research/object_detection/export_tflite_graph_tf2.py \\\n",
" --trained_checkpoint_dir {'ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8/checkpoint'} \\\n",
" --output_directory {'ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8/tflite'} \\\n",
" --pipeline_config_path {'ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8/pipeline.config'}"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "IPr06cZ3OY3H"
},
"source": [
"### Step 2: Convert to TFLite\n",
"\n",
"Use the [TensorFlow Lite Converter](https://www.tensorflow.org/lite/convert) to\n",
"convert the `SavedModel` to TFLite. Note that you need to use `from_saved_model`\n",
"for TFLite conversion with the Python API.\n",
"\n",
"You can also leverage\n",
"[Post-training Quantization](https://www.tensorflow.org/lite/performance/post_training_quantization)\n",
"to\n",
"[optimize performance](https://www.tensorflow.org/lite/performance/model_optimization)\n",
"and obtain a smaller model. In this tutorial, we use the [dynamic range quantization](https://www.tensorflow.org/lite/performance/post_training_quant)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "JMpy3Rlpq-Yq"
},
"outputs": [],
"source": [
"_TFLITE_MODEL_PATH = \"ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8/model.tflite\"\n",
"\n",
"converter = tf.lite.TFLiteConverter.from_saved_model('ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8/tflite/saved_model')\n",
"converter.optimizations = [tf.lite.Optimize.DEFAULT]\n",
"tflite_model = converter.convert()\n",
"\n",
"with open(_TFLITE_MODEL_PATH, 'wb') as f:\n",
" f.write(tflite_model)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "fyjlnmaEOtKp"
},
"source": [
"### Step 3: Add Metadata\n",
"\n",
"The model needs to be packed with [TFLite Metadata](https://www.tensorflow.org/lite/convert/metadata) to enable easy integration into mobile apps using the [TFLite Task Library](https://www.tensorflow.org/lite/inference_with_metadata/task_library/object_detector). This metadata helps the inference code perform the correct pre \u0026 post processing as required by the model. Use the following code to create the metadata."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "-ecGLG_Ovjcr"
},
"outputs": [],
"source": [
    "# Download the COCO dataset label map that was used to train the SSD MobileNet V2 FPNLite 640x640 model\n",
"!wget https://raw.githubusercontent.com/tensorflow/models/master/research/object_detection/data/mscoco_label_map.pbtxt -q\n",
"\n",
"# We need to convert the Object Detection API's labelmap into what the Task API needs:\n",
"# a txt file with one class name on each line from index 0 to N.\n",
"# The first '0' class indicates the background.\n",
"# This code assumes COCO detection which has 90 classes, you can write a label\n",
"# map file for your model if re-trained.\n",
"_ODT_LABEL_MAP_PATH = 'mscoco_label_map.pbtxt'\n",
"_TFLITE_LABEL_PATH = \"ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8/tflite_label_map.txt\"\n",
"\n",
"category_index = label_map_util.create_category_index_from_labelmap(\n",
" _ODT_LABEL_MAP_PATH)\n",
"f = open(_TFLITE_LABEL_PATH, 'w')\n",
"for class_id in range(1, 91):\n",
" if class_id not in category_index:\n",
" f.write('???\\n')\n",
" continue\n",
" name = category_index[class_id]['name']\n",
" f.write(name+'\\n')\n",
"f.close()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "YJSyXq5Qss9X"
},
"source": [
"Then we'll add the label map and other necessary metadata (e.g. normalization config) to the TFLite model.\n",
"\n",
    "As the `SSD MobileNet V2 FPNLite 640x640` model takes input images with pixel values in the range of [-1..1] ([code](https://github.com/tensorflow/models/blob/b09e75828e2c65ead9e624a5c7afed8d214247aa/research/object_detection/models/ssd_mobilenet_v2_keras_feature_extractor.py#L132)), we need to set `norm_mean = 127.5` and `norm_std = 127.5`. See this [documentation](https://www.tensorflow.org/lite/convert/metadata#normalization_and_quantization_parameters) for more details on the normalization parameters."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "CRQpfDAWsPeK"
},
"outputs": [],
"source": [
"from tflite_support.metadata_writers import object_detector\n",
"from tflite_support.metadata_writers import writer_utils\n",
"\n",
"_TFLITE_MODEL_WITH_METADATA_PATH = \"ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8/model_with_metadata.tflite\"\n",
"\n",
"writer = object_detector.MetadataWriter.create_for_inference(\n",
" writer_utils.load_file(_TFLITE_MODEL_PATH), input_norm_mean=[127.5], \n",
" input_norm_std=[127.5], label_file_paths=[_TFLITE_LABEL_PATH])\n",
"writer_utils.save_file(writer.populate(), _TFLITE_MODEL_WITH_METADATA_PATH)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "YFEAjRBdPCQb"
},
"source": [
"Optional: Print out the metadata added to the TFLite model."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "FT3-38PJsSOt"
},
"outputs": [],
"source": [
"from tflite_support import metadata\n",
"\n",
"displayer = metadata.MetadataDisplayer.with_model_file(_TFLITE_MODEL_WITH_METADATA_PATH)\n",
"print(\"Metadata populated:\")\n",
"print(displayer.get_metadata_json())\n",
"print(\"=============================\")\n",
"print(\"Associated file(s) populated:\")\n",
"print(displayer.get_packed_associated_file_list())"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "l7zVslTRnEHX"
},
"source": [
"The TFLite model now can be integrated into a mobile app using the TFLite Task Library. See the [documentation](https://www.tensorflow.org/lite/inference_with_metadata/task_library/object_detector) for more details."
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "Convert TF Object Detection API model to TFLite.ipynb",
"private_outputs": true,
"provenance": [
{
"file_id": "1R4_y-u14YTdvBzhmvC0HQwh3HkcCN2Bd",
"timestamp": 1623114733432
},
{
"file_id": "1Rey5kAzNQhJ77tsXGjhcAV0UZ6du0Sla",
"timestamp": 1622897882140
}
],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
......@@ -37,7 +37,6 @@ from abc import ABCMeta
from abc import abstractmethod
import six
from six.moves import zip
import tensorflow.compat.v1 as tf
......@@ -107,11 +106,9 @@ class AnchorGenerator(six.with_metaclass(ABCMeta, object)):
with tf.name_scope(self.name_scope()):
anchors_list = self._generate(feature_map_shape_list, **params)
if self.check_num_anchors:
with tf.control_dependencies([
self._assert_correct_number_of_anchors(
anchors_list, feature_map_shape_list)]):
for item in anchors_list:
item.set(tf.identity(item.get()))
return anchors_list
@abstractmethod
......@@ -146,26 +143,3 @@ class AnchorGenerator(six.with_metaclass(ABCMeta, object)):
feature_map_indices_list.append(
i * tf.ones([boxes.num_boxes()], dtype=tf.int32))
return tf.concat(feature_map_indices_list, axis=0)
def _assert_correct_number_of_anchors(self, anchors_list,
                                      feature_map_shape_list):
  """Builds an assertion op checking the generated anchor count.

  Walks the per-layer anchor grids in lockstep with the feature map
  shapes and compares the total number of anchors actually produced
  against the number implied by `num_anchors_per_location()`.

  Args:
    anchors_list: A list of box_list.BoxList object holding anchors generated.
    feature_map_shape_list: list of (height, width) pairs in the format
      [(height_0, width_0), (height_1, width_1), ...] that the generated
      anchors must align with.

  Returns:
    Op that raises InvalidArgumentError if the number of anchors does not
    match the number of expected anchors.
  """
  total_expected = 0
  total_actual = 0
  # One triple per feature-map layer: anchors-per-cell, (h, w), boxes.
  layer_triples = zip(self.num_anchors_per_location(),
                      feature_map_shape_list, anchors_list)
  for anchors_per_cell, fmap_shape, layer_boxes in layer_triples:
    # Expected count for this layer is anchors-per-cell * height * width.
    total_expected += anchors_per_cell * fmap_shape[0] * fmap_shape[1]
    total_actual += layer_boxes.num_boxes()
  return tf.assert_equal(total_expected, total_actual)
......@@ -101,7 +101,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
Args:
field: a string key, options are
fields.BoxListFields.{boxes,classes,masks,keypoints,
fields.BoxListFields.{boxes,classes,masks,mask_weights,keypoints,
keypoint_visibilities, densepose_*, track_ids,
temporal_offsets, track_match_flags}
fields.InputDataFields.is_annotated.
......@@ -123,7 +123,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
Args:
field: a string key, options are
fields.BoxListFields.{boxes,classes,masks,keypoints,
fields.BoxListFields.{boxes,classes,masks,mask_weights,keypoints,
keypoint_visibilities, densepose_*, track_ids} or
fields.InputDataFields.is_annotated.
......@@ -299,6 +299,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
groundtruth_boxes_list,
groundtruth_classes_list,
groundtruth_masks_list=None,
groundtruth_mask_weights_list=None,
groundtruth_keypoints_list=None,
groundtruth_keypoint_visibilities_list=None,
groundtruth_dp_num_points_list=None,
......@@ -334,6 +335,8 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
masks with values in {0, 1}. If None, no masks are provided.
Mask resolution `height_in`x`width_in` must agree with the resolution
of the input image tensor provided to the `preprocess` function.
groundtruth_mask_weights_list: a list of 1-D tf.float32 tensors of shape
[num_boxes] with weights for each instance mask.
groundtruth_keypoints_list: a list of 3-D tf.float32 tensors of
shape [num_boxes, num_keypoints, 2] containing keypoints.
Keypoints are assumed to be provided in normalized coordinates and
......@@ -399,6 +402,9 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
if groundtruth_masks_list:
self._groundtruth_lists[
fields.BoxListFields.masks] = groundtruth_masks_list
if groundtruth_mask_weights_list:
self._groundtruth_lists[
fields.BoxListFields.mask_weights] = groundtruth_mask_weights_list
if groundtruth_keypoints_list:
self._groundtruth_lists[
fields.BoxListFields.keypoints] = groundtruth_keypoints_list
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment