Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
0a770680
Commit
0a770680
authored
Jun 02, 2021
by
Dan Holtmann-Rice
Committed by
A. Unique TensorFlower
Jun 02, 2021
Browse files
Internal change
PiperOrigin-RevId: 377131555
parent
4ca9e10a
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
408 additions
and
291 deletions
+408
-291
orbit/actions/__init__.py
orbit/actions/__init__.py
+74
-0
orbit/actions/conditional_action.py
orbit/actions/conditional_action.py
+60
-0
orbit/actions/conditional_action_test.py
orbit/actions/conditional_action_test.py
+39
-0
orbit/actions/export_saved_model.py
orbit/actions/export_saved_model.py
+135
-0
orbit/actions/export_saved_model_test.py
orbit/actions/export_saved_model_test.py
+2
-80
orbit/actions/new_best_metric.py
orbit/actions/new_best_metric.py
+4
-211
orbit/actions/new_best_metric_test.py
orbit/actions/new_best_metric_test.py
+94
-0
No files found.
orbit/actions/__init__.py
0 → 100644
View file @
0a770680
# Copyright 2021 The Orbit Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Defines an "action" abstraction for use with `orbit.Controller`.
"Actions" are simply arbitrary callables that are applied by the `Controller`
to the output of train steps (after each inner loop of `steps_per_loop` steps)
or an evaluation. This provides a hook mechanism, enabling things like reporting
metrics to Vizier, model exporting, additional logging, etc.
The basic `Action` abstraction (just a type alias) is defined in the
`controller` module. This `actions` module adds a `ConditionalAction` utility
class to make it easy to trigger actions conditionally based on reusable
predicates, as well as a small handful of predefined conditions/actions (in
particular, a `NewBestMetric` condition and an `ExportSavedModel` action).
One example of using actions to do metric-conditional export:
new_best_metric = orbit.actions.NewBestMetric('accuracy')
export_action = orbit.actions.ConditionalAction(
condition=lambda x: x['accuracy'] > 0.9 and new_best_metric(x),
action=orbit.actions.ExportSavedModel(
model,
orbit.actions.ExportFileManager(
base_name=f'{FLAGS.model_dir}/saved_model',
next_id_fn=trainer.global_step.numpy),
signatures=model.infer))
controller = orbit.Controller(
strategy=strategy,
trainer=trainer,
evaluator=evaluator,
eval_actions=[export_action],
global_step=trainer.global_step,
steps_per_loop=FLAGS.steps_per_loop,
checkpoint_manager=checkpoint_manager,
summary_interval=1000)
Note: In multi-client settings where each client runs its own `Controller`
instance, some care should be taken in deciding which clients should run certain
actions. Isolating actions to an individual client (say client 0) can be
achieved using `ConditionalAction` as follows:
client_0_actions = orbit.actions.ConditionalAction(
condition=lambda _: client_id() == 0,
action=[
...
])
In particular, the `NewBestMetric` condition may be used in multi-client
settings if all clients are guaranteed to compute the same metric (ensuring this
is up to client code, not Orbit). However, when saving metrics it may be helpful
to avoid unnecessary writes by setting the `write_metric` parameter to `False`
for most clients.
"""
from
orbit.actions.conditional_action
import
ConditionalAction
from
orbit.actions.export_saved_model
import
ExportFileManager
from
orbit.actions.export_saved_model
import
ExportSavedModel
from
orbit.actions.new_best_metric
import
JSONPersistedValue
from
orbit.actions.new_best_metric
import
NewBestMetric
orbit/actions/conditional_action.py
0 → 100644
View file @
0a770680
# Copyright 2021 The Orbit Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Provides a `ConditionalAction` abstraction."""
from
typing
import
Any
,
Callable
,
Sequence
,
Union
from
orbit
import
controller
from
orbit
import
runner
import
tensorflow
as
tf
# Type alias for a condition: a callable that takes a `runner.Output` and
# returns either a Python `bool` or a `tf.Tensor`.
Condition = Callable[[runner.Output], Union[bool, tf.Tensor]]
def
_as_sequence
(
maybe_sequence
:
Union
[
Any
,
Sequence
[
Any
]])
->
Sequence
[
Any
]:
if
isinstance
(
maybe_sequence
,
Sequence
):
return
maybe_sequence
return
[
maybe_sequence
]
class ConditionalAction:
  """An action that is applied only when a condition holds for the output.

  Instances are callables taking train or eval outputs, so they can be used
  wherever an action is expected. Keeping "when" something happens (the
  condition) separate from "what" happens (the action) makes both pieces
  easier to write modularly and to reuse.
  """

  def __init__(
      self,
      condition: Condition,
      action: Union[controller.Action, Sequence[controller.Action]],
  ):
    """Initializes the instance.

    Args:
      condition: A callable accepting train or eval outputs and returning a
        bool.
      action: The action (or optionally sequence of actions) to perform when
        `condition` is met.
    """
    self.condition = condition
    self.action = action

  def __call__(self, output: runner.Output) -> None:
    """Applies the action(s) to `output` if `condition(output)` is truthy.

    Args:
      output: The train or eval output, passed first to the condition and then
        to every action.
    """
    if not self.condition(output):
      return
    for run_action in _as_sequence(self.action):
      run_action(output)
orbit/actions/conditional_action_test.py
0 → 100644
View file @
0a770680
# Copyright 2021 The Orbit Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for orbit.actions.conditional_action."""
from
orbit
import
actions
import
tensorflow
as
tf
class ConditionalActionTest(tf.test.TestCase):
  """Tests for `actions.ConditionalAction`."""

  def test_conditional_action(self):
    """The wrapped action runs only when the condition is truthy."""

    # Define a function to raise an AssertionError, since we can't in a lambda.
    def raise_assertion(arg):
      raise AssertionError(str(arg))

    conditional_action = actions.ConditionalAction(
        condition=lambda x: x['value'], action=raise_assertion)

    conditional_action({'value': False})  # Nothing is raised.

    with self.assertRaises(AssertionError) as ctx:
      conditional_action({'value': True})
    # Check the message after the `with` block: a statement placed after the
    # raising call inside the block is never executed. Python 3 exceptions
    # have no `.message` attribute, so compare `str(exception)` instead.
    self.assertEqual(str(ctx.exception), "{'value': True}")
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
orbit/actions/export_saved_model.py
0 → 100644
View file @
0a770680
# Copyright 2021 The Orbit Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Provides the `ExportSavedModel` action and associated helper classes."""
from
typing
import
Callable
,
Optional
import
tensorflow
as
tf
class
_CounterIdFn
:
"""Implements a counter-based ID function for `ExportFileManager`."""
def
__init__
(
self
,
base_name
:
str
):
filenames
=
tf
.
io
.
gfile
.
glob
(
f
'
{
base_name
}
-*'
)
max_counter
=
-
1
for
filename
in
filenames
:
try
:
_
,
file_number
=
filename
.
rsplit
(
'-'
,
maxsplit
=
1
)
max_counter
=
max
(
max_counter
,
int
(
file_number
))
except
ValueError
:
continue
self
.
value
=
max_counter
+
1
def
__call__
(
self
):
output
=
self
.
value
self
.
value
+=
1
return
output
class ExportFileManager:
  """Utility class that manages a group of files with a shared base name.

  For actions like SavedModel exporting, there are potentially many different
  file naming and cleanup strategies that may be desirable. This class provides
  a basic interface allowing SavedModel export to be decoupled from these
  details, and a default implementation that should work for many basic
  scenarios. Users may subclass this class to alter behavior and define more
  customized naming and cleanup strategies.
  """

  def __init__(self,
               base_name: str,
               max_to_keep: int = 5,
               next_id_fn: Optional[Callable[[], int]] = None):
    """Initializes the instance.

    Args:
      base_name: A shared base name for file names generated by this class.
      max_to_keep: The maximum number of files matching `base_name` to keep
        after each call to `clean_up`. The `max_to_keep` files with the largest
        integer IDs are preserved; the rest are deleted. If 0, every managed
        file is deleted. If < 0, all files are preserved.
      next_id_fn: An optional callable that returns integer IDs to append to
        base name (formatted as `'{base_name}-{id}'`). The order of integers is
        used to sort files to determine the oldest ones deleted by `clean_up`.
        If not supplied, a default ID based on an incrementing counter is used.
        One common alternative may be to use the current global step count,
        for instance passing `next_id_fn=global_step.numpy`.
    """
    self._base_name = base_name
    self._max_to_keep = max_to_keep
    self._next_id_fn = next_id_fn or _CounterIdFn(base_name)

  @property
  def managed_files(self):
    """Returns all files managed by this instance, in sorted order.

    Files matching `{base_name}-*` whose suffix after the last '-' is not an
    integer are not considered managed and are left out (the default counter
    ID function skips them in the same way), rather than raising `ValueError`.

    Returns:
      The list of files matching the `base_name` provided when constructing this
      `ExportFileManager` instance, sorted in increasing integer order of the
      IDs returned by `next_id_fn`.
    """

    def parse_id(name):
      # Returns the integer ID encoded in `name`, or None if there is none.
      try:
        _, id_num = name.rsplit('-', maxsplit=1)
        return int(id_num)
      except ValueError:
        return None

    filenames = tf.io.gfile.glob(f'{self._base_name}-*')
    keyed = [(parse_id(name), name) for name in filenames]
    managed = [(id_num, name) for id_num, name in keyed if id_num is not None]
    # Sort on the ID only, so files with equal IDs keep their glob order.
    managed.sort(key=lambda pair: pair[0])
    return [name for _, name in managed]

  def clean_up(self):
    """Cleans up old files matching `{base_name}-*`.

    The `max_to_keep` files with the largest IDs are preserved. Nothing is
    deleted when `max_to_keep` is negative.
    """
    if self._max_to_keep < 0:
      return

    files = self.managed_files
    # Compute the count explicitly: `files[:-max_to_keep]` is `files[:0]` when
    # `max_to_keep` is 0, which would delete nothing instead of everything.
    num_to_delete = max(len(files) - self._max_to_keep, 0)
    for filename in files[:num_to_delete]:
      tf.io.gfile.rmtree(filename)

  def next_name(self) -> str:
    """Returns a new file name based on `base_name` and `next_id_fn()`."""
    return f'{self._base_name}-{self._next_id_fn()}'
class ExportSavedModel:
  """Action that writes the given model out as a SavedModel when called."""

  def __init__(self,
               model: tf.Module,
               file_manager: ExportFileManager,
               signatures,
               options: Optional[tf.saved_model.SaveOptions] = None):
    """Initializes the instance.

    Args:
      model: The model to export.
      file_manager: An instance of `ExportFileManager` (or a subclass), that
        provides file naming and cleanup functionality.
      signatures: The signatures to forward to `tf.saved_model.save()`.
      options: Optional options to forward to `tf.saved_model.save()`.
    """
    self.model = model
    self.file_manager = file_manager
    self.signatures = signatures
    self.options = options

  def __call__(self, _):
    """Exports the SavedModel to a fresh name, then cleans up old exports.

    Args:
      _: The train or eval output; ignored.
    """
    target_dir = self.file_manager.next_name()
    tf.saved_model.save(
        self.model,
        target_dir,
        signatures=self.signatures,
        options=self.options)
    self.file_manager.clean_up()
orbit/actions_test.py
→
orbit/actions
/export_saved_model
_test.py
View file @
0a770680
...
@@ -12,7 +12,7 @@
...
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
"""Tests for orbit.actions."""
"""Tests for orbit.actions.
export_saved_model.
"""
import
os
import
os
...
@@ -40,85 +40,7 @@ class TestModel(tf.Module):
...
@@ -40,85 +40,7 @@ class TestModel(tf.Module):
return
self
.
value
return
self
.
value
class
ActionsTest
(
tf
.
test
.
TestCase
):
class
ExportSavedModelTest
(
tf
.
test
.
TestCase
):
def
test_conditional_action
(
self
):
# Define a function to raise an AssertionError, since we can't in a lambda.
def
raise_assertion
(
arg
):
raise
AssertionError
(
str
(
arg
))
conditional_action
=
actions
.
ConditionalAction
(
condition
=
lambda
x
:
x
,
action
=
raise_assertion
)
conditional_action
(
False
)
# Nothing is raised.
with
self
.
assertRaises
(
AssertionError
)
as
ctx
:
conditional_action
(
True
)
self
.
assertEqual
(
ctx
.
exception
.
message
,
'True'
)
def
test_new_best_metric_higher_is_better
(
self
):
new_best_metric
=
actions
.
NewBestMetric
(
lambda
x
:
x
,
higher_is_better
=
True
)
self
.
assertTrue
(
new_best_metric
.
test
(
0.0
))
self
.
assertTrue
(
new_best_metric
.
commit
(
0.0
))
self
.
assertFalse
(
new_best_metric
.
test
(
0.0
))
self
.
assertTrue
(
new_best_metric
.
test
(
1.0
))
def
test_new_best_metric_lower_is_better
(
self
):
new_best_metric
=
actions
.
NewBestMetric
(
lambda
x
:
x
,
higher_is_better
=
False
)
self
.
assertTrue
(
new_best_metric
.
test
(
0.0
))
self
.
assertTrue
(
new_best_metric
.
commit
(
0.0
))
self
.
assertFalse
(
new_best_metric
.
test
(
0.0
))
self
.
assertTrue
(
new_best_metric
.
test
(
-
1.0
))
def
test_new_best_metric_persistence
(
self
):
backing_file
=
self
.
create_tempfile
()
new_best_metric
=
actions
.
NewBestMetric
(
lambda
x
:
x
,
higher_is_better
=
True
,
filename
=
backing_file
.
full_path
,
write_metric
=
False
)
self
.
assertTrue
(
new_best_metric
.
test
(
0.0
))
self
.
assertTrue
(
new_best_metric
.
commit
(
0.0
))
self
.
assertFalse
(
new_best_metric
.
test
(
0.0
))
new_best_metric
=
actions
.
NewBestMetric
(
lambda
x
:
x
,
higher_is_better
=
True
,
filename
=
backing_file
.
full_path
)
self
.
assertLess
(
new_best_metric
.
best_value
,
0.0
)
self
.
assertTrue
(
new_best_metric
.
commit
(
5.0
))
self
.
assertEqual
(
new_best_metric
.
best_value
,
5.0
)
new_best_metric
=
actions
.
NewBestMetric
(
lambda
x
:
x
,
higher_is_better
=
True
,
filename
=
backing_file
.
full_path
)
self
.
assertEqual
(
new_best_metric
.
best_value
,
5.0
)
def
test_json_persisted_value
(
self
):
tempfile
=
self
.
create_tempfile
().
full_path
value
=
{
'a'
:
1
,
'b'
:
2
}
persisted_value
=
actions
.
JSONPersistedValue
(
value
,
tempfile
)
# The initial value is used since tempfile is empty.
self
.
assertEqual
(
persisted_value
.
read
(),
value
)
persisted_value
=
actions
.
JSONPersistedValue
(
'ignored'
,
tempfile
)
# Initial value of 'ignored' is ignored, since there's a value in tempfile.
self
.
assertEqual
(
persisted_value
.
read
(),
value
)
value
=
[
1
,
2
,
3
]
persisted_value
.
write
(
value
)
# Now that a new value is written, it gets read on initialization.
persisted_value
=
actions
.
JSONPersistedValue
([
'also ignored'
],
tempfile
)
self
.
assertEqual
(
persisted_value
.
read
(),
value
)
# Writes can be disabled.
persisted_value
=
actions
.
JSONPersistedValue
(
'ignored'
,
tempfile
,
write_value
=
False
)
self
.
assertEqual
(
persisted_value
.
read
(),
value
)
persisted_value
.
write
(
"won't get persisted"
)
persisted_value
=
actions
.
JSONPersistedValue
(
'ignored'
,
tempfile
,
write_value
=
False
)
self
.
assertEqual
(
persisted_value
.
read
(),
value
)
def
test_json_persisted_value_create_dirs
(
self
):
tempfile
=
os
.
path
.
join
(
self
.
create_tempdir
().
full_path
,
'subdir/value'
)
value
=
{
'a'
:
1
,
'b'
:
2
}
# The directory is not created if write_value=False.
actions
.
JSONPersistedValue
(
value
,
tempfile
,
write_value
=
False
)
self
.
assertFalse
(
tf
.
io
.
gfile
.
exists
(
os
.
path
.
dirname
(
tempfile
)))
actions
.
JSONPersistedValue
(
value
,
tempfile
)
self
.
assertTrue
(
tf
.
io
.
gfile
.
exists
(
tempfile
))
def
test_export_file_manager_default_ids
(
self
):
def
test_export_file_manager_default_ids
(
self
):
directory
=
self
.
create_tempdir
()
directory
=
self
.
create_tempdir
()
...
...
orbit/actions.py
→
orbit/actions
/new_best_metric
.py
View file @
0a770680
...
@@ -12,110 +12,19 @@
...
@@ -12,110 +12,19 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
"""Defines an "action" abstraction for use with `orbit.Controller`.
"""Provides the `NewBestMetric` condition and associated helper classes."""
"Actions" are simply arbitrary callables that are applied by the `Controller`
to the output of train steps (after each inner loop of `steps_per_loop` steps)
or an evaluation. This provides a hook mechanism, enabling things like reporting
metrics to Vizier, model exporting, additional logging, etc.
The basic `Action` abstraction (just a type alias) is defined in the
`controller` module. This `actions` module adds a `ConditionalAction` utility
class to make it easy to trigger actions conditionally based on reusable
predicates, as well as a small handful of predefined conditions/actions (in
particular, a `NewBestMetric` condition and an `ExportSavedModel` action).
One example of using actions to do metric-conditional export:
new_best_metric = orbit.actions.NewBestMetric('accuracy')
export_action = orbit.actions.ConditionalAction(
condition=lambda x: x['accuracy'] > 0.9 and new_best_metric(x),
action=orbit.actions.ExportSavedModel(
model,
orbit.actions.ExportFileManager(
base_name=f'{FLAGS.model_dir}/saved_model',
next_id_fn=trainer.global_step.numpy),
signatures=model.infer))
controller = orbit.Controller(
strategy=strategy,
trainer=trainer,
evaluator=evaluator,
eval_actions=[export_action],
global_step=trainer.global_step,
steps_per_loop=FLAGS.steps_per_loop,
checkpoint_manager=checkpoint_manager,
summary_interval=1000)
Note: In multi-client settings where each client runs its own `Controller`
instance, some care should be taken in deciding which clients should run certain
actions. Isolating actions to an individual client (say client 0) can be
achieved using `ConditionalAction` as follows:
client_0_actions = orbit.actions.ConditionalAction(
condition=lambda _: client_id() == 0,
action=[
...
])
In particular, the `NewBestMetric` condition may be used in multi-client
settings if all clients are guaranteed to compute the same metric (ensuring this
is up to client code, not Orbit). However, when saving metrics it may be helpful
to avoid unnecessary writes by setting the `write_value` parameter to `False`
for most clients.
"""
import
json
import
json
import
os
import
os
import
sys
import
sys
from
typing
import
Any
,
Callable
,
Optional
,
Sequence
,
Union
from
typing
import
Any
,
Callable
,
Optional
,
Union
import
uuid
import
uuid
from
orbit
import
controller
from
orbit
import
runner
from
orbit
import
runner
from
orbit
import
utils
from
orbit
import
utils
import
tensorflow
as
tf
import
tensorflow
as
tf
Condition
=
Callable
[[
runner
.
Output
],
Union
[
bool
,
tf
.
Tensor
]]
def
_as_sequence
(
maybe_sequence
:
Union
[
Any
,
Sequence
[
Any
]])
->
Sequence
[
Any
]:
if
isinstance
(
maybe_sequence
,
Sequence
):
return
maybe_sequence
return
[
maybe_sequence
]
class
ConditionalAction
:
"""Represents an action that is only taken when a given condition is met.
This class is itself an `Action` (a callable that can be applied to train or
eval outputs), but is intended to make it easier to write modular and reusable
conditions by decoupling "when" something happens (the condition) from "what"
happens (the action).
"""
def
__init__
(
self
,
condition
:
Condition
,
action
:
Union
[
controller
.
Action
,
Sequence
[
controller
.
Action
]],
):
"""Initializes the instance.
Args:
condition: A callable accepting train or eval outputs and returning a bool.
action: The action (or optionally sequence of actions) to perform when
`condition` is met.
"""
self
.
condition
=
condition
self
.
action
=
action
def
__call__
(
self
,
output
:
runner
.
Output
)
->
None
:
if
self
.
condition
(
output
):
for
action
in
_as_sequence
(
self
.
action
):
action
(
output
)
MetricFn
=
Callable
[[
runner
.
Output
],
Union
[
float
,
tf
.
Tensor
]]
MetricFn
=
Callable
[[
runner
.
Output
],
Union
[
float
,
tf
.
Tensor
]]
...
@@ -290,7 +199,7 @@ class JSONPersistedValue:
...
@@ -290,7 +199,7 @@ class JSONPersistedValue:
if
tf
.
io
.
gfile
.
exists
(
self
.
_filename
):
if
tf
.
io
.
gfile
.
exists
(
self
.
_filename
):
if
tf
.
io
.
gfile
.
stat
(
self
.
_filename
).
length
>
0
:
if
tf
.
io
.
gfile
.
stat
(
self
.
_filename
).
length
>
0
:
with
tf
.
io
.
gfile
.
GFile
(
self
.
_filename
,
'r'
)
as
f
:
with
tf
.
io
.
gfile
.
GFile
(
self
.
_filename
,
'r'
)
as
f
:
self
.
_value
=
json
.
load
s
(
f
.
read
()
)
self
.
_value
=
json
.
load
(
f
)
elif
self
.
_write_value
:
elif
self
.
_write_value
:
tf
.
io
.
gfile
.
makedirs
(
os
.
path
.
dirname
(
self
.
_filename
))
tf
.
io
.
gfile
.
makedirs
(
os
.
path
.
dirname
(
self
.
_filename
))
...
@@ -311,119 +220,3 @@ class JSONPersistedValue:
...
@@ -311,119 +220,3 @@ class JSONPersistedValue:
with
tf
.
io
.
gfile
.
GFile
(
tmp_filename
,
'w'
)
as
f
:
with
tf
.
io
.
gfile
.
GFile
(
tmp_filename
,
'w'
)
as
f
:
json
.
dump
(
self
.
_value
,
f
)
json
.
dump
(
self
.
_value
,
f
)
tf
.
io
.
gfile
.
rename
(
tmp_filename
,
self
.
_filename
,
overwrite
=
True
)
tf
.
io
.
gfile
.
rename
(
tmp_filename
,
self
.
_filename
,
overwrite
=
True
)
class
_CounterIdFn
:
"""Implements a counter-based ID function for `ExportFileManager`."""
def
__init__
(
self
,
base_name
:
str
):
filenames
=
tf
.
io
.
gfile
.
glob
(
f
'
{
base_name
}
-*'
)
max_counter
=
-
1
for
filename
in
filenames
:
try
:
_
,
file_number
=
filename
.
rsplit
(
'-'
,
maxsplit
=
1
)
max_counter
=
max
(
max_counter
,
int
(
file_number
))
except
ValueError
:
continue
self
.
value
=
max_counter
+
1
def
__call__
(
self
):
output
=
self
.
value
self
.
value
+=
1
return
output
class
ExportFileManager
:
"""Utility class that manages a group of files with a shared base name.
For actions like SavedModel exporting, there are potentially many different
file naming and cleanup strategies that may be desirable. This class provides
a basic interface allowing SavedModel export to be decoupled from these
details, and a default implementation that should work for many basic
scenarios. Users may subclass this class to alter behavior and define more
customized naming and cleanup strategies.
"""
def
__init__
(
self
,
base_name
:
str
,
max_to_keep
:
int
=
5
,
next_id_fn
:
Optional
[
Callable
[[],
int
]]
=
None
):
"""Initializes the instance.
Args:
base_name: A shared base name for file names generated by this class.
max_to_keep: The maximum number of files matching `base_name` to keep
after each call to `clean_up`. The most recent (as determined by the
integer ID in the file name) `max_to_keep` files are preserved; the rest are
deleted. If < 0, all files are preserved.
next_id_fn: An optional callable that returns integer IDs to append to
base name (formatted as `'{base_name}-{id}'`). The order of integers is
used to sort files to determine the oldest ones deleted by `clean_up`.
If not supplied, a default ID based on an incrementing counter is used.
One common alternative may be to use the current global step count,
for instance passing `next_id_fn=global_step.numpy`.
"""
self
.
_base_name
=
base_name
self
.
_max_to_keep
=
max_to_keep
self
.
_next_id_fn
=
next_id_fn
or
_CounterIdFn
(
base_name
)
@
property
def
managed_files
(
self
):
"""Returns all files managed by this instance, in sorted order.
Returns:
The list of files matching the `base_name` provided when constructing this
`ExportFileManager` instance, sorted in increasing integer order of the
IDs returned by `next_id_fn`.
"""
def
id_key
(
name
):
_
,
id_num
=
name
.
rsplit
(
'-'
,
maxsplit
=
1
)
return
int
(
id_num
)
filenames
=
tf
.
io
.
gfile
.
glob
(
f
'
{
self
.
_base_name
}
-*'
)
return
sorted
(
filenames
,
key
=
id_key
)
def
clean_up
(
self
):
"""Cleans up old files matching `{base_name}-*`.
The most recent `max_to_keep` files are preserved.
"""
if
self
.
_max_to_keep
<
0
:
return
for
filename
in
self
.
managed_files
[:
-
self
.
_max_to_keep
]:
tf
.
io
.
gfile
.
rmtree
(
filename
)
def
next_name
(
self
)
->
str
:
"""Returns a new file name based on `base_name` and `next_id_fn()`."""
return
f
'
{
self
.
_base_name
}
-
{
self
.
_next_id_fn
()
}
'
class
ExportSavedModel
:
"""Action that exports the given model as a SavedModel."""
def
__init__
(
self
,
model
:
tf
.
Module
,
file_manager
:
ExportFileManager
,
signatures
,
options
:
Optional
[
tf
.
saved_model
.
SaveOptions
]
=
None
):
"""Initializes the instance.
Args:
model: The model to export.
file_manager: An instance of `ExportFileManager` (or a subclass), that
provides file naming and cleanup functionality.
signatures: The signatures to forward to `tf.saved_model.save()`.
options: Optional options to forward to `tf.saved_model.save()`.
"""
self
.
model
=
model
self
.
file_manager
=
file_manager
self
.
signatures
=
signatures
self
.
options
=
options
def
__call__
(
self
,
_
):
"""Exports the SavedModel."""
export_dir
=
self
.
file_manager
.
next_name
()
tf
.
saved_model
.
save
(
self
.
model
,
export_dir
,
self
.
signatures
,
self
.
options
)
self
.
file_manager
.
clean_up
()
orbit/actions/new_best_metric_test.py
0 → 100644
View file @
0a770680
# Copyright 2021 The Orbit Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for orbit.actions.new_best_metric."""
import
os
from
orbit
import
actions
import
tensorflow
as
tf
class NewBestMetricTest(tf.test.TestCase):
  """Tests for `actions.NewBestMetric` and `actions.JSONPersistedValue`."""

  def test_new_best_metric_higher_is_better(self):
    """With a callable metric, only strictly higher values test as new bests."""
    condition = actions.NewBestMetric(
        lambda x: x['value'], higher_is_better=True)
    baseline = {'value': 0.0}
    self.assertTrue(condition.test(baseline))
    self.assertTrue(condition.commit(baseline))
    self.assertFalse(condition.test({'value': 0.0}))
    self.assertTrue(condition.test({'value': 1.0}))

  def test_new_best_metric_lower_is_better(self):
    """With a string metric key, only strictly lower values test as new bests."""
    condition = actions.NewBestMetric('value', higher_is_better=False)
    baseline = {'value': 0.0}
    self.assertTrue(condition.test(baseline))
    self.assertTrue(condition.commit(baseline))
    self.assertFalse(condition.test({'value': 0.0}))
    self.assertTrue(condition.test({'value': -1.0}))

  def test_new_best_metric_persistence(self):
    """Committed bests reach the backing file only when writes are enabled."""
    backing_path = self.create_tempfile().full_path

    # With `write_metric=False`, the committed value stays in memory only.
    condition = actions.NewBestMetric(
        'value',
        higher_is_better=True,
        filename=backing_path,
        write_metric=False)
    self.assertTrue(condition.test({'value': 0.0}))
    self.assertTrue(condition.commit({'value': 0.0}))
    self.assertFalse(condition.test({'value': 0.0}))

    # A new instance on the same file does not see the uncommitted 0.0.
    condition = actions.NewBestMetric(
        'value', higher_is_better=True, filename=backing_path)
    self.assertLess(condition.best_value, 0.0)
    self.assertTrue(condition.commit({'value': 5.0}))
    self.assertEqual(condition.best_value, 5.0)

    # This time the value was written, so a new instance reads it back.
    condition = actions.NewBestMetric(
        'value', higher_is_better=True, filename=backing_path)
    self.assertEqual(condition.best_value, 5.0)

  def test_json_persisted_value(self):
    """Values round-trip through the file, and writes can be disabled."""
    path = self.create_tempfile().full_path
    expected = {'a': 1, 'b': 2}

    # The initial value is used since the file is empty.
    persisted = actions.JSONPersistedValue(expected, path)
    self.assertEqual(persisted.read(), expected)

    # Initial value of 'ignored' is ignored, since there's a value in the file.
    persisted = actions.JSONPersistedValue('ignored', path)
    self.assertEqual(persisted.read(), expected)

    expected = [1, 2, 3]
    persisted.write(expected)

    # Now that a new value is written, it gets read on initialization.
    persisted = actions.JSONPersistedValue(['also ignored'], path)
    self.assertEqual(persisted.read(), expected)

    # Writes can be disabled.
    persisted = actions.JSONPersistedValue('ignored', path, write_value=False)
    self.assertEqual(persisted.read(), expected)

    persisted.write("won't get persisted")
    persisted = actions.JSONPersistedValue('ignored', path, write_value=False)
    self.assertEqual(persisted.read(), expected)

  def test_json_persisted_value_create_dirs(self):
    """Missing parent directories are created only when writes are enabled."""
    parent_dir = self.create_tempdir().full_path
    path = os.path.join(parent_dir, 'subdir/value')
    initial = {'a': 1, 'b': 2}

    # The directory is not created if write_value=False.
    actions.JSONPersistedValue(initial, path, write_value=False)
    self.assertFalse(tf.io.gfile.exists(os.path.dirname(path)))

    actions.JSONPersistedValue(initial, path)
    self.assertTrue(tf.io.gfile.exists(path))
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment