Unverified Commit d1c8d840 authored by Yuge Zhang, committed by GitHub

Fix a few issues in Retiarii (#3725)

parent 6b52fb12
@@ -72,7 +72,7 @@ class Repeat(nn.Module):
 class Cell(nn.Module):
     """
-    Cell structure [1]_ [2]_ that is popularly used in NAS literature.
+    Cell structure [zophnas]_ [zophnasnet]_ that is popularly used in NAS literature.
     A cell consists of multiple "nodes". Each node is a sum of multiple operators. Each operator is chosen from
     ``op_candidates``, and takes one input from previous nodes and predecessors. Predecessor means the input of cell.
@@ -95,8 +95,8 @@ class Cell(nn.Module):
     References
     ----------
-    .. [1] Barret Zoph, Quoc V. Le, "Neural Architecture Search with Reinforcement Learning". https://arxiv.org/abs/1611.01578
-    .. [2] Barret Zoph, Vijay Vasudevan, Jonathon Shlens, Quoc V. Le,
+    .. [zophnas] Barret Zoph, Quoc V. Le, "Neural Architecture Search with Reinforcement Learning". https://arxiv.org/abs/1611.01578
+    .. [zophnasnet] Barret Zoph, Vijay Vasudevan, Jonathon Shlens, Quoc V. Le,
        "Learning Transferable Architectures for Scalable Image Recognition". https://arxiv.org/abs/1707.07012
     """
...
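For context, here is a hedged sketch of how a cell like the one documented above might be declared. The argument names (`op_candidates`, `num_nodes`, `num_predecessors`) and the use of plain `torch.nn` modules as candidates are illustrative assumptions and may not match the exact Retiarii `Cell` signature in every NNI version.

```python
# Illustrative only: argument names below are assumptions, not the verified API.
import torch.nn as torch_nn
import nni.retiarii.nn.pytorch as nn

cell = nn.Cell(
    op_candidates=[                              # every node picks its operators from this pool
        torch_nn.Conv2d(16, 16, 3, padding=1),
        torch_nn.MaxPool2d(3, stride=1, padding=1),
    ],
    num_nodes=4,                                 # intermediate nodes, each a sum of chosen operators
    num_predecessors=1,                          # cell inputs that nodes may also draw from
)
```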
@@ -15,7 +15,7 @@ from ...utils import uid
 class LayerChoiceMutator(Mutator):
     def __init__(self, nodes: List[Node]):
-        super().__init__()
+        super().__init__(label=nodes[0].operation.parameters['label'])
         self.nodes = nodes
     def mutate(self, model):
@@ -40,7 +40,7 @@ class LayerChoiceMutator(Mutator):
 class InputChoiceMutator(Mutator):
     def __init__(self, nodes: List[Node]):
-        super().__init__()
+        super().__init__(label=nodes[0].operation.parameters['label'])
         self.nodes = nodes
     def mutate(self, model):
@@ -56,7 +56,7 @@ class InputChoiceMutator(Mutator):
 class ValueChoiceMutator(Mutator):
     def __init__(self, nodes: List[Node], candidates: List[Any]):
-        super().__init__()
+        super().__init__(label=nodes[0].operation.parameters['label'])
         self.nodes = nodes
         self.candidates = candidates
@@ -69,7 +69,8 @@ class ValueChoiceMutator(Mutator):
 class ParameterChoiceMutator(Mutator):
     def __init__(self, nodes: List[Tuple[Node, str]], candidates: List[Any]):
-        super().__init__()
+        node, argname = nodes[0]
+        super().__init__(label=node.operation.parameters[argname].label)
         self.nodes = nodes
         self.candidates = candidates
@@ -84,7 +85,7 @@ class ParameterChoiceMutator(Mutator):
 class RepeatMutator(Mutator):
     def __init__(self, nodes: List[Node]):
         # nodes is a subgraph consisting of repeated blocks.
-        super().__init__()
+        super().__init__(label=nodes[0].operation.parameters['label'])
         self.nodes = nodes
     def _retrieve_chain_from_graph(self, graph: Graph) -> List[Node]:
...
 # This file might cause import error for those who didn't install RL-related dependencies
 import logging
+import threading
 from multiprocessing.pool import ThreadPool
 import gym
@@ -18,6 +19,7 @@ from ..execution import submit_models, wait_models
 _logger = logging.getLogger(__name__)
+_thread_lock = threading.Lock()
 class MultiThreadEnvWorker(EnvWorker):
@@ -100,7 +102,8 @@ class ModelEvaluationEnv(gym.Env):
                 if self.cur_step < self.num_steps else self.action_dim
         }
         if self.cur_step == self.num_steps:
-            model = get_targeted_model(self.base_model, self.mutators, self.sample)
+            with _thread_lock:
+                model = get_targeted_model(self.base_model, self.mutators, self.sample)
             _logger.info(f'New model created: {self.sample}')
             submit_models(model)
             wait_models(model)
...
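The change above guards `get_targeted_model` with a module-level lock, presumably because multiple `ThreadPool` env workers can reach that call concurrently and it is not safe to run in parallel. A self-contained sketch of the same pattern (plain stdlib, not NNI code):

```python
# Standalone illustration of the locking pattern used above; not NNI code.
import threading
from multiprocessing.pool import ThreadPool

_lock = threading.Lock()
_shared = {'next_id': 0}            # stands in for state that must not be mutated concurrently

def build_and_evaluate(step):
    with _lock:                     # serialize only the unsafe section
        _shared['next_id'] += 1
        model_id = _shared['next_id']
    # work outside the lock still runs in parallel across workers
    return model_id

with ThreadPool(4) as pool:
    print(sorted(pool.map(build_and_evaluate, range(8))))
```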
@@ -62,7 +62,7 @@ class GridSearch(BaseStrategy):
         search_space = dry_run_for_search_space(base_model, applied_mutators)
         for sample in grid_generator(search_space, shuffle=self.shuffle):
             _logger.debug('New model created. Waiting for resource. %s', str(sample))
-            if query_available_resources() <= 0:
+            while query_available_resources() <= 0:
                 time.sleep(self._polling_interval)
             submit_models(get_targeted_model(base_model, applied_mutators, sample))
@@ -113,6 +113,6 @@ class Random(BaseStrategy):
         search_space = dry_run_for_search_space(base_model, applied_mutators)
         for sample in random_generator(search_space, dedup=self.dedup):
             _logger.debug('New model created. Waiting for resource. %s', str(sample))
-            if query_available_resources() <= 0:
+            while query_available_resources() <= 0:
                 time.sleep(self._polling_interval)
             submit_models(get_targeted_model(base_model, applied_mutators, sample))
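The `if` to `while` change above matters because a single check only waits one polling interval: if no resource has freed up after that sleep, the strategy would previously submit anyway. A toy sketch of the corrected behaviour, with a made-up `query` callable standing in for `query_available_resources`:

```python
# Toy illustration of polling until a resource is actually available.
import time

def wait_for_resource(query, polling_interval=0.5):
    while query() <= 0:             # re-check after every sleep instead of sleeping once
        time.sleep(polling_interval)

free_at = time.time() + 2           # pretend a slot frees up after 2 seconds
wait_for_resource(lambda: 1 if time.time() >= free_at else 0)
print('resource available, safe to submit the model')
```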
@@ -40,6 +40,18 @@ class TPESampler(Sampler):
 class TPEStrategy(BaseStrategy):
+    """
+    The Tree-structured Parzen Estimator (TPE) [bergstrahpo]_ is a sequential model-based optimization (SMBO) approach.
+    SMBO methods sequentially construct models to approximate the performance of hyperparameters based on historical measurements,
+    and then subsequently choose new hyperparameters to test based on this model.
+
+    References
+    ----------
+    .. [bergstrahpo] Bergstra et al., "Algorithms for Hyper-Parameter Optimization".
+        https://papers.nips.cc/paper/4443-algorithms-for-hyper-parameter-optimization.pdf
+    """
+
     def __init__(self):
         self.tpe_sampler = TPESampler()
         self.model_id = 0
...
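The docstring added above describes TPE only at a high level. As a rough, self-contained illustration of the underlying idea (split past trials into "good" and "bad" groups and prefer candidates that look likely under the good density but unlikely under the bad one), here is a toy single-parameter version; it is not NNI's `TPESampler`, and the constants are arbitrary.

```python
# Toy sketch of the density-ratio idea behind TPE; not the actual TPESampler.
import math
import random

def tpe_suggest(history, n_candidates=24, gamma=0.25, bandwidth=0.1):
    """history: list of (x, loss) with x in [0, 1]. Returns the next x to try."""
    if len(history) < 4:
        return random.random()                      # warm-up: sample uniformly
    ordered = sorted(history, key=lambda p: p[1])   # best (lowest loss) first
    n_good = max(1, int(gamma * len(ordered)))
    good = [x for x, _ in ordered[:n_good]]
    bad = [x for x, _ in ordered[n_good:]]

    def kde(xs, x):                                 # crude Gaussian kernel density estimate
        return sum(math.exp(-0.5 * ((x - xi) / bandwidth) ** 2) for xi in xs) / len(xs)

    # Keep the candidate most likely under the "good" density relative to the "bad" one.
    candidates = [random.random() for _ in range(n_candidates)]
    return max(candidates, key=lambda x: kde(good, x) / (kde(bad, x) + 1e-12))

# Toy usage: minimize (x - 0.3) ** 2 over [0, 1].
history = []
for _ in range(30):
    x = tpe_suggest(history)
    history.append((x, (x - 0.3) ** 2))
print(min(history, key=lambda p: p[1]))
```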