#-*-Python-*-
# NOTE: For best training, low-level exploration (uvf_add_noise_fn.stddev)
# should be reduced to around 0.1.
create_maze_env.env_name = "PointMaze"
context_range_min = -10
context_range_max = 10
context_range = (%context_range_min, %context_range_max)
meta_context_range = ((-2, -2), (10, 10))

RESET_EPISODE_PERIOD = 500
RESET_ENV_PERIOD = 1
# End episode every N steps
UvfAgent.reset_episode_cond_fn = @every_n_steps
every_n_steps.n = %RESET_EPISODE_PERIOD
train_uvf.max_steps_per_episode = %RESET_EPISODE_PERIOD
# Do a manual reset every N episodes
UvfAgent.reset_env_cond_fn = @every_n_episodes
every_n_episodes.n = %RESET_ENV_PERIOD
every_n_episodes.steps_per_episode = %RESET_EPISODE_PERIOD

## Config defaults
EVAL_MODES = ["eval1", "eval2", "eval3"]

## Config agent
CONTEXT = @agent/Context
META_CONTEXT = @meta/Context

## Config agent context
agent/Context.context_ranges = [%context_range]
agent/Context.context_shapes = [%SUBGOAL_DIM]
agent/Context.meta_action_every_n = 10
agent/Context.samplers = {
    "train": [@train/DirectionSampler],
    "explore": [@train/DirectionSampler],
    "eval1": [@uvf_eval1/ConstantSampler],
    "eval2": [@uvf_eval2/ConstantSampler],
    "eval3": [@uvf_eval3/ConstantSampler],
}
agent/Context.context_transition_fn = @relative_context_transition_fn
agent/Context.context_multi_transition_fn = @relative_context_multi_transition_fn
agent/Context.reward_fn = @uvf/negative_distance

## Config meta context
meta/Context.context_ranges = [%meta_context_range]
meta/Context.context_shapes = [2]
meta/Context.samplers = {
    "train": [@train/RandomSampler],
    "explore": [@train/RandomSampler],
    "eval1": [@eval1/ConstantSampler],
    "eval2": [@eval2/ConstantSampler],
    "eval3": [@eval3/ConstantSampler],
}
meta/Context.reward_fn = @task/negative_distance

## Config rewards
task/negative_distance.state_indices = [0, 1]
task/negative_distance.relative_context = False
task/negative_distance.diff = False
task/negative_distance.offset = 0.0

## Config samplers
train/RandomSampler.context_range = %meta_context_range
train/DirectionSampler.context_range = %context_range
train/DirectionSampler.k = %SUBGOAL_DIM
relative_context_transition_fn.k = %SUBGOAL_DIM
relative_context_multi_transition_fn.k = %SUBGOAL_DIM
MetaAgent.k = %SUBGOAL_DIM

eval1/ConstantSampler.value = [8, 0]
eval2/ConstantSampler.value = [8, 8]
eval3/ConstantSampler.value = [0, 8]
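
## Usage (sketch)
# A minimal loading sketch, not part of the original config. It assumes the
# gin-config package is installed and that the modules registering the
# configurables above (create_maze_env, UvfAgent, MetaAgent, the samplers,
# etc.) have already been imported; the file name "point_maze.gin" is a
# hypothetical placeholder. Note that %SUBGOAL_DIM is referenced above but
# never bound in this file, so it must come from a companion base config or
# an explicit binding such as the one shown here:
#
#   import gin
#
#   # Bind the free SUBGOAL_DIM macro (the value 2 is illustrative). Gin
#   # resolves %-references lazily, so this binding may be parsed before
#   # or after the config file itself.
#   gin.parse_config("SUBGOAL_DIM = 2")
#   gin.parse_config_file("point_maze.gin")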