from __future__ import division
from future import standard_library
standard_library.install_aliases()
from builtins import str
from builtins import range
from past.utils import old_div
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np
import tensorflow as tf
import os, random, gc, math, re
import multiprocessing, types, shutil, pickle, json
from collections import defaultdict
try:
  from collections.abc import MutableMapping  # Python 3.3+
except ImportError:
  from collections import MutableMapping  # Python 2 fallback


def tanh_sample_info(mu, logsigma, stop_action_gradient=False, n_samples=1):
  """Samples from a diagonal Gaussian and squashes through tanh.

  Returns the raw sample, its log-likelihood, the squashed sample, and the
  squashed log-likelihood (with the tanh change-of-variables correction).
  """
  if n_samples > 1:
    mu = tf.expand_dims(mu, 2)
    logsigma = tf.expand_dims(logsigma, 2)
    # Wrap n_samples in a list so tf.concat receives two rank-1 shape tensors.
    sample_shape = tf.concat([tf.shape(mu), [n_samples]], 0)
  else:
    sample_shape = tf.shape(mu)
  flat_act = mu + tf.random_normal(sample_shape) * tf.exp(logsigma)
  if stop_action_gradient:
    flat_act = tf.stop_gradient(flat_act)
  normalized_dist_t = (flat_act - mu) * tf.exp(-logsigma)  # ... x D
  quadratic = -0.5 * tf.reduce_sum(normalized_dist_t ** 2, axis=-1)  # ... x (None)
  log_z = tf.reduce_sum(logsigma, axis=-1)  # ... x (None)
  D_t = tf.cast(tf.shape(mu)[-1], tf.float32)
  log_z += 0.5 * D_t * np.log(2 * np.pi)
  flat_ll = quadratic - log_z
  scaled_act = tf.tanh(flat_act)
  # Log-det-Jacobian correction for the tanh squashing.
  corr = tf.reduce_sum(tf.log(1. - tf.square(scaled_act) + 1e-6), axis=-1)
  scaled_ll = flat_ll - corr
  return flat_act, flat_ll, scaled_act, scaled_ll


def tf_cheating_contcartpole(state, action):
  """Differentiable continuous-action cartpole dynamics (mirrors gym's CartPole)."""
  gravity = 9.8
  masscart = 1.0
  masspole = 0.1
  total_mass = masspole + masscart
  length = 0.5  # actually half the pole's length
  polemass_length = masspole * length
  force_mag = 10.0
  tau = 0.02  # seconds between state updates

  # Angle at which to fail the episode
  theta_threshold_radians = 12 * 2 * math.pi / 360
  x_threshold = 2.4

  x, x_dot, theta, theta_dot = tf.split(state, 4, axis=-1)
  done = tf.logical_or(
      x < -x_threshold,
      tf.logical_or(
          x > x_threshold,
          tf.logical_or(theta < -theta_threshold_radians,
                        theta > theta_threshold_radians)))

  force = force_mag * action
  costheta = tf.cos(theta)
  sintheta = tf.sin(theta)

  temp = old_div((force + polemass_length * theta_dot * theta_dot * sintheta),
                 total_mass)
  thetaacc = old_div(
      (gravity * sintheta - costheta * temp),
      (length * (old_div(4.0, 3.0) - masspole * costheta * costheta / total_mass)))
  xacc = temp - polemass_length * thetaacc * costheta / total_mass

  x = x + tau * x_dot
  x_dot = x_dot + tau * xacc
  theta = theta + tau * theta_dot
  theta_dot = theta_dot + tau * thetaacc
  state = tf.concat([x, x_dot, theta, theta_dot], -1)
  done = tf.squeeze(tf.cast(done, tf.float32), -1)
  reward = 1.0 - done
  done *= 0.
  return state, reward, done


def create_directory(dir):
  """Creates `dir` and any missing parents, ignoring already-existing directories."""
  dir_chunks = dir.split("/")
  for i in range(len(dir_chunks)):
    partial_dir = "/".join(dir_chunks[:i + 1])
    try:
      os.makedirs(partial_dir)
    except OSError:
      pass
  return dir


def create_and_wipe_directory(dir):
  shutil.rmtree(create_directory(dir))
  create_directory(dir)


def wipe_file(fname):
  with open(fname, "w") as f:
    f.write("")
  return fname


def get_largest_epoch_in_dir(dir, saveid):
  """Returns the largest epoch number among files named `<epoch>_<saveid>`, or False."""
  reg_matches = [re.findall(r'\d+_%s' % saveid, filename)
                 for filename in os.listdir(dir)]
  epoch_labels = [int(regmatch[0].split("_")[0])
                  for regmatch in reg_matches if regmatch]
  if len(epoch_labels) == 0:
    return False
  return max(epoch_labels)


def wipe_all_but_largest_epoch_in_dir(dir, saveid):
  largest = get_largest_epoch_in_dir(dir, saveid)
  reg_matches = [(filename, re.findall(r'\d+_%s' % saveid, filename))
                 for filename in os.listdir(dir)]
  for filename, regmatch in reg_matches:
    if regmatch and int(regmatch[0].split("_")[0]) != largest:
      os.remove(os.path.join(dir, filename))


class ConfigDict(dict):
  """Nested config container with JSON loading, inheritance, and ghost (override-only) keys."""

  def __init__(self, loc=None, ghost=False):
    self._dict = defaultdict(lambda: False)
    self.ghost = ghost
    if loc:
      with open(loc) as f:
        raw = json.load(f)
      if "inherits" in raw and raw["inherits"]:
        for dep_loc in raw["inherits"]:
          self.update(ConfigDict(dep_loc))
      if "updates" in raw and raw["updates"]:
        self.update(raw["updates"], include_all=True)

  def __getitem__(self, key):
    return self._dict[key]

  def __setitem__(self, key, value):
    self._dict[key] = value

  def __str__(self):
    return str(dict(self._dict))

  def __repr__(self):
    return str(dict(self._dict))

  def __iter__(self):
    return self._dict.__iter__()

  def __bool__(self):
    return bool(self._dict)

  def __nonzero__(self):
    return bool(self._dict)

  def update(self, dictlike, include_all=False):
    for key in dictlike:
      value = dictlike[key]
      if isinstance(value, dict):
        if key[0] == "*":  # this means only override, do not set
          key = key[1:]
          ghost = True
        else:
          ghost = False
        if (not include_all and isinstance(value, ConfigDict)
            and key not in self._dict and value.ghost):
          continue
        if key not in self._dict:
          self._dict[key] = ConfigDict(ghost=ghost)
        self._dict[key].update(value)
      else:
        self._dict[key] = value
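

if __name__ == "__main__":
  # Illustrative usage sketch of ConfigDict, exercising only the pure-Python
  # helpers (no TensorFlow session needed). The keys used below ("model",
  # "lr", "*eval", ...) are placeholders chosen for the example, not config
  # keys defined elsewhere in this codebase.
  cfg = ConfigDict()
  cfg.update({"model": {"lr": 1e-3, "hidden_units": 256}})
  print(cfg["model"]["lr"])    # 0.001
  print(cfg["missing_key"])    # False -- absent keys default to False

  # A "*"-prefixed sub-dict is marked as ghost: when merged into another
  # ConfigDict it may only override sections that already exist, never
  # create new ones.
  overrides = ConfigDict()
  overrides.update({"*eval": {"batch_size": 64}})
  cfg.update(overrides)
  print("eval" in list(cfg))   # False -- the ghost section was not created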