From eb29ca3d10ede132f02bb3140220c470417fe5bb Mon Sep 17 00:00:00 2001
From: Mo8it
Date: Sun, 30 Jan 2022 03:32:47 +0100
Subject: [PATCH] Fixed normalization

---
 analysis/reward_discount_analysis.jl | 9 +++++----
 src/RL/EnvHelper.jl                  | 4 ++++
 src/RL/Reward.jl                     | 6 +-----
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/analysis/reward_discount_analysis.jl b/analysis/reward_discount_analysis.jl
index c878cfd..cd8ed07 100644
--- a/analysis/reward_discount_analysis.jl
+++ b/analysis/reward_discount_analysis.jl
@@ -14,13 +14,14 @@ function run_reward_discount_analysis()
         γ = γs[γ_ind]
         env_helper = ReCo.run_rl(;
             EnvType=ReCo.OriginEnv,
-            n_episodes=500,
-            episode_duration=8.0,
-            n_particles=200,
+            n_episodes=3,
+            episode_duration=15.0,
+            n_particles=150,
             update_actions_at=0.08,
             ϵ_stable=0.00001,
             process_dir="reward_discount_analysis/$γ_ind",
             reward_discount=γ,
+            show_simulation_progress=false,
         )
 
         rewards = env_helper.shared.hook.rewards
@@ -34,7 +35,7 @@
         ax = Axis(fig[1, 1]; xlabel="Episode", ylabel="Reward")
 
         rewards_plots = []
-        for (rewards_ind, rewards) in enumerate(γ_rewards)
+        for rewards in γ_rewards
            rewards_plot = lines!(ax, 1:length(rewards), rewards)
            push!(rewards_plots, rewards_plot)
         end
diff --git a/src/RL/EnvHelper.jl b/src/RL/EnvHelper.jl
index f43511d..bee70f4 100644
--- a/src/RL/EnvHelper.jl
+++ b/src/RL/EnvHelper.jl
@@ -7,6 +7,7 @@ struct EnvHelperSharedProps{H<:AbstractHook}
 
     n_steps_before_actions_update::Int64
     n_actions_updates_per_episode::Int64
+    reward_normalization::Float64
 
     elliptical_a_b_ratio::Float64
 
@@ -27,12 +28,15 @@
         elliptical_a_b_ratio::Float64,
         n_particles::Int64,
     ) where {H<:AbstractHook}
+        reward_normalization = n_particles * n_actions_updates_per_episode
+
         return new{H}(
             env,
             agent,
             hook,
             n_steps_before_actions_update,
             n_actions_updates_per_episode,
+            reward_normalization,
             elliptical_a_b_ratio,
             n_particles,
             fill(0, n_particles),
diff --git a/src/RL/Reward.jl b/src/RL/Reward.jl
index 385e009..de388cd 100644
--- a/src/RL/Reward.jl
+++ b/src/RL/Reward.jl
@@ -2,14 +2,10 @@ function minimizing_reward(value::Float64, max_value::Float64)
     return exp(-0.5 * (value / (max_value / 3))^2)
 end
 
-function reward_normalization(env_helper::EnvHelper)
-    return env_helper.shared.n_particles * env_helper.shared.n_actions_updates_per_episode
-end
-
 function set_normalized_reward!(
     env::Env, unnormalized_reward::Float64, env_helper::EnvHelper
 )
-    normalization = reward_normalizaion(env_helper)
+    normalization = env_helper.shared.reward_normalization
     env.shared.reward = unnormalized_reward / normalization
 
     return nothing
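
Note on the fix: the old set_normalized_reward! called a helper whose name was misspelled at the call site (reward_normalizaion instead of reward_normalization), which would raise an UndefVarError the first time a reward was normalized, and it recomputed the same product on every call. The patch drops the helper and precomputes n_particles * n_actions_updates_per_episode once in the EnvHelperSharedProps inner constructor, storing it in the new reward_normalization field. Below is a minimal, self-contained Julia sketch of the resulting flow; SimpleSharedProps, normalized_reward, and the literal values are illustrative stand-ins, not names from the ReCo codebase:

struct SimpleSharedProps
    n_particles::Int64
    n_actions_updates_per_episode::Int64
    reward_normalization::Float64

    function SimpleSharedProps(n_particles::Int64, n_actions_updates_per_episode::Int64)
        # Computed once at construction, mirroring the patched EnvHelperSharedProps;
        # new() converts the Int product to the Float64 field type.
        reward_normalization = n_particles * n_actions_updates_per_episode
        return new(n_particles, n_actions_updates_per_episode, reward_normalization)
    end
end

function normalized_reward(unnormalized_reward::Float64, shared::SimpleSharedProps)
    # Field lookup instead of a helper call, so there is no function name to misspell
    return unnormalized_reward / shared.reward_normalization
end

shared = SimpleSharedProps(150, 15)    # illustrative values
normalized_reward(450.0, shared)       # 450 / (150 * 15) == 0.2

The design choice trades one stored Float64 for a multiplication per reward update; since both factors are fixed for the lifetime of the helper, the precomputed field is always consistent with them.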