Stars: 1 · Watchers: 0 · Forks: 0
Mirror of https://gitlab.rlp.net/mobitar/ReCo.jl.git — synced 2024-12-29 10:13:31 +00:00

Fixed normalization

This commit is contained in:
Mo8it 2022-01-30 03:32:47 +01:00
parent 02739b7de6
commit eb29ca3d10
3 changed files with 10 additions and 9 deletions

View file

@@ -14,13 +14,14 @@ function run_reward_discount_analysis()
         γ = γs[γ_ind]

         env_helper = ReCo.run_rl(;
             EnvType=ReCo.OriginEnv,
-            n_episodes=500,
-            episode_duration=8.0,
-            n_particles=200,
+            n_episodes=3,
+            episode_duration=15.0,
+            n_particles=150,
             update_actions_at=0.08,
             ϵ_stable=0.00001,
             process_dir="reward_discount_analysis/$γ_ind",
             reward_discount=γ,
+            show_simulation_progress=false,
         )

         rewards = env_helper.shared.hook.rewards
@@ -34,7 +35,7 @@ function run_reward_discount_analysis()
     ax = Axis(fig[1, 1]; xlabel="Episode", ylabel="Reward")

     rewards_plots = []
-    for (rewards_ind, rewards) in enumerate(γ_rewards)
+    for rewards in γ_rewards
         rewards_plot = lines!(ax, 1:length(rewards), rewards)
         push!(rewards_plots, rewards_plot)
     end

View file

@@ -7,6 +7,7 @@ struct EnvHelperSharedProps{H<:AbstractHook}
     n_steps_before_actions_update::Int64
     n_actions_updates_per_episode::Int64
+    reward_normalization::Float64

     elliptical_a_b_ratio::Float64
@@ -27,12 +28,15 @@ struct EnvHelperSharedProps{H<:AbstractHook}
         elliptical_a_b_ratio::Float64,
         n_particles::Int64,
     ) where {H<:AbstractHook}
+        reward_normalization = n_particles * n_actions_updates_per_episode
+
         return new{H}(
             env,
             agent,
            hook,
            n_steps_before_actions_update,
            n_actions_updates_per_episode,
+            reward_normalization,
            elliptical_a_b_ratio,
            n_particles,
            fill(0, n_particles),

View file

@@ -2,14 +2,10 @@ function minimizing_reward(value::Float64, max_value::Float64)
     return exp(-0.5 * (value / (max_value / 3))^2)
 end

-function reward_normalization(env_helper::EnvHelper)
-    return env_helper.shared.n_particles * env_helper.shared.n_actions_updates_per_episode
-end
-
 function set_normalized_reward!(
     env::Env, unnormalized_reward::Float64, env_helper::EnvHelper
 )
-    normalization = reward_normalizaion(env_helper)
+    normalization = env_helper.shared.reward_normalization
     env.shared.reward = unnormalized_reward / normalization

     return nothing