Mirror of https://gitlab.rlp.net/mobitar/ReCo.jl.git, synced 2024-12-21 00:51:21 +00:00
Fixed normalization

Commit eb29ca3d10, parent 02739b7de6
3 changed files with 10 additions and 9 deletions
@@ -14,13 +14,14 @@ function run_reward_discount_analysis()
         γ = γs[γ_ind]
         env_helper = ReCo.run_rl(;
             EnvType=ReCo.OriginEnv,
-            n_episodes=500,
-            episode_duration=8.0,
-            n_particles=200,
+            n_episodes=3,
+            episode_duration=15.0,
+            n_particles=150,
             update_actions_at=0.08,
             ϵ_stable=0.00001,
             process_dir="reward_discount_analysis/$γ_ind",
             reward_discount=γ,
+            show_simulation_progress=false,
         )

         rewards = env_helper.shared.hook.rewards
@@ -34,7 +35,7 @@ function run_reward_discount_analysis()
     ax = Axis(fig[1, 1]; xlabel="Episode", ylabel="Reward")

     rewards_plots = []
-    for (rewards_ind, rewards) in enumerate(γ_rewards)
+    for rewards in γ_rewards
         rewards_plot = lines!(ax, 1:length(rewards), rewards)
         push!(rewards_plots, rewards_plot)
     end
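For context, a self-contained sketch of the plotting pattern this file uses around the simplified loop. The names γs, γ_rewards, and rewards_plots come from the surrounding code; the synthetic data, the legend, and the save call are illustrative assumptions, not necessarily what the analysis script actually does:

using CairoMakie  # assumption: the analysis plots with a Makie backend

# Synthetic stand-ins for the sweep results: one reward series per tested
# reward discount γ.
γs = [0.5, 0.9, 0.99]
γ_rewards = [cumsum(rand(50)) for _ in γs]

fig = Figure()
ax = Axis(fig[1, 1]; xlabel="Episode", ylabel="Reward")

# Collect the line plots so they can be referenced later, e.g. by a legend,
# which is presumably why rewards_plots exists in the original file.
rewards_plots = []
for rewards in γ_rewards
    push!(rewards_plots, lines!(ax, 1:length(rewards), rewards))
end

Legend(fig[1, 2], rewards_plots, ["γ = $γ" for γ in γs])
save("reward_discount_analysis.pdf", fig)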
@@ -7,6 +7,7 @@ struct EnvHelperSharedProps{H<:AbstractHook}

     n_steps_before_actions_update::Int64
     n_actions_updates_per_episode::Int64
+    reward_normalization::Float64

     elliptical_a_b_ratio::Float64

@@ -27,12 +28,15 @@ struct EnvHelperSharedProps{H<:AbstractHook}
         elliptical_a_b_ratio::Float64,
         n_particles::Int64,
     ) where {H<:AbstractHook}
+        reward_normalization = n_particles * n_actions_updates_per_episode
+
         return new{H}(
             env,
             agent,
             hook,
             n_steps_before_actions_update,
             n_actions_updates_per_episode,
+            reward_normalization,
             elliptical_a_b_ratio,
             n_particles,
             fill(0, n_particles),
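Read in isolation, this constructor change is a precompute-and-store pattern: the normalization constant n_particles * n_actions_updates_per_episode is computed once and kept as a struct field instead of being rederived on every reward update. A minimal standalone sketch of that pattern, using a simplified, hypothetical struct in place of EnvHelperSharedProps:

# Simplified stand-in for EnvHelperSharedProps: only the fields relevant to
# the reward normalization are kept.
struct SharedProps
    n_actions_updates_per_episode::Int64
    n_particles::Int64
    reward_normalization::Float64

    function SharedProps(n_actions_updates_per_episode::Int64, n_particles::Int64)
        # Precompute the constant once, as the new constructor does, instead
        # of recomputing it via a helper on every reward update.
        reward_normalization = n_particles * n_actions_updates_per_episode
        return new(n_actions_updates_per_episode, n_particles, reward_normalization)
    end
end

props = SharedProps(100, 150)
@assert props.reward_normalization == 15_000.0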
@@ -2,14 +2,10 @@ function minimizing_reward(value::Float64, max_value::Float64)
     return exp(-0.5 * (value / (max_value / 3))^2)
 end

-function reward_normalization(env_helper::EnvHelper)
-    return env_helper.shared.n_particles * env_helper.shared.n_actions_updates_per_episode
-end
-
 function set_normalized_reward!(
     env::Env, unnormalized_reward::Float64, env_helper::EnvHelper
 )
-    normalization = reward_normalizaion(env_helper)
+    normalization = env_helper.shared.reward_normalization
     env.shared.reward = unnormalized_reward / normalization

     return nothing
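Since minimizing_reward returns values in (0, 1], dividing the accumulated reward by n_particles * n_actions_updates_per_episode keeps the per-episode reward in (0, 1] whenever one such term is accumulated per particle and per action update. A hedged sketch of that arithmetic; the accumulation loop and the chosen numbers below are illustrative, not the package's actual update path:

# Gaussian-shaped reward from the diff: 1.0 at value = 0, decaying toward 0.
minimizing_reward(value::Float64, max_value::Float64) =
    exp(-0.5 * (value / (max_value / 3))^2)

n_particles = 150
n_actions_updates_per_episode = 100  # hypothetical value for the example
normalization = n_particles * n_actions_updates_per_episode

# Illustrative accumulation: one minimizing_reward term per particle and per
# action update, summed over a whole episode.
unnormalized_reward = sum(
    minimizing_reward(rand() * 10.0, 10.0)
    for _ in 1:n_particles, _ in 1:n_actions_updates_per_episode
)

normalized_reward = unnormalized_reward / normalization
@assert 0.0 < normalized_reward <= 1.0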