Mirror of https://gitlab.rlp.net/mobitar/ReCo.jl.git, synced 2024-12-21 00:51:21 +00:00
Added reward normalization
This commit is contained in:
commit 02739b7de6 (parent b5767f0104)
6 changed files with 36 additions and 9 deletions
Project.toml
@@ -1,7 +1,7 @@
 name = "ReCo"
 uuid = "b25f7548-fcc9-4c91-bc24-841b54f4dd54"
 authors = ["MoBit <mo8it@protonmail.com>"]
-version = "0.3.0"
+version = "0.4.0"

 [deps]
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
@@ -22,6 +22,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
 Luxor = "ae8d54c2-7ccd-5906-9d76-62fc9837b5bc"
 MathTeXEngine = "0a4f8689-d25c-4efe-a92b-7142dfc1aa53"
 PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
 ProfileView = "c46f51b8-102a-5cf2-8d2c-8597cb0e0da7"
 ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"

@@ -6,6 +6,7 @@ struct EnvHelperSharedProps{H<:AbstractHook}
     hook::H

     n_steps_before_actions_update::Int64
+    n_actions_updates_per_episode::Int64

     elliptical_a_b_ratio::Float64

@@ -22,6 +23,7 @@ struct EnvHelperSharedProps{H<:AbstractHook}
         agent::Agent,
         hook::H,
         n_steps_before_actions_update::Int64,
+        n_actions_updates_per_episode::Int64,
         elliptical_a_b_ratio::Float64,
         n_particles::Int64,
     ) where {H<:AbstractHook}
@@ -30,6 +32,7 @@ struct EnvHelperSharedProps{H<:AbstractHook}
             agent,
             hook,
             n_steps_before_actions_update,
+            n_actions_updates_per_episode,
             elliptical_a_b_ratio,
             n_particles,
             fill(0, n_particles),

@@ -165,7 +165,6 @@ function update_reward!(
     env_helper::LocalCOMWithAdditionalShapeRewardEnvHelper,
     particle::ReCo.Particle,
 )
-    normalization = env_helper.shared.n_particles # TODO: Add factor from steps
     n_neighbours = env_helper.n_neighbours[particle.id]

     if n_neighbours == 0
@@ -191,7 +190,7 @@ function update_reward!(
             )
         end

-        env.shared.reward = reward / normalization
+        set_normalized_reward!(env, reward, env_helper)
     end

     return nothing

@@ -94,13 +94,10 @@ end
 function update_reward!(
     env::OriginEnv, env_helper::OriginEnvHelper, particle::ReCo.Particle
 )
-    normalization = env_helper.shared.n_particles # TODO: Add factor from steps
-
     reward = minimizing_reward(
         env_helper.distances_to_origin[particle.id], env_helper.max_distance_to_origin
     )

-    env.shared.reward = reward / normalization
+    set_normalized_reward!(env, reward, env_helper)

     return nothing
 end

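For a sense of scale, minimizing_reward (its definition appears as context in the last file of this diff) is a Gaussian in the distance with standard deviation max_value / 3, so the unnormalized reward always lies in (0, 1]. A quick check with made-up distances:

# Sketch: same expression as in the diff; 10.0 is an arbitrary example for the
# maximum distance to the origin, not a value taken from ReCo.
minimizing_reward(value::Float64, max_value::Float64) = exp(-0.5 * (value / (max_value / 3))^2)

minimizing_reward(0.0, 10.0)        # 1.0  (particle sits exactly at the origin)
minimizing_reward(10.0 / 3, 10.0)   # exp(-0.5) ≈ 0.61  (one standard deviation away)
minimizing_reward(10.0, 10.0)       # exp(-4.5) ≈ 0.011  (at the maximum distance)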
src/RL/RL.jl (18 changed lines)
@@ -70,6 +70,7 @@ function run_rl(;
     packing_ratio::Float64=0.15,
     show_progress::Bool=true,
     reward_discount::Float64=1.0,
+    show_simulation_progress::Bool=true,
 ) where {E<:Env}
     @assert 0.0 <= elliptical_a_b_ratio <= 1.0
     @assert n_episodes > 0
@@ -98,8 +99,15 @@ function run_rl(;

     hook = TotalRewardPerEpisode()

+    n_actions_updates_per_episode = ceil(Int64, episode_duration / update_actions_at)
     env_helper_shared = EnvHelperSharedProps(
-        env, agent, hook, n_steps_before_actions_update, elliptical_a_b_ratio, n_particles
+        env,
+        agent,
+        hook,
+        n_steps_before_actions_update,
+        n_actions_updates_per_episode,
+        elliptical_a_b_ratio,
+        n_particles,
     )

     env_helper_args = (
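The new n_actions_updates_per_episode value feeds both the EnvHelperSharedProps constructor above and the reward normalization added below. As a rough illustration (the two durations here are invented, not run_rl defaults):

# Number of action updates per episode, rounded up so a partial final window
# still counts; same expression as in the diff above.
episode_duration = 50.0   # assumed simulation time per episode
update_actions_at = 0.1   # assumed time between two action updates
n_actions_updates_per_episode = ceil(Int64, episode_duration / update_actions_at)  # 500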
@@ -127,7 +135,13 @@ function run_rl(;
         agent(PRE_EPISODE_STAGE, env)

         # Episode
-        ReCo.run_sim(dir; duration=episode_duration, seed=episode, env_helper=env_helper)
+        ReCo.run_sim(
+            dir;
+            duration=episode_duration,
+            seed=episode,
+            env_helper=env_helper,
+            show_progress=show_simulation_progress,
+        )

         env.shared.terminated = true

|
@ -1,3 +1,16 @@
|
|||
function minimizing_reward(value::Float64, max_value::Float64)
|
||||
return exp(-0.5 * (value / (max_value / 3))^2)
|
||||
end
|
||||
|
||||
function reward_normalization(env_helper::EnvHelper)
|
||||
return env_helper.shared.n_particles * env_helper.shared.n_actions_updates_per_episode
|
||||
end
|
||||
|
||||
function set_normalized_reward!(
|
||||
env::Env, unnormalized_reward::Float64, env_helper::EnvHelper
|
||||
)
|
||||
normalization = reward_normalizaion(env_helper)
|
||||
env.shared.reward = unnormalized_reward / normalization
|
||||
|
||||
return nothing
|
||||
end
|
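The intent of the two new helpers: each call to set_normalized_reward! contributes at most 1 / (n_particles * n_actions_updates_per_episode) to the episode total recorded by TotalRewardPerEpisode, since minimizing_reward is bounded by 1. A self-contained sketch under those assumptions (standalone names and random stand-in distances, not the ReCo types):

# Accumulate normalized rewards the way one episode would: once per particle
# per action update. The total stays ≤ 1 no matter how large the two counts are.
minimizing_reward(value, max_value) = exp(-0.5 * (value / (max_value / 3))^2)

function normalized_episode_total(n_particles, n_actions_updates_per_episode, max_distance)
    normalization = n_particles * n_actions_updates_per_episode
    total = 0.0
    for _ in 1:n_actions_updates_per_episode, _ in 1:n_particles
        distance = max_distance * rand()  # stand-in for a particle's distance to the origin
        total += minimizing_reward(distance, max_distance) / normalization
    end
    return total
end

normalized_episode_total(100, 500, 10.0)  # some value ≤ 1.0, comparable across episode setups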