Mirror of https://gitlab.rlp.net/mobitar/ReCo.jl.git

Added reward normalization

Mo8it 2022-01-30 03:20:45 +01:00
parent b5767f0104
commit 02739b7de6
6 changed files with 36 additions and 9 deletions
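In short: the per-particle reward was previously divided only by n_particles (with a TODO to add a factor for the number of steps); it is now divided by n_particles * n_actions_updates_per_episode via the new helper set_normalized_reward!, so the total reward accumulated over an episode stays in a comparable range across particle counts and episode lengths. A minimal sketch of the arithmetic (standalone Julia with hypothetical values, not the ReCo.jl API):

n_particles = 100
n_actions_updates_per_episode = 50

# Normalization factor introduced by this commit
normalization = n_particles * n_actions_updates_per_episode  # 5000

unnormalized_reward = 0.8
normalized_reward = unnormalized_reward / normalization      # 1.6e-4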

View file

@@ -1,7 +1,7 @@
name = "ReCo"
uuid = "b25f7548-fcc9-4c91-bc24-841b54f4dd54"
authors = ["MoBit <mo8it@protonmail.com>"]
version = "0.3.0"
version = "0.4.0"
[deps]
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
@@ -22,6 +22,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
Luxor = "ae8d54c2-7ccd-5906-9d76-62fc9837b5bc"
MathTeXEngine = "0a4f8689-d25c-4efe-a92b-7142dfc1aa53"
PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
ProfileView = "c46f51b8-102a-5cf2-8d2c-8597cb0e0da7"
ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"

View file

@@ -6,6 +6,7 @@ struct EnvHelperSharedProps{H<:AbstractHook}
hook::H
n_steps_before_actions_update::Int64
n_actions_updates_per_episode::Int64
elliptical_a_b_ratio::Float64
@@ -22,6 +23,7 @@ struct EnvHelperSharedProps{H<:AbstractHook}
agent::Agent,
hook::H,
n_steps_before_actions_update::Int64,
n_actions_updates_per_episode::Int64,
elliptical_a_b_ratio::Float64,
n_particles::Int64,
) where {H<:AbstractHook}
@@ -30,6 +32,7 @@ struct EnvHelperSharedProps{H<:AbstractHook}
agent,
hook,
n_steps_before_actions_update,
n_actions_updates_per_episode,
elliptical_a_b_ratio,
n_particles,
fill(0, n_particles),

View file

@@ -165,7 +165,6 @@ function update_reward!(
env_helper::LocalCOMWithAdditionalShapeRewardEnvHelper,
particle::ReCo.Particle,
)
normalization = env_helper.shared.n_particles # TODO: Add factor from steps
n_neighbours = env_helper.n_neighbours[particle.id]
if n_neighbours == 0
@@ -191,7 +190,7 @@
)
end
env.shared.reward = reward / normalization
set_normalized_reward!(env, reward, env_helper)
end
return nothing

View file

@@ -94,13 +94,10 @@ end
function update_reward!(
env::OriginEnv, env_helper::OriginEnvHelper, particle::ReCo.Particle
)
normalization = env_helper.shared.n_particles # TODO: Add factor from steps
reward = minimizing_reward(
env_helper.distances_to_origin[particle.id], env_helper.max_distance_to_origin
)
env.shared.reward = reward / normalization
set_normalized_reward!(env, reward, env_helper)
return nothing
end

View file

@@ -70,6 +70,7 @@ function run_rl(;
packing_ratio::Float64=0.15,
show_progress::Bool=true,
reward_discount::Float64=1.0,
show_simulation_progress::Bool=true,
) where {E<:Env}
@assert 0.0 <= elliptical_a_b_ratio <= 1.0
@assert n_episodes > 0
@@ -98,8 +99,15 @@ function run_rl(;
hook = TotalRewardPerEpisode()
n_actions_updates_per_episode = ceil(Int64, episode_duration / update_actions_at)
env_helper_shared = EnvHelperSharedProps(
env, agent, hook, n_steps_before_actions_update, elliptical_a_b_ratio, n_particles
env,
agent,
hook,
n_steps_before_actions_update,
n_actions_updates_per_episode,
elliptical_a_b_ratio,
n_particles,
)
env_helper_args = (
@@ -127,7 +135,13 @@ function run_rl(;
agent(PRE_EPISODE_STAGE, env)
# Episode
ReCo.run_sim(dir; duration=episode_duration, seed=episode, env_helper=env_helper)
ReCo.run_sim(
dir;
duration=episode_duration,
seed=episode,
env_helper=env_helper,
show_progress=show_simulation_progress,
)
env.shared.terminated = true

View file

@@ -1,3 +1,16 @@
function minimizing_reward(value::Float64, max_value::Float64)
    return exp(-0.5 * (value / (max_value / 3))^2)
end

function reward_normalization(env_helper::EnvHelper)
    return env_helper.shared.n_particles * env_helper.shared.n_actions_updates_per_episode
end

function set_normalized_reward!(
    env::Env, unnormalized_reward::Float64, env_helper::EnvHelper
)
    normalization = reward_normalization(env_helper)
    env.shared.reward = unnormalized_reward / normalization
    return nothing
end
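For intuition, minimizing_reward is a Gaussian-shaped reward with standard deviation max_value / 3: it equals 1.0 when the value reaches the target (0.0) and decays to roughly exp(-4.5) ≈ 0.011 at the maximal value. A quick standalone check (the function is copied from above; max_value = 10.0 is an arbitrary example value, not taken from the commit):

function minimizing_reward(value::Float64, max_value::Float64)
    return exp(-0.5 * (value / (max_value / 3))^2)
end

minimizing_reward(0.0, 10.0)   # 1.0 at the target
minimizing_reward(5.0, 10.0)   # exp(-1.125) ≈ 0.325 halfway out
minimizing_reward(10.0, 10.0)  # exp(-4.5) ≈ 0.011 at max_value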