diff --git a/Project.toml b/Project.toml
index 4c3d134..f4f96bc 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "ReCo"
 uuid = "b25f7548-fcc9-4c91-bc24-841b54f4dd54"
 authors = ["MoBit "]
-version = "0.3.0"
+version = "0.4.0"
 
 [deps]
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
@@ -22,6 +22,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
 Luxor = "ae8d54c2-7ccd-5906-9d76-62fc9837b5bc"
 MathTeXEngine = "0a4f8689-d25c-4efe-a92b-7142dfc1aa53"
+PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
 ProfileView = "c46f51b8-102a-5cf2-8d2c-8597cb0e0da7"
 ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
diff --git a/src/RL/EnvHelper.jl b/src/RL/EnvHelper.jl
index 75d52b6..f43511d 100644
--- a/src/RL/EnvHelper.jl
+++ b/src/RL/EnvHelper.jl
@@ -6,6 +6,7 @@ struct EnvHelperSharedProps{H<:AbstractHook}
     hook::H
 
     n_steps_before_actions_update::Int64
+    n_actions_updates_per_episode::Int64
 
     elliptical_a_b_ratio::Float64
 
@@ -22,6 +23,7 @@ struct EnvHelperSharedProps{H<:AbstractHook}
         agent::Agent,
         hook::H,
         n_steps_before_actions_update::Int64,
+        n_actions_updates_per_episode::Int64,
         elliptical_a_b_ratio::Float64,
         n_particles::Int64,
     ) where {H<:AbstractHook}
@@ -30,6 +32,7 @@ struct EnvHelperSharedProps{H<:AbstractHook}
             agent,
             hook,
             n_steps_before_actions_update,
+            n_actions_updates_per_episode,
             elliptical_a_b_ratio,
             n_particles,
             fill(0, n_particles),
diff --git a/src/RL/Envs/LocalCOMWithAdditionalShapeRewardEnv.jl b/src/RL/Envs/LocalCOMWithAdditionalShapeRewardEnv.jl
index 25a5c8c..5b315ba 100644
--- a/src/RL/Envs/LocalCOMWithAdditionalShapeRewardEnv.jl
+++ b/src/RL/Envs/LocalCOMWithAdditionalShapeRewardEnv.jl
@@ -165,7 +165,6 @@ function update_reward!(
     env_helper::LocalCOMWithAdditionalShapeRewardEnvHelper,
     particle::ReCo.Particle,
 )
-    normalization = env_helper.shared.n_particles # TODO: Add factor from steps
     n_neighbours = env_helper.n_neighbours[particle.id]
 
     if n_neighbours == 0
@@ -191,7 +190,7 @@ function update_reward!(
             )
         end
 
-        env.shared.reward = reward / normalization
+        set_normalized_reward!(env, reward, env_helper)
     end
 
     return nothing
diff --git a/src/RL/Envs/OriginEnv.jl b/src/RL/Envs/OriginEnv.jl
index 4a12e72..f75983a 100644
--- a/src/RL/Envs/OriginEnv.jl
+++ b/src/RL/Envs/OriginEnv.jl
@@ -94,13 +94,10 @@ end
 function update_reward!(
     env::OriginEnv, env_helper::OriginEnvHelper, particle::ReCo.Particle
 )
-    normalization = env_helper.shared.n_particles # TODO: Add factor from steps
-
     reward = minimizing_reward(
         env_helper.distances_to_origin[particle.id], env_helper.max_distance_to_origin
     )
-
-    env.shared.reward = reward / normalization
+    set_normalized_reward!(env, reward, env_helper)
 
     return nothing
 end
\ No newline at end of file
diff --git a/src/RL/RL.jl b/src/RL/RL.jl
index f3325b4..ee220d5 100644
--- a/src/RL/RL.jl
+++ b/src/RL/RL.jl
@@ -70,6 +70,7 @@ function run_rl(;
     packing_ratio::Float64=0.15,
     show_progress::Bool=true,
     reward_discount::Float64=1.0,
+    show_simulation_progress::Bool=true,
 ) where {E<:Env}
     @assert 0.0 <= elliptical_a_b_ratio <= 1.0
     @assert n_episodes > 0
@@ -98,8 +99,15 @@
 
     hook = TotalRewardPerEpisode()
 
+    n_actions_updates_per_episode = ceil(Int64, episode_duration / update_actions_at)
     env_helper_shared = EnvHelperSharedProps(
-        env, agent, hook, n_steps_before_actions_update, elliptical_a_b_ratio, n_particles
+        env,
+        agent,
+        hook,
+        n_steps_before_actions_update,
+        n_actions_updates_per_episode,
+        elliptical_a_b_ratio,
+        n_particles,
     )
 
     env_helper_args = (
@@ -127,7 +135,13 @@ function run_rl(;
         agent(PRE_EPISODE_STAGE, env)
 
         # Episode
-        ReCo.run_sim(dir; duration=episode_duration, seed=episode, env_helper=env_helper)
+        ReCo.run_sim(
+            dir;
+            duration=episode_duration,
+            seed=episode,
+            env_helper=env_helper,
+            show_progress=show_simulation_progress,
+        )
 
         env.shared.terminated = true
 
diff --git a/src/RL/Reward.jl b/src/RL/Reward.jl
index a8552c6..385e009 100644
--- a/src/RL/Reward.jl
+++ b/src/RL/Reward.jl
@@ -1,3 +1,16 @@
 function minimizing_reward(value::Float64, max_value::Float64)
     return exp(-0.5 * (value / (max_value / 3))^2)
+end
+
+function reward_normalization(env_helper::EnvHelper)
+    return env_helper.shared.n_particles * env_helper.shared.n_actions_updates_per_episode
+end
+
+function set_normalized_reward!(
+    env::Env, unnormalized_reward::Float64, env_helper::EnvHelper
+)
+    normalization = reward_normalization(env_helper)
+    env.shared.reward = unnormalized_reward / normalization
+
+    return nothing
 end
\ No newline at end of file
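Note on the reward change above: rewards are now divided by n_particles * n_actions_updates_per_episode instead of n_particles alone, which resolves the removed "TODO: Add factor from steps" comments. The following is a minimal standalone Julia sketch of how that normalization composes, not code from the repository; the concrete numbers (episode_duration, update_actions_at, n_particles, and the inputs to minimizing_reward) are illustrative assumptions.

# Number of action updates per episode, same expression as the new line in run_rl.
episode_duration = 50.0   # assumed example value
update_actions_at = 0.1   # assumed example value
n_actions_updates_per_episode = ceil(Int64, episode_duration / update_actions_at)

# Normalization used by set_normalized_reward!: particles times action updates.
# If each raw reward lies in [0, 1], summing one contribution per particle and
# per action update keeps the accumulated episode reward at most about 1.
n_particles = 100         # assumed example value
normalization = n_particles * n_actions_updates_per_episode

# One per-particle contribution, reusing the minimizing_reward formula from Reward.jl.
minimizing_reward(value, max_value) = exp(-0.5 * (value / (max_value / 3))^2)
normalized_reward = minimizing_reward(0.3, 1.0) / normalization  # example inputs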