diff --git a/src/RL.jl b/src/RL.jl
index f889f5e..bfa17f0 100644
--- a/src/RL.jl
+++ b/src/RL.jl
@@ -10,7 +10,7 @@
 using LoopVectorization: @turbo
 using Random: Random
 using ProgressMeter: @showprogress

-using ..ReCo: ReCo, Particle, angle2
+using ..ReCo: ReCo, Particle, angle2, center_of_mass

 const INITIAL_REWARD = 0.0

@@ -30,15 +30,17 @@ mutable struct Env <: AbstractEnv
     reward::Float64
     terminated::Bool

+    center_of_mass::SVector{2,Float64}
+
     function Env(
         min_distance::Float64,
         max_distance::Float64;
-        n_v_actions::Int64=2,
-        n_ω_actions::Int64=3,
-        max_v::Float64=80.0,
+        n_v_actions::Int64=3,
+        n_ω_actions::Int64=5,
+        max_v::Float64=60.0,
         max_ω::Float64=π / 2,
-        n_distance_states::Int64=2,
-        n_angle_states::Int64=2,
+        n_distance_states::Int64=3,
+        n_angle_states::Int64=4,
     )
         @assert min_distance > 0.0
         @assert max_distance > min_distance
@@ -127,6 +129,7 @@ mutable struct Env <: AbstractEnv
             initial_state_ind,
             INITIAL_REWARD,
             false,
+            SVector(0.0, 0.0),
         )
     end
 end
@@ -168,6 +171,9 @@ struct Params{H<:AbstractHook}
     min_sq_distances::Vector{Float64}
     vecs_r⃗₁₂_to_min_distance_particle::Vector{SVector{2,Float64}}

+    half_box_len::Float64
+    max_elliptic_distance::Float64
+
     function Params(
         env::Env,
         agent::Agent,
@@ -175,7 +181,10 @@
         n_steps_before_actions_update::Int64,
         goal_shape_ratio::Float64,
         n_particles::Int64,
+        half_box_len::Float64,
     ) where {H<:AbstractHook}
+        max_elliptic_distance = sqrt(2) * half_box_len
+
         n_states = env.n_states

         return new{H}(
@@ -191,6 +200,8 @@
             n_particles,
             fill(Inf64, n_particles),
             fill(SVector(0.0, 0.0), n_particles),
+            half_box_len,
+            max_elliptic_distance,
         )
     end
 end
@@ -225,11 +236,9 @@ function get_state_ind(state::S, state_space::Vector{S}) where {S<:SVector{2,Int
     return findfirst(x -> x == state, state_space)
 end

-function state_update_hook(
-    rl_params::Params, particles::Vector{Particle}, n_particles::Int64
-)
-    @turbo for i in 1:n_particles
-        rl_params.old_states_ind[i] = rl_params.states_ind[i]
+function state_update_hook(rl_params::Params, particles::Vector{Particle})
+    @turbo for id in 1:(rl_params.n_particles)
+        rl_params.old_states_ind[id] = rl_params.states_ind[id]
     end

     env = rl_params.env
@@ -240,10 +249,10 @@ function state_update_hook(

     state_space = env.state_space

-    for i in 1:n_particles
+    for id in 1:(rl_params.n_particles)
         env_distance_state::Union{Interval,Nothing} = nothing

-        min_sq_distance = rl_params.min_sq_distances[i]
+        min_sq_distance = rl_params.min_sq_distances[id]
         min_distance = sqrt(min_sq_distance)

         if !isinf(min_sq_distance)
@@ -259,8 +268,8 @@ function state_update_hook(
         state_ind = n_states

         if !isnothing(env_distance_state)
-            r⃗₁₂ = rl_params.vecs_r⃗₁₂_to_min_distance_particle[i]
-            si, co = sincos(particles[i].φ)
+            r⃗₁₂ = rl_params.vecs_r⃗₁₂_to_min_distance_particle[id]
+            si, co = sincos(particles[id].φ)

             #=
             Angle between two vectors
@@ -284,9 +293,11 @@ function state_update_hook(
             state_ind = get_state_ind(state, state_space)
         end

-        rl_params.states_ind[i] = state_ind
+        rl_params.states_ind[id] = state_ind
     end

+    env.center_of_mass = center_of_mass(particles, rl_params.half_box_len)
+
     return nothing
 end

@@ -315,15 +326,13 @@ function update_table_and_actions_hook(
     env.state_ind = rl_params.states_ind[id]

     # Update reward
-    env.reward = -(particle.c[1]^2 + particle.c[2]^2)
+    vec_to_center_of_mass = ReCo.minimum_image(
+        particle.c - env.center_of_mass, rl_params.half_box_len
+    )

-    #=
-    1 -
-    (
-        ReCo.gyration_tensor_eigvals_ratio(particles, half_box_len) -
-        rl_params.goal_shape_ratio
-    )^2
-    =#
+    env.reward =
+        -(vec_to_center_of_mass[1]^2 + vec_to_center_of_mass[2]^2) /
+        rl_params.max_elliptic_distance / rl_params.n_particles

     # Post act
     agent(POST_ACT_STAGE, env)
@@ -336,6 +345,7 @@

     rl_params.actions[id] = action
     rl_params.actions_ind[id] = action_ind
+
     return nothing
 end

@@ -354,14 +364,14 @@ function act_hook(
     return nothing
 end

-function gen_agent(n_states::Int64, n_actions::Int64)
+function gen_agent(n_states::Int64, n_actions::Int64, ϵ::Float64)
     policy = QBasedPolicy(;
         learner=MonteCarloLearner(;
             approximator=TabularQApproximator(;
                 n_state=n_states, n_action=n_actions, opt=InvDecay(1.0)
             ),
         ),
-        explorer=EpsilonGreedyExplorer(0.1),
+        explorer=EpsilonGreedyExplorer(ϵ),
     )

     return Agent(; policy=policy, trajectory=VectorSARTTrajectory())
@@ -369,42 +379,55 @@ end

 function run_rl(;
     goal_shape_ratio::Float64,
-    n_episodes::Int64=100,
+    n_episodes::Int64=200,
     episode_duration::Float64=50.0,
-    update_actions_at::Float64=0.2,
+    update_actions_at::Float64=0.1,
     n_particles::Int64=100,
     seed::Int64=42,
+    ϵ::Float64=0.01,
+    parent_dir::String="",
 )
     @assert 0.0 <= goal_shape_ratio <= 1.0
     @assert n_episodes > 0
     @assert episode_duration > 0
-    @assert update_actions_at in 0.01:0.01:episode_duration
+    @assert update_actions_at in 0.001:0.001:episode_duration
     @assert n_particles > 0
+    @assert 0.0 < ϵ < 1.0

     # Setup
     Random.seed!(seed)

-    sim_consts = ReCo.gen_sim_consts(n_particles, 0.0; skin_to_interaction_r_ratio=1.6)
+    sim_consts = ReCo.gen_sim_consts(
+        n_particles, 0.0; skin_to_interaction_r_ratio=1.8, packing_ratio=0.15
+    )
     n_particles = sim_consts.n_particles

     env = Env(sim_consts.particle_radius, sim_consts.skin_r)

-    agent = gen_agent(env.n_states, env.n_actions)
+    agent = gen_agent(env.n_states, env.n_actions, ϵ)

     n_steps_before_actions_update = round(Int64, update_actions_at / sim_consts.δt)

     hook = TotalRewardPerEpisode()

     rl_params = Params(
-        env, agent, hook, n_steps_before_actions_update, goal_shape_ratio, n_particles
+        env,
+        agent,
+        hook,
+        n_steps_before_actions_update,
+        goal_shape_ratio,
+        n_particles,
+        sim_consts.half_box_len,
     )

+    parent_dir = "RL" * parent_dir
+
     # Pre experiment
     hook(PRE_EXPERIMENT_STAGE, agent, env)
     agent(PRE_EXPERIMENT_STAGE, env)

     @showprogress 0.6 for episode in 1:n_episodes
-        dir = ReCo.init_sim_with_sim_consts(sim_consts; parent_dir="RL")
+        dir = ReCo.init_sim_with_sim_consts(sim_consts; parent_dir=parent_dir)

         # Reset
         reset!(env)
@@ -423,6 +446,8 @@ function run_rl(;
         # Post episode
         hook(POST_EPISODE_STAGE, agent, env)
         agent(POST_EPISODE_STAGE, env)
+
+        display(hook.rewards)
     end

     # Post experiment
diff --git a/src/simulation.jl b/src/simulation.jl
index 95d7cbb..4f142ff 100644
--- a/src/simulation.jl
+++ b/src/simulation.jl
@@ -64,7 +64,7 @@ function euler!(
         end
     end

-    state_update_hook(rl_params, args.particles, args.n_particles)
+    state_update_hook(rl_params, args.particles)

     @simd for p in args.particles
         si, co = sincos(p.φ)
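For orientation, here is a minimal standalone sketch of the new center-of-mass reward introduced in the update_table_and_actions_hook hunk above. The `minimum_image` helper below is a simplified stand-in assumed to behave like `ReCo.minimum_image` (standard minimum-image wrapping into the periodic box); the box size, positions, and particle count are illustrative values, not part of the patch.

# Sketch of the new reward shaping (illustrative only).
using StaticArrays: SVector

# Assumed stand-in for ReCo.minimum_image: wrap a displacement back into the
# periodic box of side length 2 * half_box_len.
function minimum_image(v::SVector{2,Float64}, half_box_len::Float64)
    box_len = 2 * half_box_len
    return v .- box_len .* round.(v ./ box_len)
end

half_box_len = 50.0                            # assumed box half-length
max_elliptic_distance = sqrt(2) * half_box_len # normalization constant used in the patch
n_particles = 100

particle_pos = SVector(45.0, -48.0)            # example particle position
com = SVector(-40.0, 47.0)                     # example center of mass

vec_to_com = minimum_image(particle_pos - com, half_box_len)

# Negative squared distance to the center of mass, normalized as in the patch,
# so each particle contributes a small negative reward until it clusters.
reward = -(vec_to_com[1]^2 + vec_to_com[2]^2) / max_elliptic_distance / n_particles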