mirror of
https://gitlab.rlp.net/mobitar/ReCo.jl.git
synced 2024-12-21 00:51:21 +00:00
RL circle around center of mass
This commit is contained in:
parent
775944c872
commit
6f774ea6d0
2 changed files with 59 additions and 34 deletions
91
src/RL.jl
91
src/RL.jl
|
@ -10,7 +10,7 @@ using LoopVectorization: @turbo
|
|||
using Random: Random
|
||||
using ProgressMeter: @showprogress
|
||||
|
||||
using ..ReCo: ReCo, Particle, angle2
|
||||
using ..ReCo: ReCo, Particle, angle2, center_of_mass
|
||||
|
||||
const INITIAL_REWARD = 0.0
|
||||
|
||||
|
@ -30,15 +30,17 @@ mutable struct Env <: AbstractEnv
|
|||
reward::Float64
|
||||
terminated::Bool
|
||||
|
||||
center_of_mass::SVector{2,Float64}
|
||||
|
||||
function Env(
|
||||
min_distance::Float64,
|
||||
max_distance::Float64;
|
||||
n_v_actions::Int64=2,
|
||||
n_ω_actions::Int64=3,
|
||||
max_v::Float64=80.0,
|
||||
n_v_actions::Int64=3,
|
||||
n_ω_actions::Int64=5,
|
||||
max_v::Float64=60.0,
|
||||
max_ω::Float64=π / 2,
|
||||
n_distance_states::Int64=2,
|
||||
n_angle_states::Int64=2,
|
||||
n_distance_states::Int64=3,
|
||||
n_angle_states::Int64=4,
|
||||
)
|
||||
@assert min_distance > 0.0
|
||||
@assert max_distance > min_distance
|
||||
|
@ -127,6 +129,7 @@ mutable struct Env <: AbstractEnv
|
|||
initial_state_ind,
|
||||
INITIAL_REWARD,
|
||||
false,
|
||||
SVector(0.0, 0.0),
|
||||
)
|
||||
end
|
||||
end
|
||||
|
@ -168,6 +171,9 @@ struct Params{H<:AbstractHook}
|
|||
min_sq_distances::Vector{Float64}
|
||||
vecs_r⃗₁₂_to_min_distance_particle::Vector{SVector{2,Float64}}
|
||||
|
||||
half_box_len::Float64
|
||||
max_elliptic_distance::Float64
|
||||
|
||||
function Params(
|
||||
env::Env,
|
||||
agent::Agent,
|
||||
|
@ -175,7 +181,10 @@ struct Params{H<:AbstractHook}
|
|||
n_steps_before_actions_update::Int64,
|
||||
goal_shape_ratio::Float64,
|
||||
n_particles::Int64,
|
||||
half_box_len::Float64,
|
||||
) where {H<:AbstractHook}
|
||||
max_elliptic_distance = sqrt(2) * half_box_len
|
||||
|
||||
n_states = env.n_states
|
||||
|
||||
return new{H}(
|
||||
|
@ -191,6 +200,8 @@ struct Params{H<:AbstractHook}
|
|||
n_particles,
|
||||
fill(Inf64, n_particles),
|
||||
fill(SVector(0.0, 0.0), n_particles),
|
||||
half_box_len,
|
||||
max_elliptic_distance,
|
||||
)
|
||||
end
|
||||
end
|
||||
|
@ -225,11 +236,9 @@ function get_state_ind(state::S, state_space::Vector{S}) where {S<:SVector{2,Int
|
|||
return findfirst(x -> x == state, state_space)
|
||||
end
|
||||
|
||||
function state_update_hook(
|
||||
rl_params::Params, particles::Vector{Particle}, n_particles::Int64
|
||||
)
|
||||
@turbo for i in 1:n_particles
|
||||
rl_params.old_states_ind[i] = rl_params.states_ind[i]
|
||||
function state_update_hook(rl_params::Params, particles::Vector{Particle})
|
||||
@turbo for id in 1:(rl_params.n_particles)
|
||||
rl_params.old_states_ind[id] = rl_params.states_ind[id]
|
||||
end
|
||||
|
||||
env = rl_params.env
|
||||
|
@ -240,10 +249,10 @@ function state_update_hook(
|
|||
|
||||
state_space = env.state_space
|
||||
|
||||
for i in 1:n_particles
|
||||
for id in 1:(rl_params.n_particles)
|
||||
env_distance_state::Union{Interval,Nothing} = nothing
|
||||
|
||||
min_sq_distance = rl_params.min_sq_distances[i]
|
||||
min_sq_distance = rl_params.min_sq_distances[id]
|
||||
min_distance = sqrt(min_sq_distance)
|
||||
|
||||
if !isinf(min_sq_distance)
|
||||
|
@ -259,8 +268,8 @@ function state_update_hook(
|
|||
state_ind = n_states
|
||||
|
||||
if !isnothing(env_distance_state)
|
||||
r⃗₁₂ = rl_params.vecs_r⃗₁₂_to_min_distance_particle[i]
|
||||
si, co = sincos(particles[i].φ)
|
||||
r⃗₁₂ = rl_params.vecs_r⃗₁₂_to_min_distance_particle[id]
|
||||
si, co = sincos(particles[id].φ)
|
||||
|
||||
#=
|
||||
Angle between two vectors
|
||||
|
@ -284,9 +293,11 @@ function state_update_hook(
|
|||
state_ind = get_state_ind(state, state_space)
|
||||
end
|
||||
|
||||
rl_params.states_ind[i] = state_ind
|
||||
rl_params.states_ind[id] = state_ind
|
||||
end
|
||||
|
||||
env.center_of_mass = center_of_mass(particles, rl_params.half_box_len)
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
|
@ -315,15 +326,13 @@ function update_table_and_actions_hook(
|
|||
env.state_ind = rl_params.states_ind[id]
|
||||
|
||||
# Update reward
|
||||
env.reward = -(particle.c[1]^2 + particle.c[2]^2)
|
||||
vec_to_center_of_mass = ReCo.minimum_image(
|
||||
particle.c - env.center_of_mass, rl_params.half_box_len
|
||||
)
|
||||
|
||||
#=
|
||||
1 -
|
||||
(
|
||||
ReCo.gyration_tensor_eigvals_ratio(particles, half_box_len) -
|
||||
rl_params.goal_shape_ratio
|
||||
)^2
|
||||
=#
|
||||
env.reward =
|
||||
-(vec_to_center_of_mass[1]^2 + vec_to_center_of_mass[2]^2) /
|
||||
rl_params.max_elliptic_distance / rl_params.n_particles
|
||||
|
||||
# Post act
|
||||
agent(POST_ACT_STAGE, env)
|
||||
|
@ -336,6 +345,7 @@ function update_table_and_actions_hook(
|
|||
|
||||
rl_params.actions[id] = action
|
||||
rl_params.actions_ind[id] = action_ind
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
|
@ -354,14 +364,14 @@ function act_hook(
|
|||
return nothing
|
||||
end
|
||||
|
||||
function gen_agent(n_states::Int64, n_actions::Int64)
|
||||
function gen_agent(n_states::Int64, n_actions::Int64, ϵ::Float64)
|
||||
policy = QBasedPolicy(;
|
||||
learner=MonteCarloLearner(;
|
||||
approximator=TabularQApproximator(;
|
||||
n_state=n_states, n_action=n_actions, opt=InvDecay(1.0)
|
||||
),
|
||||
),
|
||||
explorer=EpsilonGreedyExplorer(0.1),
|
||||
explorer=EpsilonGreedyExplorer(ϵ),
|
||||
)
|
||||
|
||||
return Agent(; policy=policy, trajectory=VectorSARTTrajectory())
|
||||
|
@ -369,42 +379,55 @@ end
|
|||
|
||||
function run_rl(;
|
||||
goal_shape_ratio::Float64,
|
||||
n_episodes::Int64=100,
|
||||
n_episodes::Int64=200,
|
||||
episode_duration::Float64=50.0,
|
||||
update_actions_at::Float64=0.2,
|
||||
update_actions_at::Float64=0.1,
|
||||
n_particles::Int64=100,
|
||||
seed::Int64=42,
|
||||
ϵ::Float64=0.01,
|
||||
parent_dir::String="",
|
||||
)
|
||||
@assert 0.0 <= goal_shape_ratio <= 1.0
|
||||
@assert n_episodes > 0
|
||||
@assert episode_duration > 0
|
||||
@assert update_actions_at in 0.01:0.01:episode_duration
|
||||
@assert update_actions_at in 0.001:0.001:episode_duration
|
||||
@assert n_particles > 0
|
||||
@assert 0.0 < ϵ < 1.0
|
||||
|
||||
# Setup
|
||||
Random.seed!(seed)
|
||||
|
||||
sim_consts = ReCo.gen_sim_consts(n_particles, 0.0; skin_to_interaction_r_ratio=1.6)
|
||||
sim_consts = ReCo.gen_sim_consts(
|
||||
n_particles, 0.0; skin_to_interaction_r_ratio=1.8, packing_ratio=0.15
|
||||
)
|
||||
n_particles = sim_consts.n_particles
|
||||
|
||||
env = Env(sim_consts.particle_radius, sim_consts.skin_r)
|
||||
|
||||
agent = gen_agent(env.n_states, env.n_actions)
|
||||
agent = gen_agent(env.n_states, env.n_actions, ϵ)
|
||||
|
||||
n_steps_before_actions_update = round(Int64, update_actions_at / sim_consts.δt)
|
||||
|
||||
hook = TotalRewardPerEpisode()
|
||||
|
||||
rl_params = Params(
|
||||
env, agent, hook, n_steps_before_actions_update, goal_shape_ratio, n_particles
|
||||
env,
|
||||
agent,
|
||||
hook,
|
||||
n_steps_before_actions_update,
|
||||
goal_shape_ratio,
|
||||
n_particles,
|
||||
sim_consts.half_box_len,
|
||||
)
|
||||
|
||||
parent_dir = "RL" * parent_dir
|
||||
|
||||
# Pre experiment
|
||||
hook(PRE_EXPERIMENT_STAGE, agent, env)
|
||||
agent(PRE_EXPERIMENT_STAGE, env)
|
||||
|
||||
@showprogress 0.6 for episode in 1:n_episodes
|
||||
dir = ReCo.init_sim_with_sim_consts(sim_consts; parent_dir="RL")
|
||||
dir = ReCo.init_sim_with_sim_consts(sim_consts; parent_dir=parent_dir)
|
||||
|
||||
# Reset
|
||||
reset!(env)
|
||||
|
@ -423,6 +446,8 @@ function run_rl(;
|
|||
# Post episode
|
||||
hook(POST_EPISODE_STAGE, agent, env)
|
||||
agent(POST_EPISODE_STAGE, env)
|
||||
|
||||
display(hook.rewards)
|
||||
end
|
||||
|
||||
# Post experiment
|
||||
|
|
|
@ -64,7 +64,7 @@ function euler!(
|
|||
end
|
||||
end
|
||||
|
||||
state_update_hook(rl_params, args.particles, args.n_particles)
|
||||
state_update_hook(rl_params, args.particles)
|
||||
|
||||
@simd for p in args.particles
|
||||
si, co = sincos(p.φ)
|
||||
|
|
Loading…
Reference in a new issue