mirror of
https://gitlab.rlp.net/mobitar/ReCo.jl.git
synced 2024-12-21 00:51:21 +00:00
RL circle around center of mass
This commit is contained in:
parent
775944c872
commit
6f774ea6d0
2 changed files with 59 additions and 34 deletions
91
src/RL.jl
91
src/RL.jl
|
@ -10,7 +10,7 @@ using LoopVectorization: @turbo
|
||||||
using Random: Random
|
using Random: Random
|
||||||
using ProgressMeter: @showprogress
|
using ProgressMeter: @showprogress
|
||||||
|
|
||||||
using ..ReCo: ReCo, Particle, angle2
|
using ..ReCo: ReCo, Particle, angle2, center_of_mass
|
||||||
|
|
||||||
const INITIAL_REWARD = 0.0
|
const INITIAL_REWARD = 0.0
|
||||||
|
|
||||||
|
@ -30,15 +30,17 @@ mutable struct Env <: AbstractEnv
|
||||||
reward::Float64
|
reward::Float64
|
||||||
terminated::Bool
|
terminated::Bool
|
||||||
|
|
||||||
|
center_of_mass::SVector{2,Float64}
|
||||||
|
|
||||||
function Env(
|
function Env(
|
||||||
min_distance::Float64,
|
min_distance::Float64,
|
||||||
max_distance::Float64;
|
max_distance::Float64;
|
||||||
n_v_actions::Int64=2,
|
n_v_actions::Int64=3,
|
||||||
n_ω_actions::Int64=3,
|
n_ω_actions::Int64=5,
|
||||||
max_v::Float64=80.0,
|
max_v::Float64=60.0,
|
||||||
max_ω::Float64=π / 2,
|
max_ω::Float64=π / 2,
|
||||||
n_distance_states::Int64=2,
|
n_distance_states::Int64=3,
|
||||||
n_angle_states::Int64=2,
|
n_angle_states::Int64=4,
|
||||||
)
|
)
|
||||||
@assert min_distance > 0.0
|
@assert min_distance > 0.0
|
||||||
@assert max_distance > min_distance
|
@assert max_distance > min_distance
|
||||||
|
@ -127,6 +129,7 @@ mutable struct Env <: AbstractEnv
|
||||||
initial_state_ind,
|
initial_state_ind,
|
||||||
INITIAL_REWARD,
|
INITIAL_REWARD,
|
||||||
false,
|
false,
|
||||||
|
SVector(0.0, 0.0),
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -168,6 +171,9 @@ struct Params{H<:AbstractHook}
|
||||||
min_sq_distances::Vector{Float64}
|
min_sq_distances::Vector{Float64}
|
||||||
vecs_r⃗₁₂_to_min_distance_particle::Vector{SVector{2,Float64}}
|
vecs_r⃗₁₂_to_min_distance_particle::Vector{SVector{2,Float64}}
|
||||||
|
|
||||||
|
half_box_len::Float64
|
||||||
|
max_elliptic_distance::Float64
|
||||||
|
|
||||||
function Params(
|
function Params(
|
||||||
env::Env,
|
env::Env,
|
||||||
agent::Agent,
|
agent::Agent,
|
||||||
|
@ -175,7 +181,10 @@ struct Params{H<:AbstractHook}
|
||||||
n_steps_before_actions_update::Int64,
|
n_steps_before_actions_update::Int64,
|
||||||
goal_shape_ratio::Float64,
|
goal_shape_ratio::Float64,
|
||||||
n_particles::Int64,
|
n_particles::Int64,
|
||||||
|
half_box_len::Float64,
|
||||||
) where {H<:AbstractHook}
|
) where {H<:AbstractHook}
|
||||||
|
max_elliptic_distance = sqrt(2) * half_box_len
|
||||||
|
|
||||||
n_states = env.n_states
|
n_states = env.n_states
|
||||||
|
|
||||||
return new{H}(
|
return new{H}(
|
||||||
|
@ -191,6 +200,8 @@ struct Params{H<:AbstractHook}
|
||||||
n_particles,
|
n_particles,
|
||||||
fill(Inf64, n_particles),
|
fill(Inf64, n_particles),
|
||||||
fill(SVector(0.0, 0.0), n_particles),
|
fill(SVector(0.0, 0.0), n_particles),
|
||||||
|
half_box_len,
|
||||||
|
max_elliptic_distance,
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -225,11 +236,9 @@ function get_state_ind(state::S, state_space::Vector{S}) where {S<:SVector{2,Int
|
||||||
return findfirst(x -> x == state, state_space)
|
return findfirst(x -> x == state, state_space)
|
||||||
end
|
end
|
||||||
|
|
||||||
function state_update_hook(
|
function state_update_hook(rl_params::Params, particles::Vector{Particle})
|
||||||
rl_params::Params, particles::Vector{Particle}, n_particles::Int64
|
@turbo for id in 1:(rl_params.n_particles)
|
||||||
)
|
rl_params.old_states_ind[id] = rl_params.states_ind[id]
|
||||||
@turbo for i in 1:n_particles
|
|
||||||
rl_params.old_states_ind[i] = rl_params.states_ind[i]
|
|
||||||
end
|
end
|
||||||
|
|
||||||
env = rl_params.env
|
env = rl_params.env
|
||||||
|
@ -240,10 +249,10 @@ function state_update_hook(
|
||||||
|
|
||||||
state_space = env.state_space
|
state_space = env.state_space
|
||||||
|
|
||||||
for i in 1:n_particles
|
for id in 1:(rl_params.n_particles)
|
||||||
env_distance_state::Union{Interval,Nothing} = nothing
|
env_distance_state::Union{Interval,Nothing} = nothing
|
||||||
|
|
||||||
min_sq_distance = rl_params.min_sq_distances[i]
|
min_sq_distance = rl_params.min_sq_distances[id]
|
||||||
min_distance = sqrt(min_sq_distance)
|
min_distance = sqrt(min_sq_distance)
|
||||||
|
|
||||||
if !isinf(min_sq_distance)
|
if !isinf(min_sq_distance)
|
||||||
|
@ -259,8 +268,8 @@ function state_update_hook(
|
||||||
state_ind = n_states
|
state_ind = n_states
|
||||||
|
|
||||||
if !isnothing(env_distance_state)
|
if !isnothing(env_distance_state)
|
||||||
r⃗₁₂ = rl_params.vecs_r⃗₁₂_to_min_distance_particle[i]
|
r⃗₁₂ = rl_params.vecs_r⃗₁₂_to_min_distance_particle[id]
|
||||||
si, co = sincos(particles[i].φ)
|
si, co = sincos(particles[id].φ)
|
||||||
|
|
||||||
#=
|
#=
|
||||||
Angle between two vectors
|
Angle between two vectors
|
||||||
|
@ -284,9 +293,11 @@ function state_update_hook(
|
||||||
state_ind = get_state_ind(state, state_space)
|
state_ind = get_state_ind(state, state_space)
|
||||||
end
|
end
|
||||||
|
|
||||||
rl_params.states_ind[i] = state_ind
|
rl_params.states_ind[id] = state_ind
|
||||||
end
|
end
|
||||||
|
|
||||||
|
env.center_of_mass = center_of_mass(particles, rl_params.half_box_len)
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -315,15 +326,13 @@ function update_table_and_actions_hook(
|
||||||
env.state_ind = rl_params.states_ind[id]
|
env.state_ind = rl_params.states_ind[id]
|
||||||
|
|
||||||
# Update reward
|
# Update reward
|
||||||
env.reward = -(particle.c[1]^2 + particle.c[2]^2)
|
vec_to_center_of_mass = ReCo.minimum_image(
|
||||||
|
particle.c - env.center_of_mass, rl_params.half_box_len
|
||||||
|
)
|
||||||
|
|
||||||
#=
|
env.reward =
|
||||||
1 -
|
-(vec_to_center_of_mass[1]^2 + vec_to_center_of_mass[2]^2) /
|
||||||
(
|
rl_params.max_elliptic_distance / rl_params.n_particles
|
||||||
ReCo.gyration_tensor_eigvals_ratio(particles, half_box_len) -
|
|
||||||
rl_params.goal_shape_ratio
|
|
||||||
)^2
|
|
||||||
=#
|
|
||||||
|
|
||||||
# Post act
|
# Post act
|
||||||
agent(POST_ACT_STAGE, env)
|
agent(POST_ACT_STAGE, env)
|
||||||
|
@ -336,6 +345,7 @@ function update_table_and_actions_hook(
|
||||||
|
|
||||||
rl_params.actions[id] = action
|
rl_params.actions[id] = action
|
||||||
rl_params.actions_ind[id] = action_ind
|
rl_params.actions_ind[id] = action_ind
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -354,14 +364,14 @@ function act_hook(
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
function gen_agent(n_states::Int64, n_actions::Int64)
|
function gen_agent(n_states::Int64, n_actions::Int64, ϵ::Float64)
|
||||||
policy = QBasedPolicy(;
|
policy = QBasedPolicy(;
|
||||||
learner=MonteCarloLearner(;
|
learner=MonteCarloLearner(;
|
||||||
approximator=TabularQApproximator(;
|
approximator=TabularQApproximator(;
|
||||||
n_state=n_states, n_action=n_actions, opt=InvDecay(1.0)
|
n_state=n_states, n_action=n_actions, opt=InvDecay(1.0)
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
explorer=EpsilonGreedyExplorer(0.1),
|
explorer=EpsilonGreedyExplorer(ϵ),
|
||||||
)
|
)
|
||||||
|
|
||||||
return Agent(; policy=policy, trajectory=VectorSARTTrajectory())
|
return Agent(; policy=policy, trajectory=VectorSARTTrajectory())
|
||||||
|
@ -369,42 +379,55 @@ end
|
||||||
|
|
||||||
function run_rl(;
|
function run_rl(;
|
||||||
goal_shape_ratio::Float64,
|
goal_shape_ratio::Float64,
|
||||||
n_episodes::Int64=100,
|
n_episodes::Int64=200,
|
||||||
episode_duration::Float64=50.0,
|
episode_duration::Float64=50.0,
|
||||||
update_actions_at::Float64=0.2,
|
update_actions_at::Float64=0.1,
|
||||||
n_particles::Int64=100,
|
n_particles::Int64=100,
|
||||||
seed::Int64=42,
|
seed::Int64=42,
|
||||||
|
ϵ::Float64=0.01,
|
||||||
|
parent_dir::String="",
|
||||||
)
|
)
|
||||||
@assert 0.0 <= goal_shape_ratio <= 1.0
|
@assert 0.0 <= goal_shape_ratio <= 1.0
|
||||||
@assert n_episodes > 0
|
@assert n_episodes > 0
|
||||||
@assert episode_duration > 0
|
@assert episode_duration > 0
|
||||||
@assert update_actions_at in 0.01:0.01:episode_duration
|
@assert update_actions_at in 0.001:0.001:episode_duration
|
||||||
@assert n_particles > 0
|
@assert n_particles > 0
|
||||||
|
@assert 0.0 < ϵ < 1.0
|
||||||
|
|
||||||
# Setup
|
# Setup
|
||||||
Random.seed!(seed)
|
Random.seed!(seed)
|
||||||
|
|
||||||
sim_consts = ReCo.gen_sim_consts(n_particles, 0.0; skin_to_interaction_r_ratio=1.6)
|
sim_consts = ReCo.gen_sim_consts(
|
||||||
|
n_particles, 0.0; skin_to_interaction_r_ratio=1.8, packing_ratio=0.15
|
||||||
|
)
|
||||||
n_particles = sim_consts.n_particles
|
n_particles = sim_consts.n_particles
|
||||||
|
|
||||||
env = Env(sim_consts.particle_radius, sim_consts.skin_r)
|
env = Env(sim_consts.particle_radius, sim_consts.skin_r)
|
||||||
|
|
||||||
agent = gen_agent(env.n_states, env.n_actions)
|
agent = gen_agent(env.n_states, env.n_actions, ϵ)
|
||||||
|
|
||||||
n_steps_before_actions_update = round(Int64, update_actions_at / sim_consts.δt)
|
n_steps_before_actions_update = round(Int64, update_actions_at / sim_consts.δt)
|
||||||
|
|
||||||
hook = TotalRewardPerEpisode()
|
hook = TotalRewardPerEpisode()
|
||||||
|
|
||||||
rl_params = Params(
|
rl_params = Params(
|
||||||
env, agent, hook, n_steps_before_actions_update, goal_shape_ratio, n_particles
|
env,
|
||||||
|
agent,
|
||||||
|
hook,
|
||||||
|
n_steps_before_actions_update,
|
||||||
|
goal_shape_ratio,
|
||||||
|
n_particles,
|
||||||
|
sim_consts.half_box_len,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
parent_dir = "RL" * parent_dir
|
||||||
|
|
||||||
# Pre experiment
|
# Pre experiment
|
||||||
hook(PRE_EXPERIMENT_STAGE, agent, env)
|
hook(PRE_EXPERIMENT_STAGE, agent, env)
|
||||||
agent(PRE_EXPERIMENT_STAGE, env)
|
agent(PRE_EXPERIMENT_STAGE, env)
|
||||||
|
|
||||||
@showprogress 0.6 for episode in 1:n_episodes
|
@showprogress 0.6 for episode in 1:n_episodes
|
||||||
dir = ReCo.init_sim_with_sim_consts(sim_consts; parent_dir="RL")
|
dir = ReCo.init_sim_with_sim_consts(sim_consts; parent_dir=parent_dir)
|
||||||
|
|
||||||
# Reset
|
# Reset
|
||||||
reset!(env)
|
reset!(env)
|
||||||
|
@ -423,6 +446,8 @@ function run_rl(;
|
||||||
# Post episode
|
# Post episode
|
||||||
hook(POST_EPISODE_STAGE, agent, env)
|
hook(POST_EPISODE_STAGE, agent, env)
|
||||||
agent(POST_EPISODE_STAGE, env)
|
agent(POST_EPISODE_STAGE, env)
|
||||||
|
|
||||||
|
display(hook.rewards)
|
||||||
end
|
end
|
||||||
|
|
||||||
# Post experiment
|
# Post experiment
|
||||||
|
|
|
@ -64,7 +64,7 @@ function euler!(
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
state_update_hook(rl_params, args.particles, args.n_particles)
|
state_update_hook(rl_params, args.particles)
|
||||||
|
|
||||||
@simd for p in args.particles
|
@simd for p in args.particles
|
||||||
si, co = sincos(p.φ)
|
si, co = sincos(p.φ)
|
||||||
|
|
Loading…
Reference in a new issue