Mirror of https://gitlab.rlp.net/mobitar/ReCo.jl.git

RL circle around center of mass

Mo8it 2021-12-28 17:15:00 +01:00
parent 775944c872
commit 6f774ea6d0
2 changed files with 59 additions and 34 deletions

Changed file 1 of 2:

@@ -10,7 +10,7 @@ using LoopVectorization: @turbo
 using Random: Random
 using ProgressMeter: @showprogress

-using ..ReCo: ReCo, Particle, angle2
+using ..ReCo: ReCo, Particle, angle2, center_of_mass

 const INITIAL_REWARD = 0.0
@@ -30,15 +30,17 @@ mutable struct Env <: AbstractEnv
     reward::Float64
     terminated::Bool
+    center_of_mass::SVector{2,Float64}

     function Env(
         min_distance::Float64,
         max_distance::Float64;
-        n_v_actions::Int64=2,
-        n_ω_actions::Int64=3,
-        max_v::Float64=80.0,
+        n_v_actions::Int64=3,
+        n_ω_actions::Int64=5,
+        max_v::Float64=60.0,
         max_ω::Float64=π / 2,
-        n_distance_states::Int64=2,
-        n_angle_states::Int64=2,
+        n_distance_states::Int64=3,
+        n_angle_states::Int64=4,
     )
         @assert min_distance > 0.0
         @assert max_distance > min_distance
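The new defaults enlarge both discretizations. A minimal sketch of the resulting sizes, assuming (from the `state_ind = n_states` fallback further down) that one extra state encodes the no-neighbour case:

```julia
# Sketch only; the exact state-space construction lives in Env's constructor.
n_v_actions = 3
n_ω_actions = 5
n_actions = n_v_actions * n_ω_actions  # 15 (v, ω) pairs

n_distance_states = 3
n_angle_states = 4
# One extra state presumably encodes "no neighbour within max_distance":
n_states = n_distance_states * n_angle_states + 1  # 13
```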
@@ -127,6 +129,7 @@ mutable struct Env <: AbstractEnv
             initial_state_ind,
             INITIAL_REWARD,
             false,
+            SVector(0.0, 0.0),
         )
     end
 end
@@ -168,6 +171,9 @@ struct Params{H<:AbstractHook}
     min_sq_distances::Vector{Float64}
     vecs_r⃗₁₂_to_min_distance_particle::Vector{SVector{2,Float64}}
+    half_box_len::Float64
+    max_elliptic_distance::Float64

     function Params(
         env::Env,
         agent::Agent,
@@ -175,7 +181,10 @@ struct Params{H<:AbstractHook}
         n_steps_before_actions_update::Int64,
         goal_shape_ratio::Float64,
         n_particles::Int64,
+        half_box_len::Float64,
     ) where {H<:AbstractHook}
+        max_elliptic_distance = sqrt(2) * half_box_len
+
         n_states = env.n_states

         return new{H}(
@@ -191,6 +200,8 @@ struct Params{H<:AbstractHook}
             n_particles,
             fill(Inf64, n_particles),
             fill(SVector(0.0, 0.0), n_particles),
+            half_box_len,
+            max_elliptic_distance,
         )
     end
 end
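`max_elliptic_distance = sqrt(2) * half_box_len` is the distance from the box center to a corner, i.e. the largest separation possible under the minimum-image convention in a square box of side `2 * half_box_len`. A quick check with a hypothetical box size:

```julia
half_box_len = 100.0  # hypothetical box half-length
# The farthest minimum-image point from the box center is a corner:
corner_distance = sqrt(half_box_len^2 + half_box_len^2)
corner_distance ≈ sqrt(2) * half_box_len  # true
```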
@@ -225,11 +236,9 @@ function get_state_ind(state::S, state_space::Vector{S}) where {S<:SVector{2,Int64}}
     return findfirst(x -> x == state, state_space)
 end

-function state_update_hook(
-    rl_params::Params, particles::Vector{Particle}, n_particles::Int64
-)
-    @turbo for i in 1:n_particles
-        rl_params.old_states_ind[i] = rl_params.states_ind[i]
+function state_update_hook(rl_params::Params, particles::Vector{Particle})
+    @turbo for id in 1:(rl_params.n_particles)
+        rl_params.old_states_ind[id] = rl_params.states_ind[id]
     end

     env = rl_params.env
@@ -240,10 +249,10 @@ function state_update_hook(
     state_space = env.state_space

-    for i in 1:n_particles
+    for id in 1:(rl_params.n_particles)
         env_distance_state::Union{Interval,Nothing} = nothing

-        min_sq_distance = rl_params.min_sq_distances[i]
+        min_sq_distance = rl_params.min_sq_distances[id]
         min_distance = sqrt(min_sq_distance)

         if !isinf(min_sq_distance)
@@ -259,8 +268,8 @@ function state_update_hook(
         state_ind = n_states

         if !isnothing(env_distance_state)
-            r⃗₁₂ = rl_params.vecs_r⃗₁₂_to_min_distance_particle[i]
-            si, co = sincos(particles[i].φ)
+            r⃗₁₂ = rl_params.vecs_r⃗₁₂_to_min_distance_particle[id]
+            si, co = sincos(particles[id].φ)

             #=
             Angle between two vectors
@@ -284,9 +293,11 @@ function state_update_hook(
             state_ind = get_state_ind(state, state_space)
         end

-        rl_params.states_ind[i] = state_ind
+        rl_params.states_ind[id] = state_ind
     end

+    env.center_of_mass = center_of_mass(particles, rl_params.half_box_len)
+
     return nothing
 end
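`center_of_mass` must honour the periodic boundaries, since a plain arithmetic mean is wrong for a cluster wrapping around the box edge. A hypothetical sketch of the standard circular-mean construction (Bai & Breen); `ReCo`'s actual helper may differ in detail:

```julia
# Hypothetical sketch: periodic center of mass via circular means.
function sketch_center_of_mass(positions, half_box_len)
    box_len = 2 * half_box_len
    com = zeros(2)
    for dim in 1:2
        # Map the coordinate onto the unit circle, average there, map back.
        θs = [(p[dim] + half_box_len) / box_len * 2π for p in positions]
        θ_mean = atan(sum(sin.(θs)), sum(cos.(θs)))
        com[dim] = θ_mean / (2π) * box_len - half_box_len
    end
    return com
end

# Two particles hugging opposite edges average to the edge, not the center:
sketch_center_of_mass([[90.0, 0.0], [-90.0, 0.0]], 100.0)  # ≈ [-100.0, 0.0]
```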
@@ -315,15 +326,13 @@ function update_table_and_actions_hook(
     env.state_ind = rl_params.states_ind[id]

     # Update reward
-    env.reward = -(particle.c[1]^2 + particle.c[2]^2)
+    vec_to_center_of_mass = ReCo.minimum_image(
+        particle.c - env.center_of_mass, rl_params.half_box_len
+    )
-    #=
-    1 -
-    (
-        ReCo.gyration_tensor_eigvals_ratio(particles, half_box_len) -
-        rl_params.goal_shape_ratio
-    )^2
-    =#
+    env.reward =
+        -(vec_to_center_of_mass[1]^2 + vec_to_center_of_mass[2]^2) /
+        rl_params.max_elliptic_distance / rl_params.n_particles

     # Post act
     agent(POST_ACT_STAGE, env)
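The reward now penalizes each particle's squared minimum-image distance to the center of mass, normalized by `max_elliptic_distance` and the particle count, presumably to keep rewards small and comparable across system sizes. A worked example with hypothetical numbers:

```julia
# Hypothetical values, only to illustrate the sign and scale of the reward.
particle_c     = [30.0, -10.0]
center_of_mass = [25.0, -4.0]
half_box_len   = 100.0
max_elliptic_distance = sqrt(2) * half_box_len
n_particles    = 100

# ReCo.minimum_image wraps each component back into the box; these values
# need no wrapping, so the plain difference applies:
v = particle_c .- center_of_mass  # [5.0, -6.0]
reward = -(v[1]^2 + v[2]^2) / max_elliptic_distance / n_particles
# ≈ -61 / 141.42 / 100 ≈ -0.0043
```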
@@ -336,6 +345,7 @@ function update_table_and_actions_hook(
     rl_params.actions[id] = action
     rl_params.actions_ind[id] = action_ind

     return nothing
 end
@@ -354,14 +364,14 @@ function act_hook(
     return nothing
 end

-function gen_agent(n_states::Int64, n_actions::Int64)
+function gen_agent(n_states::Int64, n_actions::Int64, ϵ::Float64)
     policy = QBasedPolicy(;
         learner=MonteCarloLearner(;
             approximator=TabularQApproximator(;
                 n_state=n_states, n_action=n_actions, opt=InvDecay(1.0)
             ),
         ),
-        explorer=EpsilonGreedyExplorer(0.1),
+        explorer=EpsilonGreedyExplorer(ϵ),
     )

     return Agent(; policy=policy, trajectory=VectorSARTTrajectory())
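With `EpsilonGreedyExplorer(ϵ)`, the policy takes a uniformly random action with probability ϵ and the greedy action otherwise, so exposing ϵ makes the exploration rate tunable from `run_rl`. A usage sketch, assuming an `env` as set up below:

```julia
# With the new default ϵ = 0.01, roughly 1% of action updates explore:
agent = gen_agent(env.n_states, env.n_actions, 0.01)
```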
@@ -369,42 +379,55 @@ end
 function run_rl(;
     goal_shape_ratio::Float64,
-    n_episodes::Int64=100,
+    n_episodes::Int64=200,
     episode_duration::Float64=50.0,
-    update_actions_at::Float64=0.2,
+    update_actions_at::Float64=0.1,
     n_particles::Int64=100,
     seed::Int64=42,
+    ϵ::Float64=0.01,
+    parent_dir::String="",
 )
     @assert 0.0 <= goal_shape_ratio <= 1.0
     @assert n_episodes > 0
     @assert episode_duration > 0
-    @assert update_actions_at in 0.01:0.01:episode_duration
+    @assert update_actions_at in 0.001:0.001:episode_duration
     @assert n_particles > 0
+    @assert 0.0 < ϵ < 1.0

     # Setup
     Random.seed!(seed)

-    sim_consts = ReCo.gen_sim_consts(n_particles, 0.0; skin_to_interaction_r_ratio=1.6)
+    sim_consts = ReCo.gen_sim_consts(
+        n_particles, 0.0; skin_to_interaction_r_ratio=1.8, packing_ratio=0.15
+    )
     n_particles = sim_consts.n_particles

     env = Env(sim_consts.particle_radius, sim_consts.skin_r)
-    agent = gen_agent(env.n_states, env.n_actions)
+    agent = gen_agent(env.n_states, env.n_actions, ϵ)

     n_steps_before_actions_update = round(Int64, update_actions_at / sim_consts.δt)

     hook = TotalRewardPerEpisode()

     rl_params = Params(
-        env, agent, hook, n_steps_before_actions_update, goal_shape_ratio, n_particles
+        env,
+        agent,
+        hook,
+        n_steps_before_actions_update,
+        goal_shape_ratio,
+        n_particles,
+        sim_consts.half_box_len,
     )

+    parent_dir = "RL" * parent_dir
+
     # Pre experiment
     hook(PRE_EXPERIMENT_STAGE, agent, env)
     agent(PRE_EXPERIMENT_STAGE, env)

     @showprogress 0.6 for episode in 1:n_episodes
-        dir = ReCo.init_sim_with_sim_consts(sim_consts; parent_dir="RL")
+        dir = ReCo.init_sim_with_sim_consts(sim_consts; parent_dir=parent_dir)

         # Reset
         reset!(env)
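A hypothetical invocation with the new keywords; the module path and the `goal_shape_ratio` value are assumptions for illustration:

```julia
# Hypothetical call, assuming the RL module is reachable as ReCo.RL:
ReCo.RL.run_rl(;
    goal_shape_ratio=0.5,  # example target only
    n_episodes=200,
    ϵ=0.01,
    parent_dir="_test",    # output lands under parent dir "RL_test"
)
```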
@@ -423,6 +446,8 @@ function run_rl(;
         # Post episode
         hook(POST_EPISODE_STAGE, agent, env)
         agent(POST_EPISODE_STAGE, env)
+
+        display(hook.rewards)
     end

     # Post experiment

Changed file 2 of 2:

@@ -64,7 +64,7 @@ function euler!(
         end
     end

-    state_update_hook(rl_params, args.particles, args.n_particles)
+    state_update_hook(rl_params, args.particles)

     @simd for p in args.particles
         si, co = sincos(p.φ)
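This call runs inside the integrator loop, so states (and actions) refresh every `n_steps_before_actions_update` time steps. With the new default `update_actions_at = 0.1` and a hypothetical time step:

```julia
# Hypothetical δt; sim_consts.δt is set elsewhere in ReCo.
δt = 1e-3
update_actions_at = 0.1
n_steps_before_actions_update = round(Int64, update_actions_at / δt)  # 100
```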