mirror of
https://gitlab.rlp.net/mobitar/ReCo.jl.git
synced 2024-11-08 22:21:08 +00:00
Added shape reward term
This commit is contained in:
parent
9b233d53c4
commit
891af721c0
4 changed files with 98 additions and 27 deletions
|
@ -1,4 +1,4 @@
|
||||||
name = "ReCo.jl"
|
name = "ReCo"
|
||||||
uuid = "b25f7548-fcc9-4c91-bc24-841b54f4dd54"
|
uuid = "b25f7548-fcc9-4c91-bc24-841b54f4dd54"
|
||||||
authors = ["MoBit <mo8it@protonmail.com>"]
|
authors = ["MoBit <mo8it@protonmail.com>"]
|
||||||
version = "0.2.0"
|
version = "0.2.0"
|
||||||
|
|
|
@ -40,7 +40,7 @@ struct EnvHelperSharedProps{H<:AbstractHook}
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
function gen_env_helper(::Env, env_helper_params::EnvHelperSharedProps)
|
function gen_env_helper(::Env, env_helper_params::EnvHelperSharedProps; args)
|
||||||
return method_not_implemented()
|
return method_not_implemented()
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -6,17 +6,15 @@ struct LocalCOMEnv <: Env
|
||||||
distance_state_space::Vector{Interval}
|
distance_state_space::Vector{Interval}
|
||||||
direction_angle_state_space::Vector{Interval}
|
direction_angle_state_space::Vector{Interval}
|
||||||
|
|
||||||
max_distance::Float64
|
function LocalCOMEnv(;
|
||||||
|
n_distance_states::Int64=3, n_direction_angle_states::Int64=3, args
|
||||||
function LocalCOMEnv(
|
|
||||||
sim_consts; n_distance_states::Int64=3, n_direction_angle_states::Int64=3
|
|
||||||
)
|
)
|
||||||
@assert n_direction_angle_states > 1
|
@assert n_direction_angle_states > 1
|
||||||
|
|
||||||
direction_angle_state_space = gen_angle_state_space(n_direction_angle_states)
|
direction_angle_state_space = gen_angle_state_space(n_direction_angle_states)
|
||||||
|
|
||||||
min_distance = 0.0
|
min_distance = 0.0
|
||||||
max_distance = sim_consts.skin_r
|
max_distance = args.skin_r
|
||||||
|
|
||||||
distance_state_space = gen_distance_state_space(
|
distance_state_space = gen_distance_state_space(
|
||||||
min_distance, max_distance, n_distance_states
|
min_distance, max_distance, n_distance_states
|
||||||
|
@ -37,29 +35,50 @@ struct LocalCOMEnv <: Env
|
||||||
|
|
||||||
shared = EnvSharedProps(n_states, state_space)
|
shared = EnvSharedProps(n_states, state_space)
|
||||||
|
|
||||||
return new(shared, distance_state_space, direction_angle_state_space, max_distance)
|
return new(shared, distance_state_space, direction_angle_state_space)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
struct LocalCOMEnvHelper <: EnvHelper
|
mutable struct LocalCOMEnvHelper <: EnvHelper
|
||||||
shared::EnvHelperSharedProps
|
shared::EnvHelperSharedProps
|
||||||
|
|
||||||
vec_to_neighbour_sums::Vector{SVector{2,Float64}}
|
vec_to_neighbour_sums::Vector{SVector{2,Float64}}
|
||||||
n_neighbours::Vector{Int64}
|
n_neighbours::Vector{Int64}
|
||||||
sq_norm2d_vec_to_local_center_of_mass::Vector{Float64}
|
|
||||||
|
|
||||||
function LocalCOMEnvHelper(shared::EnvHelperSharedProps)
|
distances_to_local_center_of_mass::Vector{Float64}
|
||||||
|
max_distance_to_local_center_of_mass::Float64
|
||||||
|
|
||||||
|
add_shape_reward_term::Bool
|
||||||
|
|
||||||
|
gyration_tensor_eigvec_to_smaller_eigval::SVector{2,Float64}
|
||||||
|
gyration_tensor_eigvec_to_bigger_eigval::SVector{2,Float64}
|
||||||
|
|
||||||
|
half_box_len::Float64
|
||||||
|
max_elliptical_distance::Float64
|
||||||
|
|
||||||
|
function LocalCOMEnvHelper(shared::EnvHelperSharedProps, half_box_len::Float64, skin_r)
|
||||||
|
max_elliptical_distance =
|
||||||
|
sqrt(2) * half_box_len / shared.goal_gyration_tensor_eigvals_ratio
|
||||||
|
|
||||||
|
max_distance_to_local_center_of_mass = skin_r
|
||||||
|
|
||||||
return new(
|
return new(
|
||||||
shared,
|
shared,
|
||||||
fill(SVector(0.0, 0.0), shared.n_particles),
|
fill(SVector(0.0, 0.0), shared.n_particles),
|
||||||
fill(0, shared.n_particles),
|
fill(0, shared.n_particles),
|
||||||
zeros(shared.n_particles),
|
zeros(shared.n_particles),
|
||||||
|
max_distance_to_local_center_of_mass,
|
||||||
|
false,
|
||||||
|
SVector(0.0, 0.0),
|
||||||
|
SVector(0.0, 0.0),
|
||||||
|
half_box_len,
|
||||||
|
max_elliptical_distance,
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
function gen_env_helper(::LocalCOMEnv, env_helper_params::EnvHelperSharedProps)
|
function gen_env_helper(::LocalCOMEnv, env_helper_shared::EnvHelperSharedProps; args)
|
||||||
return LocalCOMEnvHelper(env_helper_params)
|
return LocalCOMEnvHelper(env_helper_shared, args.half_box_len, args.skin_r)
|
||||||
end
|
end
|
||||||
|
|
||||||
function pre_integration_hook(env_helper::LocalCOMEnvHelper)
|
function pre_integration_hook(env_helper::LocalCOMEnvHelper)
|
||||||
|
@ -86,26 +105,31 @@ end
|
||||||
function state_update_hook(env_helper::LocalCOMEnvHelper, particles::Vector{Particle})
|
function state_update_hook(env_helper::LocalCOMEnvHelper, particles::Vector{Particle})
|
||||||
n_particles = env_helper.shared.n_particles
|
n_particles = env_helper.shared.n_particles
|
||||||
|
|
||||||
@turbo for id in 1:(n_particles)
|
@turbo for id in 1:n_particles
|
||||||
env_helper.shared.old_states_ind[id] = env_helper.shared.states_ind[id]
|
env_helper.shared.old_states_ind[id] = env_helper.shared.states_ind[id]
|
||||||
end
|
end
|
||||||
|
|
||||||
env = env_helper.shared.env
|
env = env_helper.shared.env
|
||||||
|
|
||||||
|
distance_to_local_center_of_mass_sum = 0.0
|
||||||
|
|
||||||
for id in 1:n_particles
|
for id in 1:n_particles
|
||||||
n_neighbours = env_helper.n_neighbours[id]
|
n_neighbours = env_helper.n_neighbours[id]
|
||||||
|
|
||||||
if n_neighbours == 0
|
if n_neighbours == 0
|
||||||
state_ind = env.shared.n_states
|
state_ind = env.shared.n_states
|
||||||
|
|
||||||
|
distance_to_local_center_of_mass_sum +=
|
||||||
|
env_helper.max_distance_to_local_center_of_mass
|
||||||
else
|
else
|
||||||
vec_to_local_center_of_mass =
|
vec_to_local_center_of_mass =
|
||||||
env_helper.vec_to_neighbour_sums[id] / n_neighbours
|
env_helper.vec_to_neighbour_sums[id] / n_neighbours
|
||||||
|
|
||||||
sq_norm2d_vec_to_local_center_of_mass = sq_norm2d(vec_to_local_center_of_mass)
|
distance = norm2d(vec_to_local_center_of_mass)
|
||||||
env_helper.sq_norm2d_vec_to_local_center_of_mass[id] =
|
|
||||||
sq_norm2d_vec_to_local_center_of_mass
|
|
||||||
|
|
||||||
distance = sqrt(sq_norm2d_vec_to_local_center_of_mass)
|
env_helper.distances_to_local_center_of_mass[id] = distance
|
||||||
|
|
||||||
|
distance_to_local_center_of_mass_sum += distance
|
||||||
|
|
||||||
distance_state = find_state_interval(distance, env.distance_state_space)
|
distance_state = find_state_interval(distance, env.distance_state_space)
|
||||||
|
|
||||||
|
@ -124,20 +148,64 @@ function state_update_hook(env_helper::LocalCOMEnvHelper, particles::Vector{Part
|
||||||
env_helper.shared.states_ind[id] = state_ind
|
env_helper.shared.states_ind[id] = state_ind
|
||||||
end
|
end
|
||||||
|
|
||||||
|
mean_distance_to_local_center_of_mass =
|
||||||
|
distance_to_local_center_of_mass_sum / n_particles
|
||||||
|
env_helper.add_shape_reward_term =
|
||||||
|
mean_distance_to_local_center_of_mass /
|
||||||
|
env_helper.max_distance_to_local_center_of_mass < 0.32
|
||||||
|
|
||||||
|
if env_helper.add_shape_reward_term
|
||||||
|
#println(mean_distance_to_local_center_of_mass / env_helper.max_distance_to_local_center_of_mass) # TODO: Remove
|
||||||
|
end
|
||||||
|
|
||||||
|
v1, v2 = Shape.gyration_tensor_eigvecs(particles, env_helper.half_box_len)
|
||||||
|
|
||||||
|
env_helper.gyration_tensor_eigvec_to_smaller_eigval = v1
|
||||||
|
env_helper.gyration_tensor_eigvec_to_bigger_eigval = v2
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
|
"""
|
||||||
|
minimizing_reward(value::Float64, max_value::Float64)
|
||||||
|
|
||||||
|
Returns the reward such that it is 0 for value=max_value and 1 for value=0.
|
||||||
|
"""
|
||||||
|
function minimizing_reward(value::Real, max_value::Real)
    # Accepts any `Real` rather than only `Float64`, so integer or `Float32`
    # inputs dispatch here as well; the formula itself is unchanged.
    #
    # The quotient is 1 at `value == 0` (for non-zero `max_value`), 0 at
    # `value == max_value`, and negative once `value` exceeds `max_value`.
    # With floating-point or integer inputs, `value == max_value == 0`
    # evaluates 0/0 and therefore returns `NaN`.
    return (max_value - value) / (max_value + value)
end
|
||||||
|
|
||||||
function update_reward!(env::LocalCOMEnv, env_helper::LocalCOMEnvHelper, particle::Particle)
|
function update_reward!(env::LocalCOMEnv, env_helper::LocalCOMEnvHelper, particle::Particle)
|
||||||
id = particle.id
|
id = particle.id
|
||||||
|
|
||||||
normalization = (env.max_distance * env_helper.shared.n_particles)
|
normalization = env_helper.shared.n_particles # TODO: Add factor from steps
|
||||||
|
|
||||||
n_neighbours = env_helper.n_neighbours[id]
|
n_neighbours = env_helper.n_neighbours[id]
|
||||||
|
|
||||||
if n_neighbours == 0
|
if n_neighbours == 0
|
||||||
env.shared.reward = -(env.max_distance^2) / normalization
|
env.shared.reward = 0.0
|
||||||
else
|
else
|
||||||
env.shared.reward =
|
reward = minimizing_reward(
|
||||||
-(env_helper.sq_norm2d_vec_to_local_center_of_mass[id]) / normalization # TODO: Add shape term
|
env_helper.distances_to_local_center_of_mass[id],
|
||||||
|
env_helper.max_distance_to_local_center_of_mass,
|
||||||
|
)
|
||||||
|
|
||||||
|
if env_helper.add_shape_reward_term
|
||||||
|
elliptical_distance = ReCo.elliptical_distance(
|
||||||
|
particle,
|
||||||
|
env_helper.gyration_tensor_eigvec_to_smaller_eigval,
|
||||||
|
env_helper.gyration_tensor_eigvec_to_bigger_eigval,
|
||||||
|
env_helper.shared.goal_gyration_tensor_eigvals_ratio,
|
||||||
|
)
|
||||||
|
|
||||||
|
reward += unnormalized_reward(
|
||||||
|
elliptical_distance,
|
||||||
|
env_helper.max_elliptical_distance, # TODO: Fix sq
|
||||||
|
)
|
||||||
|
|
||||||
|
# println(elliptical_distance / env_helper.max_elliptical_distance) # TODO: Remove
|
||||||
|
end
|
||||||
|
|
||||||
|
env.shared.reward = reward / normalization
|
||||||
end
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
|
|
11
src/RL/RL.jl
11
src/RL/RL.jl
|
@ -17,7 +17,6 @@ using ..ReCo:
|
||||||
Particle,
|
Particle,
|
||||||
angle2,
|
angle2,
|
||||||
norm2d,
|
norm2d,
|
||||||
sq_norm2d,
|
|
||||||
Shape,
|
Shape,
|
||||||
DEFAULT_SKIN_TO_INTERACTION_R_RATIO,
|
DEFAULT_SKIN_TO_INTERACTION_R_RATIO,
|
||||||
method_not_implemented
|
method_not_implemented
|
||||||
|
@ -89,7 +88,9 @@ function run_rl(;
|
||||||
)
|
)
|
||||||
n_particles = sim_consts.n_particles # Not always equal to the input!
|
n_particles = sim_consts.n_particles # Not always equal to the input!
|
||||||
|
|
||||||
env = EnvType(sim_consts)
|
env_args = (skin_r=sim_consts.skin_r,)
|
||||||
|
|
||||||
|
env = EnvType(; args=env_args)
|
||||||
|
|
||||||
agent = gen_agent(env.shared.n_states, env.shared.n_actions, ϵ_stable)
|
agent = gen_agent(env.shared.n_states, env.shared.n_actions, ϵ_stable)
|
||||||
|
|
||||||
|
@ -97,7 +98,7 @@ function run_rl(;
|
||||||
|
|
||||||
hook = TotalRewardPerEpisode()
|
hook = TotalRewardPerEpisode()
|
||||||
|
|
||||||
env_helper_params = EnvHelperSharedProps(
|
env_helper_shared = EnvHelperSharedProps(
|
||||||
env,
|
env,
|
||||||
agent,
|
agent,
|
||||||
hook,
|
hook,
|
||||||
|
@ -106,7 +107,9 @@ function run_rl(;
|
||||||
n_particles,
|
n_particles,
|
||||||
)
|
)
|
||||||
|
|
||||||
env_helper = gen_env_helper(env, env_helper_params)
|
env_helper_args = (half_box_len=sim_consts.half_box_len, skin_r=sim_consts.skin_r)
|
||||||
|
|
||||||
|
env_helper = gen_env_helper(env, env_helper_shared; args=env_helper_args)
|
||||||
|
|
||||||
parent_dir = "RL_" * parent_dir_appendix
|
parent_dir = "RL_" * parent_dir_appendix
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue