mirror of
https://gitlab.rlp.net/mobitar/ReCo.jl.git
synced 2025-01-01 15:39:20 +00:00
Added shape reward term
This commit is contained in:
parent
9b233d53c4
commit
891af721c0
4 changed files with 98 additions and 27 deletions
|
@ -1,4 +1,4 @@
|
|||
name = "ReCo.jl"
|
||||
name = "ReCo"
|
||||
uuid = "b25f7548-fcc9-4c91-bc24-841b54f4dd54"
|
||||
authors = ["MoBit <mo8it@protonmail.com>"]
|
||||
version = "0.2.0"
|
||||
|
|
|
@ -40,7 +40,7 @@ struct EnvHelperSharedProps{H<:AbstractHook}
|
|||
end
|
||||
end
|
||||
|
||||
"""
    gen_env_helper(::Env, env_helper_params::EnvHelperSharedProps; args)

Fallback method for environments that do not define their own `gen_env_helper` method.

It only calls `method_not_implemented()` and returns its result. The required keyword
`args` is part of the signature so that it matches the environment-specific methods; it
is not read here.
"""
function gen_env_helper(::Env, env_helper_params::EnvHelperSharedProps; args)
    return method_not_implemented()
end
|
||||
|
||||
|
|
|
@ -6,17 +6,15 @@ struct LocalCOMEnv <: Env
|
|||
distance_state_space::Vector{Interval}
|
||||
direction_angle_state_space::Vector{Interval}
|
||||
|
||||
max_distance::Float64
|
||||
|
||||
function LocalCOMEnv(
|
||||
sim_consts; n_distance_states::Int64=3, n_direction_angle_states::Int64=3
|
||||
function LocalCOMEnv(;
|
||||
n_distance_states::Int64=3, n_direction_angle_states::Int64=3, args
|
||||
)
|
||||
@assert n_direction_angle_states > 1
|
||||
|
||||
direction_angle_state_space = gen_angle_state_space(n_direction_angle_states)
|
||||
|
||||
min_distance = 0.0
|
||||
max_distance = sim_consts.skin_r
|
||||
max_distance = args.skin_r
|
||||
|
||||
distance_state_space = gen_distance_state_space(
|
||||
min_distance, max_distance, n_distance_states
|
||||
|
@ -37,29 +35,50 @@ struct LocalCOMEnv <: Env
|
|||
|
||||
shared = EnvSharedProps(n_states, state_space)
|
||||
|
||||
return new(shared, distance_state_space, direction_angle_state_space, max_distance)
|
||||
return new(shared, distance_state_space, direction_angle_state_space)
|
||||
end
|
||||
end
|
||||
|
||||
"""
    LocalCOMEnvHelper(shared::EnvHelperSharedProps, half_box_len::Float64, skin_r)

Helper state for `LocalCOMEnv`.

The struct is mutable because `state_update_hook` reassigns `add_shape_reward_term` and
both gyration tensor eigenvector fields.

# Arguments
- `shared`: shared helper properties; `n_particles` and
  `goal_gyration_tensor_eigvals_ratio` are read from it here.
- `half_box_len`: stored as is and used to derive `max_elliptical_distance`.
- `skin_r`: stored as `max_distance_to_local_center_of_mass`.
"""
mutable struct LocalCOMEnvHelper <: EnvHelper
    shared::EnvHelperSharedProps

    # Per-particle buffers with `shared.n_particles` entries each.
    vec_to_neighbour_sums::Vector{SVector{2,Float64}}
    n_neighbours::Vector{Int64}

    # Per-particle distance to the local center of mass, written by `state_update_hook`.
    distances_to_local_center_of_mass::Vector{Float64}
    # Upper bound used for normalisation; initialised from `skin_r`.
    max_distance_to_local_center_of_mass::Float64

    # Starts as `false`; `state_update_hook` recomputes it.
    add_shape_reward_term::Bool

    # Start as zero vectors; `state_update_hook` overwrites them.
    gyration_tensor_eigvec_to_smaller_eigval::SVector{2,Float64}
    gyration_tensor_eigvec_to_bigger_eigval::SVector{2,Float64}

    half_box_len::Float64
    # sqrt(2) * half_box_len / shared.goal_gyration_tensor_eigvals_ratio
    max_elliptical_distance::Float64

    function LocalCOMEnvHelper(shared::EnvHelperSharedProps, half_box_len::Float64, skin_r)
        max_elliptical_distance =
            sqrt(2) * half_box_len / shared.goal_gyration_tensor_eigvals_ratio

        max_distance_to_local_center_of_mass = skin_r

        return new(
            shared,
            fill(SVector(0.0, 0.0), shared.n_particles),
            fill(0, shared.n_particles),
            zeros(shared.n_particles),
            max_distance_to_local_center_of_mass,
            false,
            SVector(0.0, 0.0),
            SVector(0.0, 0.0),
            half_box_len,
            max_elliptical_distance,
        )
    end
end
|
||||
|
||||
"""
    gen_env_helper(::LocalCOMEnv, env_helper_shared::EnvHelperSharedProps; args)

Build the `LocalCOMEnvHelper` for a `LocalCOMEnv`.

`args` must provide the properties `half_box_len` and `skin_r`; both are forwarded,
together with `env_helper_shared`, to the `LocalCOMEnvHelper` constructor.
"""
function gen_env_helper(::LocalCOMEnv, env_helper_shared::EnvHelperSharedProps; args)
    return LocalCOMEnvHelper(env_helper_shared, args.half_box_len, args.skin_r)
end
|
||||
|
||||
function pre_integration_hook(env_helper::LocalCOMEnvHelper)
|
||||
|
@ -86,26 +105,31 @@ end
|
|||
function state_update_hook(env_helper::LocalCOMEnvHelper, particles::Vector{Particle})
|
||||
n_particles = env_helper.shared.n_particles
|
||||
|
||||
@turbo for id in 1:(n_particles)
|
||||
@turbo for id in 1:n_particles
|
||||
env_helper.shared.old_states_ind[id] = env_helper.shared.states_ind[id]
|
||||
end
|
||||
|
||||
env = env_helper.shared.env
|
||||
|
||||
distance_to_local_center_of_mass_sum = 0.0
|
||||
|
||||
for id in 1:n_particles
|
||||
n_neighbours = env_helper.n_neighbours[id]
|
||||
|
||||
if n_neighbours == 0
|
||||
state_ind = env.shared.n_states
|
||||
|
||||
distance_to_local_center_of_mass_sum +=
|
||||
env_helper.max_distance_to_local_center_of_mass
|
||||
else
|
||||
vec_to_local_center_of_mass =
|
||||
env_helper.vec_to_neighbour_sums[id] / n_neighbours
|
||||
|
||||
sq_norm2d_vec_to_local_center_of_mass = sq_norm2d(vec_to_local_center_of_mass)
|
||||
env_helper.sq_norm2d_vec_to_local_center_of_mass[id] =
|
||||
sq_norm2d_vec_to_local_center_of_mass
|
||||
distance = norm2d(vec_to_local_center_of_mass)
|
||||
|
||||
distance = sqrt(sq_norm2d_vec_to_local_center_of_mass)
|
||||
env_helper.distances_to_local_center_of_mass[id] = distance
|
||||
|
||||
distance_to_local_center_of_mass_sum += distance
|
||||
|
||||
distance_state = find_state_interval(distance, env.distance_state_space)
|
||||
|
||||
|
@ -124,20 +148,64 @@ function state_update_hook(env_helper::LocalCOMEnvHelper, particles::Vector{Part
|
|||
env_helper.shared.states_ind[id] = state_ind
|
||||
end
|
||||
|
||||
mean_distance_to_local_center_of_mass =
|
||||
distance_to_local_center_of_mass_sum / n_particles
|
||||
env_helper.add_shape_reward_term =
|
||||
mean_distance_to_local_center_of_mass /
|
||||
env_helper.max_distance_to_local_center_of_mass < 0.32
|
||||
|
||||
if env_helper.add_shape_reward_term
|
||||
#println(mean_distance_to_local_center_of_mass / env_helper.max_distance_to_local_center_of_mass) # TODO: Remove
|
||||
end
|
||||
|
||||
v1, v2 = Shape.gyration_tensor_eigvecs(particles, env_helper.half_box_len)
|
||||
|
||||
env_helper.gyration_tensor_eigvec_to_smaller_eigval = v1
|
||||
env_helper.gyration_tensor_eigvec_to_bigger_eigval = v2
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
"""
|
||||
minimizing_reward(value::Float64, max_value::Float64)
|
||||
|
||||
Returns the reward such that it is 0 for value=max_value and 1 for value=0.
|
||||
"""
|
||||
function minimizing_reward(value::Real, max_value::Real)
    # Any `Real` pair is accepted, not only `Float64`: `/` promotes integers to floats.
    # Yields 1 for `value == 0` and 0 for `value == max_value` (nonzero `max_value`);
    # the result is not finite when `max_value + value == 0`.
    return (max_value - value) / (max_value + value)
end
|
||||
|
||||
function update_reward!(env::LocalCOMEnv, env_helper::LocalCOMEnvHelper, particle::Particle)
|
||||
id = particle.id
|
||||
|
||||
normalization = (env.max_distance * env_helper.shared.n_particles)
|
||||
|
||||
normalization = env_helper.shared.n_particles # TODO: Add factor from steps
|
||||
n_neighbours = env_helper.n_neighbours[id]
|
||||
|
||||
if n_neighbours == 0
|
||||
env.shared.reward = -(env.max_distance^2) / normalization
|
||||
env.shared.reward = 0.0
|
||||
else
|
||||
env.shared.reward =
|
||||
-(env_helper.sq_norm2d_vec_to_local_center_of_mass[id]) / normalization # TODO: Add shape term
|
||||
reward = minimizing_reward(
|
||||
env_helper.distances_to_local_center_of_mass[id],
|
||||
env_helper.max_distance_to_local_center_of_mass,
|
||||
)
|
||||
|
||||
if env_helper.add_shape_reward_term
|
||||
elliptical_distance = ReCo.elliptical_distance(
|
||||
particle,
|
||||
env_helper.gyration_tensor_eigvec_to_smaller_eigval,
|
||||
env_helper.gyration_tensor_eigvec_to_bigger_eigval,
|
||||
env_helper.shared.goal_gyration_tensor_eigvals_ratio,
|
||||
)
|
||||
|
||||
reward += unnormalized_reward(
|
||||
elliptical_distance,
|
||||
env_helper.max_elliptical_distance, # TODO: Fix sq
|
||||
)
|
||||
|
||||
# println(elliptical_distance / env_helper.max_elliptical_distance) # TODO: Remove
|
||||
end
|
||||
|
||||
env.shared.reward = reward / normalization
|
||||
end
|
||||
|
||||
return nothing
|
||||
|
|
11
src/RL/RL.jl
11
src/RL/RL.jl
|
@ -17,7 +17,6 @@ using ..ReCo:
|
|||
Particle,
|
||||
angle2,
|
||||
norm2d,
|
||||
sq_norm2d,
|
||||
Shape,
|
||||
DEFAULT_SKIN_TO_INTERACTION_R_RATIO,
|
||||
method_not_implemented
|
||||
|
@ -89,7 +88,9 @@ function run_rl(;
|
|||
)
|
||||
n_particles = sim_consts.n_particles # Not always equal to the input!
|
||||
|
||||
env = EnvType(sim_consts)
|
||||
env_args = (skin_r=sim_consts.skin_r,)
|
||||
|
||||
env = EnvType(; args=env_args)
|
||||
|
||||
agent = gen_agent(env.shared.n_states, env.shared.n_actions, ϵ_stable)
|
||||
|
||||
|
@ -97,7 +98,7 @@ function run_rl(;
|
|||
|
||||
hook = TotalRewardPerEpisode()
|
||||
|
||||
env_helper_params = EnvHelperSharedProps(
|
||||
env_helper_shared = EnvHelperSharedProps(
|
||||
env,
|
||||
agent,
|
||||
hook,
|
||||
|
@ -106,7 +107,9 @@ function run_rl(;
|
|||
n_particles,
|
||||
)
|
||||
|
||||
env_helper = gen_env_helper(env, env_helper_params)
|
||||
env_helper_args = (half_box_len=sim_consts.half_box_len, skin_r=sim_consts.skin_r)
|
||||
|
||||
env_helper = gen_env_helper(env, env_helper_shared; args=env_helper_args)
|
||||
|
||||
parent_dir = "RL_" * parent_dir_appendix
|
||||
|
||||
|
|
Loading…
Reference in a new issue