1
0
Fork 0
mirror of https://gitlab.rlp.net/mobitar/ReCo.jl.git synced 2025-01-03 15:49:20 +00:00

Added shape reward term

This commit is contained in:
Mo8it 2022-01-14 12:28:47 +01:00
parent 9b233d53c4
commit 891af721c0
4 changed files with 98 additions and 27 deletions

View file

@ -1,4 +1,4 @@
name = "ReCo.jl" name = "ReCo"
uuid = "b25f7548-fcc9-4c91-bc24-841b54f4dd54" uuid = "b25f7548-fcc9-4c91-bc24-841b54f4dd54"
authors = ["MoBit <mo8it@protonmail.com>"] authors = ["MoBit <mo8it@protonmail.com>"]
version = "0.2.0" version = "0.2.0"

View file

@ -40,7 +40,7 @@ struct EnvHelperSharedProps{H<:AbstractHook}
end end
end end
function gen_env_helper(::Env, env_helper_params::EnvHelperSharedProps) function gen_env_helper(::Env, env_helper_params::EnvHelperSharedProps; args)
return method_not_implemented() return method_not_implemented()
end end

View file

@ -6,17 +6,15 @@ struct LocalCOMEnv <: Env
distance_state_space::Vector{Interval} distance_state_space::Vector{Interval}
direction_angle_state_space::Vector{Interval} direction_angle_state_space::Vector{Interval}
max_distance::Float64 function LocalCOMEnv(;
n_distance_states::Int64=3, n_direction_angle_states::Int64=3, args
function LocalCOMEnv(
sim_consts; n_distance_states::Int64=3, n_direction_angle_states::Int64=3
) )
@assert n_direction_angle_states > 1 @assert n_direction_angle_states > 1
direction_angle_state_space = gen_angle_state_space(n_direction_angle_states) direction_angle_state_space = gen_angle_state_space(n_direction_angle_states)
min_distance = 0.0 min_distance = 0.0
max_distance = sim_consts.skin_r max_distance = args.skin_r
distance_state_space = gen_distance_state_space( distance_state_space = gen_distance_state_space(
min_distance, max_distance, n_distance_states min_distance, max_distance, n_distance_states
@ -37,29 +35,50 @@ struct LocalCOMEnv <: Env
shared = EnvSharedProps(n_states, state_space) shared = EnvSharedProps(n_states, state_space)
return new(shared, distance_state_space, direction_angle_state_space, max_distance) return new(shared, distance_state_space, direction_angle_state_space)
end end
end end
struct LocalCOMEnvHelper <: EnvHelper mutable struct LocalCOMEnvHelper <: EnvHelper
shared::EnvHelperSharedProps shared::EnvHelperSharedProps
vec_to_neighbour_sums::Vector{SVector{2,Float64}} vec_to_neighbour_sums::Vector{SVector{2,Float64}}
n_neighbours::Vector{Int64} n_neighbours::Vector{Int64}
sq_norm2d_vec_to_local_center_of_mass::Vector{Float64}
function LocalCOMEnvHelper(shared::EnvHelperSharedProps) distances_to_local_center_of_mass::Vector{Float64}
max_distance_to_local_center_of_mass::Float64
add_shape_reward_term::Bool
gyration_tensor_eigvec_to_smaller_eigval::SVector{2,Float64}
gyration_tensor_eigvec_to_bigger_eigval::SVector{2,Float64}
half_box_len::Float64
max_elliptical_distance::Float64
function LocalCOMEnvHelper(shared::EnvHelperSharedProps, half_box_len::Float64, skin_r)
max_elliptical_distance =
sqrt(2) * half_box_len / shared.goal_gyration_tensor_eigvals_ratio
max_distance_to_local_center_of_mass = skin_r
return new( return new(
shared, shared,
fill(SVector(0.0, 0.0), shared.n_particles), fill(SVector(0.0, 0.0), shared.n_particles),
fill(0, shared.n_particles), fill(0, shared.n_particles),
zeros(shared.n_particles), zeros(shared.n_particles),
max_distance_to_local_center_of_mass,
false,
SVector(0.0, 0.0),
SVector(0.0, 0.0),
half_box_len,
max_elliptical_distance,
) )
end end
end end
function gen_env_helper(::LocalCOMEnv, env_helper_params::EnvHelperSharedProps) function gen_env_helper(::LocalCOMEnv, env_helper_shared::EnvHelperSharedProps; args)
return LocalCOMEnvHelper(env_helper_params) return LocalCOMEnvHelper(env_helper_shared, args.half_box_len, args.skin_r)
end end
function pre_integration_hook(env_helper::LocalCOMEnvHelper) function pre_integration_hook(env_helper::LocalCOMEnvHelper)
@ -86,26 +105,31 @@ end
function state_update_hook(env_helper::LocalCOMEnvHelper, particles::Vector{Particle}) function state_update_hook(env_helper::LocalCOMEnvHelper, particles::Vector{Particle})
n_particles = env_helper.shared.n_particles n_particles = env_helper.shared.n_particles
@turbo for id in 1:(n_particles) @turbo for id in 1:n_particles
env_helper.shared.old_states_ind[id] = env_helper.shared.states_ind[id] env_helper.shared.old_states_ind[id] = env_helper.shared.states_ind[id]
end end
env = env_helper.shared.env env = env_helper.shared.env
distance_to_local_center_of_mass_sum = 0.0
for id in 1:n_particles for id in 1:n_particles
n_neighbours = env_helper.n_neighbours[id] n_neighbours = env_helper.n_neighbours[id]
if n_neighbours == 0 if n_neighbours == 0
state_ind = env.shared.n_states state_ind = env.shared.n_states
distance_to_local_center_of_mass_sum +=
env_helper.max_distance_to_local_center_of_mass
else else
vec_to_local_center_of_mass = vec_to_local_center_of_mass =
env_helper.vec_to_neighbour_sums[id] / n_neighbours env_helper.vec_to_neighbour_sums[id] / n_neighbours
sq_norm2d_vec_to_local_center_of_mass = sq_norm2d(vec_to_local_center_of_mass) distance = norm2d(vec_to_local_center_of_mass)
env_helper.sq_norm2d_vec_to_local_center_of_mass[id] =
sq_norm2d_vec_to_local_center_of_mass
distance = sqrt(sq_norm2d_vec_to_local_center_of_mass) env_helper.distances_to_local_center_of_mass[id] = distance
distance_to_local_center_of_mass_sum += distance
distance_state = find_state_interval(distance, env.distance_state_space) distance_state = find_state_interval(distance, env.distance_state_space)
@ -124,20 +148,64 @@ function state_update_hook(env_helper::LocalCOMEnvHelper, particles::Vector{Part
env_helper.shared.states_ind[id] = state_ind env_helper.shared.states_ind[id] = state_ind
end end
mean_distance_to_local_center_of_mass =
distance_to_local_center_of_mass_sum / n_particles
env_helper.add_shape_reward_term =
mean_distance_to_local_center_of_mass /
env_helper.max_distance_to_local_center_of_mass < 0.32
if env_helper.add_shape_reward_term
#println(mean_distance_to_local_center_of_mass / env_helper.max_distance_to_local_center_of_mass) # TODO: Remove
end
v1, v2 = Shape.gyration_tensor_eigvecs(particles, env_helper.half_box_len)
env_helper.gyration_tensor_eigvec_to_smaller_eigval = v1
env_helper.gyration_tensor_eigvec_to_bigger_eigval = v2
return nothing return nothing
end end
"""
minimizing_reward(value::Float64, max_value::Float64)
Returns the reward such that it is 0 for value=max_value and 1 for value=0.
"""
function minimizing_reward(value::Float64, max_value::Float64)
    # Remaining margin relative to the combined magnitude. For max_value > 0
    # this is 1 at `value == 0`, 0 at `value == max_value`, and negative once
    # `value` exceeds `max_value`.
    margin = max_value - value
    total = max_value + value

    # No guard for `total == 0`: plain IEEE division applies (0/0 gives NaN).
    return margin / total
end
function update_reward!(env::LocalCOMEnv, env_helper::LocalCOMEnvHelper, particle::Particle) function update_reward!(env::LocalCOMEnv, env_helper::LocalCOMEnvHelper, particle::Particle)
id = particle.id id = particle.id
normalization = (env.max_distance * env_helper.shared.n_particles) normalization = env_helper.shared.n_particles # TODO: Add factor from steps
n_neighbours = env_helper.n_neighbours[id] n_neighbours = env_helper.n_neighbours[id]
if n_neighbours == 0 if n_neighbours == 0
env.shared.reward = -(env.max_distance^2) / normalization env.shared.reward = 0.0
else else
env.shared.reward = reward = minimizing_reward(
-(env_helper.sq_norm2d_vec_to_local_center_of_mass[id]) / normalization # TODO: Add shape term env_helper.distances_to_local_center_of_mass[id],
env_helper.max_distance_to_local_center_of_mass,
)
if env_helper.add_shape_reward_term
elliptical_distance = ReCo.elliptical_distance(
particle,
env_helper.gyration_tensor_eigvec_to_smaller_eigval,
env_helper.gyration_tensor_eigvec_to_bigger_eigval,
env_helper.shared.goal_gyration_tensor_eigvals_ratio,
)
reward += unnormalized_reward(
elliptical_distance,
env_helper.max_elliptical_distance, # TODO: Fix sq
)
# println(elliptical_distance / env_helper.max_elliptical_distance) # TODO: Remove
end
env.shared.reward = reward / normalization
end end
return nothing return nothing

View file

@ -17,7 +17,6 @@ using ..ReCo:
Particle, Particle,
angle2, angle2,
norm2d, norm2d,
sq_norm2d,
Shape, Shape,
DEFAULT_SKIN_TO_INTERACTION_R_RATIO, DEFAULT_SKIN_TO_INTERACTION_R_RATIO,
method_not_implemented method_not_implemented
@ -89,7 +88,9 @@ function run_rl(;
) )
n_particles = sim_consts.n_particles # Not always equal to the input! n_particles = sim_consts.n_particles # Not always equal to the input!
env = EnvType(sim_consts) env_args = (skin_r=sim_consts.skin_r,)
env = EnvType(; args=env_args)
agent = gen_agent(env.shared.n_states, env.shared.n_actions, ϵ_stable) agent = gen_agent(env.shared.n_states, env.shared.n_actions, ϵ_stable)
@ -97,7 +98,7 @@ function run_rl(;
hook = TotalRewardPerEpisode() hook = TotalRewardPerEpisode()
env_helper_params = EnvHelperSharedProps( env_helper_shared = EnvHelperSharedProps(
env, env,
agent, agent,
hook, hook,
@ -106,7 +107,9 @@ function run_rl(;
n_particles, n_particles,
) )
env_helper = gen_env_helper(env, env_helper_params) env_helper_args = (half_box_len=sim_consts.half_box_len, skin_r=sim_consts.skin_r)
env_helper = gen_env_helper(env, env_helper_shared; args=env_helper_args)
parent_dir = "RL_" * parent_dir_appendix parent_dir = "RL_" * parent_dir_appendix