# src/RL/Envs/LocalCOMWithAdditionalShapeRewardEnv.jl

"""
This environment corresponds to the local center of mass strategy with an additional shape reward term.
The minimization variable of the additional reward term is the individual elliptical distance of a particle.
"""

using ..ReCo: ReCo

# Threshold on the mean distance to the local center of mass, normalized by its
# maximum, below which the shape-reward term is activated (see
# `state_update_hook!`).
const DEFAULT_TRIGGER = 0.35

struct LocalCOMWithAdditionalShapeRewardEnv <: Env
    shared::EnvSharedProps

    distance_state_space::Vector{Interval}
    direction_angle_state_space::Vector{Interval}

    function LocalCOMWithAdditionalShapeRewardEnv(
        args; n_distance_states::Int64=3, n_direction_angle_states::Int64=3
    )
        @assert n_distance_states > 1
        @assert n_direction_angle_states > 1

        direction_angle_state_space = gen_angle_state_space(n_direction_angle_states)

        min_distance = 0.0
        max_distance = args.skin_radius

        distance_state_space = gen_distance_state_space(
            min_distance, max_distance, n_distance_states
        )

        # The additional last state is used when no particle is within the skin radius.
        n_states = n_distance_states * n_direction_angle_states + 1

        state_spaces_labels = gen_state_spaces_labels(
            ("d", "\\theta"), (distance_state_space, direction_angle_state_space)
        )

        shared = EnvSharedProps(
            n_states, (n_distance_states, n_direction_angle_states), state_spaces_labels
        )

        return new(shared, distance_state_space, direction_angle_state_space)
    end
end
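
# A hedged construction sketch (`sim_args` is an illustrative name for any
# object exposing `skin_radius`, which is all the constructor assumes):
#
#     env = LocalCOMWithAdditionalShapeRewardEnv(
#         sim_args; n_distance_states=3, n_direction_angle_states=3
#     )
#
# With the defaults this yields 3 * 3 + 1 = 10 states, the extra one marking
# "no neighbor within the skin radius".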

mutable struct LocalCOMWithAdditionalShapeRewardEnvHelper <: EnvHelper
    shared::EnvHelperSharedProps

    vec_to_neighbor_sums::Vector{SVector{2,Float64}}
    n_neighbors::Vector{Int64}

    distances_to_local_center_of_mass::Vector{Float64}
    max_distance_to_local_center_of_mass::Float64

    add_shape_reward_term::Bool

    center_of_mass::SVector{2,Float64}
    gyration_tensor_eigvec_to_smaller_eigval::SVector{2,Float64}
    gyration_tensor_eigvec_to_bigger_eigval::SVector{2,Float64}

    half_box_len::Float64
    max_elliptical_distance::Float64

    trigger::Float64

    function LocalCOMWithAdditionalShapeRewardEnvHelper(
        shared::EnvHelperSharedProps;
        half_box_len::Float64,
        skin_radius::Float64,
        trigger::Float64=DEFAULT_TRIGGER,
    )
        # Upper bound of the elliptical distance: both coordinates are bounded
        # by half_box_len, and the minor-axis coordinate is scaled by the
        # inverse of the semi-axes ratio b/a.
        max_elliptical_distance = sqrt(
            half_box_len^2 + (half_box_len / shared.elliptical_b_a_ratio)^2
        )

        # A particle's local center of mass lies within its skin radius.
        max_distance_to_local_center_of_mass = skin_radius

        return new(
            shared,
            fill(SVector(0.0, 0.0), shared.n_particles),
            fill(0, shared.n_particles),
            zeros(Float64, shared.n_particles),
            max_distance_to_local_center_of_mass,
            false,
            SVector(0.0, 0.0),
            SVector(0.0, 0.0),
            SVector(0.0, 0.0),
            half_box_len,
            max_elliptical_distance,
            trigger,
        )
    end
end
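
# Illustrative numbers (not from this repository): with half_box_len = 50.0 and
# elliptical_b_a_ratio = 0.4, the bound above is
# sqrt(50.0^2 + (50.0 / 0.4)^2) = sqrt(2500.0 + 15625.0) ≈ 134.6.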

function gen_env_helper(
    ::LocalCOMWithAdditionalShapeRewardEnv,
    env_helper_shared::EnvHelperSharedProps;
    kwargs...,
)
    return LocalCOMWithAdditionalShapeRewardEnvHelper(env_helper_shared; kwargs...)
end
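
# The keyword arguments are forwarded unchanged, so a hedged call sketch (the
# values here are illustrative) looks like:
#
#     env_helper = gen_env_helper(
#         env, env_helper_shared;
#         half_box_len=50.0, skin_radius=3.0, trigger=DEFAULT_TRIGGER,
#     )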

function pre_integration_hook!(env_helper::LocalCOMWithAdditionalShapeRewardEnvHelper)
    # Reset the per-particle neighbor accumulators before each integration step.
    @simd for id in 1:(env_helper.shared.n_particles)
        env_helper.vec_to_neighbor_sums[id] = SVector(0.0, 0.0)
        env_helper.n_neighbors[id] = 0
    end

    return nothing
end

function state_update_helper_hook!(
    env_helper::LocalCOMWithAdditionalShapeRewardEnvHelper,
    id1::Int64,
    id2::Int64,
    r⃗₁₂::SVector{2,Float64},
    distance²::Float64,
)
    # r⃗₁₂ points from particle id1 to particle id2, so one pair interaction
    # updates both particles' sums of vectors to their neighbors. distance² is
    # part of the hook interface but not needed here.
    env_helper.vec_to_neighbor_sums[id1] += r⃗₁₂
    env_helper.vec_to_neighbor_sums[id2] -= r⃗₁₂

    env_helper.n_neighbors[id1] += 1
    env_helper.n_neighbors[id2] += 1

    return nothing
end
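
# For instance, a particle with two neighbors at relative positions (1.0, 0.0)
# and (0.0, 1.0) accumulates the vector sum (1.0, 1.0); dividing by
# n_neighbors = 2 in `state_update_hook!` gives the local center-of-mass
# offset (0.5, 0.5).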

function state_update_hook!(
    env_helper::LocalCOMWithAdditionalShapeRewardEnvHelper, particles::Vector{ReCo.Particle}
)
    n_particles = env_helper.shared.n_particles

    env = env_helper.shared.env

    distance_to_local_center_of_mass_sum = 0.0

    for particle_id in 1:n_particles
        n_neighbors = env_helper.n_neighbors[particle_id]

        if n_neighbors == 0
            # No particle within the skin radius: use the reserved last state.
            state_id = env.shared.n_states

            distance_to_local_center_of_mass_sum +=
                env_helper.max_distance_to_local_center_of_mass
        else
            vec_to_local_center_of_mass =
                env_helper.vec_to_neighbor_sums[particle_id] / n_neighbors

            distance = ReCo.norm2d(vec_to_local_center_of_mass)

            env_helper.distances_to_local_center_of_mass[particle_id] = distance

            distance_to_local_center_of_mass_sum += distance

            distance_state_ind = find_state_ind(distance, env.distance_state_space)

            si, co = sincos(particles[particle_id].φ)
            direction_angle = ReCo.angle2(SVector(co, si), vec_to_local_center_of_mass)

            direction_state_ind = find_state_ind(
                direction_angle, env.direction_angle_state_space
            )

            state_id = env.shared.state_id_tensor[distance_state_ind, direction_state_ind]
        end

        env_helper.shared.states_id[particle_id] = state_id
    end

    mean_distance_to_local_center_of_mass =
        distance_to_local_center_of_mass_sum / n_particles

    # Activate the shape-reward term once the particles are, on average, close
    # enough to their local centers of mass.
    env_helper.add_shape_reward_term =
        mean_distance_to_local_center_of_mass /
        env_helper.max_distance_to_local_center_of_mass < env_helper.trigger

    if env_helper.add_shape_reward_term
        # Debug marker showing that the shape-reward term is active.
        print("*")
    end

    env_helper.center_of_mass = ReCo.center_of_mass(particles, env_helper.half_box_len)

    v1, v2 = ReCo.gyration_tensor_eigvecs(
        particles, env_helper.half_box_len, env_helper.center_of_mass
    )
    env_helper.gyration_tensor_eigvec_to_smaller_eigval = v1
    env_helper.gyration_tensor_eigvec_to_bigger_eigval = v2

    return nothing
end
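
# With the default trigger of 0.35 and max_distance_to_local_center_of_mass
# equal to the skin radius, the shape-reward term switches on once the mean
# distance to the local center of mass falls below 35 % of the skin radius,
# i.e. once the particles have clustered locally.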

function update_reward!(
    env::LocalCOMWithAdditionalShapeRewardEnv,
    env_helper::LocalCOMWithAdditionalShapeRewardEnvHelper,
    particle::ReCo.Particle,
)
    n_neighbors = env_helper.n_neighbors[particle.id]

    if n_neighbors == 0
        env.shared.reward = 0.0
    else
        reward = minimizing_reward(
            env_helper.distances_to_local_center_of_mass[particle.id],
            env_helper.max_distance_to_local_center_of_mass,
        )

        if env_helper.add_shape_reward_term
            elliptical_distance = ReCo.elliptical_distance(
                particle.c,
                env_helper.center_of_mass,
                env_helper.gyration_tensor_eigvec_to_smaller_eigval,
                env_helper.gyration_tensor_eigvec_to_bigger_eigval,
                env_helper.shared.elliptical_b_a_ratio,
                env_helper.half_box_len,
            )

            reward += minimizing_reward(
                elliptical_distance, env_helper.max_elliptical_distance
            )
        end

        # Divide by 2 so that the sum of the two reward terms stays in the same
        # range as a single term.
        reward /= 2

        set_normalized_reward!(env, reward, env_helper)
    end

    return nothing
end
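
# `minimizing_reward` is defined elsewhere in ReCo.jl; from its use here it
# maps a quantity in [0, max_value] to a reward that is largest at 0. A minimal
# sketch with the same interface (an assumption, not the actual definition)
# would be:
#
#     minimizing_reward(value::Float64, max_value::Float64) = 1.0 - value / max_value
#
# With such a form each term lies in [0, 1], so `reward /= 2` keeps the
# combined reward in [0, 1] before `set_normalized_reward!` is applied.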