2022-02-08 23:45:30 +01:00
|
|
|
"""
|
|
|
|
This environment corresponds to the local center of mass strategy with an additional shape reward term.
|
|
|
|
The minimization variable of the additional reward term is the individual elliptical distance of a particle.
|
|
|
|
"""
|
|
|
|
|
2022-01-29 16:45:36 +01:00
|
|
|
using ..ReCo: ReCo
|
2022-01-14 13:01:14 +01:00
|
|
|
|
2022-05-01 18:13:35 +02:00
|
|
|
# Default for the `trigger` keyword of `LocalCOMWithAdditionalShapeRewardEnvHelper`.
# The shape reward term is switched on while the mean distance to the local center
# of mass, divided by its maximum, is below this threshold.
const DEFAULT_TRIGGER = 0.35
|
2022-05-01 17:10:15 +02:00
|
|
|
|
2022-01-29 15:48:13 +01:00
|
|
|
"""
    LocalCOMWithAdditionalShapeRewardEnv(args; n_distance_states=3, n_direction_angle_states=3)

Environment whose discrete state is built from two binned quantities: a distance
(see `distance_state_space`) and a direction angle (see `direction_angle_state_space`).

# Arguments
- `args`: object providing `skin_radius`, used as the upper bound of the distance bins.

# Keywords
- `n_distance_states`: number of distance bins, must be greater than 1.
- `n_direction_angle_states`: number of direction angle bins, must be greater than 1.
"""
struct LocalCOMWithAdditionalShapeRewardEnv <: Env
    # Properties shared by all environments (state count, state shape, labels).
    shared::EnvSharedProps

    # Bins spanning 0.0 to `args.skin_radius`.
    distance_state_space::Vector{Interval}
    # Bins generated by `gen_angle_state_space`.
    direction_angle_state_space::Vector{Interval}

    function LocalCOMWithAdditionalShapeRewardEnv(
        args; n_distance_states::Int64=3, n_direction_angle_states::Int64=3
    )
        @assert n_distance_states > 1
        @assert n_direction_angle_states > 1

        angle_space = gen_angle_state_space(n_direction_angle_states)
        distance_space = gen_distance_state_space(
            0.0, args.skin_radius, n_distance_states
        )

        # The extra (last) state is used when no particle is in the skin radius.
        total_states = n_distance_states * n_direction_angle_states + 1

        labels = gen_state_spaces_labels(
            ("d", "\\theta"), (distance_space, angle_space)
        )

        env_shared = EnvSharedProps(
            total_states, (n_distance_states, n_direction_angle_states), labels
        )

        return new(env_shared, distance_space, angle_space)
    end
end
|
|
|
|
|
2022-01-29 15:48:13 +01:00
|
|
|
"""
    LocalCOMWithAdditionalShapeRewardEnvHelper(shared; half_box_len, skin_radius, trigger=DEFAULT_TRIGGER)

Mutable working state used by the hooks of `LocalCOMWithAdditionalShapeRewardEnv`.

# Keywords
- `half_box_len`: stored as is and used to derive `max_elliptical_distance`.
- `skin_radius`: stored as `max_distance_to_local_center_of_mass`.
- `trigger`: threshold compared against the normalized mean distance to the local
  center of mass to decide whether the shape reward term is added.
"""
mutable struct LocalCOMWithAdditionalShapeRewardEnvHelper <: EnvHelper
    shared::EnvHelperSharedProps

    # Per particle: accumulated neighbor vectors and the number of neighbors.
    vec_to_neighbor_sums::Vector{SVector{2,Float64}}
    n_neighbors::Vector{Int64}

    # Per particle: distance to the local center of mass (only written for
    # particles that have at least one neighbor) and its upper bound.
    distances_to_local_center_of_mass::Vector{Float64}
    max_distance_to_local_center_of_mass::Float64

    # Whether the shape term is added to the reward; starts as `false`.
    add_shape_reward_term::Bool

    # Global shape information, refreshed in `state_update_hook!`.
    center_of_mass::SVector{2,Float64}
    gyration_tensor_eigvec_to_smaller_eigval::SVector{2,Float64}
    gyration_tensor_eigvec_to_bigger_eigval::SVector{2,Float64}

    half_box_len::Float64
    max_elliptical_distance::Float64

    trigger::Float64

    function LocalCOMWithAdditionalShapeRewardEnvHelper(
        shared::EnvHelperSharedProps;
        half_box_len::Float64,
        skin_radius::Float64,
        trigger::Float64=DEFAULT_TRIGGER,
    )
        particle_count = shared.n_particles
        origin = SVector(0.0, 0.0)

        scaled_half_box_len = half_box_len / shared.elliptical_b_a_ratio
        elliptical_bound = sqrt(half_box_len^2 + scaled_half_box_len^2)

        return new(
            shared,
            fill(origin, particle_count),
            zeros(Int64, particle_count),
            fill(0.0, particle_count),
            skin_radius,
            false,
            origin,
            origin,
            origin,
            half_box_len,
            elliptical_bound,
            trigger,
        )
    end
end
|
|
|
|
|
2022-01-29 15:48:13 +01:00
|
|
|
"""
    gen_env_helper(::LocalCOMWithAdditionalShapeRewardEnv, env_helper_shared; kwargs...)

Build the `LocalCOMWithAdditionalShapeRewardEnvHelper` for this environment type,
forwarding `env_helper_shared` and all keyword arguments to its constructor.
"""
gen_env_helper(
    ::LocalCOMWithAdditionalShapeRewardEnv,
    env_helper_shared::EnvHelperSharedProps;
    kwargs...,
) = LocalCOMWithAdditionalShapeRewardEnvHelper(env_helper_shared; kwargs...)
|
|
|
|
|
2022-01-29 15:48:13 +01:00
|
|
|
"""
    pre_integration_hook!(env_helper::LocalCOMWithAdditionalShapeRewardEnvHelper)

Reset the per-particle neighbor accumulators (`vec_to_neighbor_sums` and
`n_neighbors`) to zero for the first `env_helper.shared.n_particles` particles.
"""
function pre_integration_hook!(env_helper::LocalCOMWithAdditionalShapeRewardEnvHelper)
    neighbor_sums = env_helper.vec_to_neighbor_sums
    neighbor_counts = env_helper.n_neighbors
    zero_vec = SVector(0.0, 0.0)

    @simd for particle_id in 1:(env_helper.shared.n_particles)
        neighbor_sums[particle_id] = zero_vec
        neighbor_counts[particle_id] = 0
    end

    return nothing
end
|
|
|
|
|
2022-01-18 02:17:52 +01:00
|
|
|
"""
    state_update_helper_hook!(env_helper, id1, id2, r⃗₁₂, distance²)

Register the particles `id1` and `id2` as neighbors of each other: `r⃗₁₂` is added
to the neighbor vector sum of `id1` and subtracted from the one of `id2`, and both
neighbor counters are incremented. `distance²` is not used by this environment.
"""
function state_update_helper_hook!(
    env_helper::LocalCOMWithAdditionalShapeRewardEnvHelper,
    id1::Int64,
    id2::Int64,
    r⃗₁₂::SVector{2,Float64},
    distance²::Float64,
)
    neighbor_sums = env_helper.vec_to_neighbor_sums
    neighbor_counts = env_helper.n_neighbors

    neighbor_sums[id1] += r⃗₁₂
    neighbor_counts[id1] += 1

    neighbor_sums[id2] -= r⃗₁₂
    neighbor_counts[id2] += 1

    return nothing
end
|
|
|
|
|
2022-01-29 15:48:13 +01:00
|
|
|
"""
    state_update_hook!(env_helper, particles)

Assign a state id to every particle and refresh the global shape information.

For a particle without neighbors, the last state id (`env.shared.n_states`) is used
and the maximum distance enters the mean. Otherwise the vector to the local center
of mass is the neighbor vector sum divided by the neighbor count; its norm and the
angle returned by `ReCo.angle2` for the vector `(cos φ, sin φ)` and that vector
select the state from `env.shared.state_id_tensor`.

Afterwards `add_shape_reward_term` is set depending on whether the normalized mean
distance to the local center of mass is below `trigger`, and the center of mass and
the gyration tensor eigenvectors of all particles are stored in `env_helper`.
"""
function state_update_hook!(
    env_helper::LocalCOMWithAdditionalShapeRewardEnvHelper, particles::Vector{ReCo.Particle}
)
    helper_shared = env_helper.shared
    n_particles = helper_shared.n_particles
    env = helper_shared.env
    max_distance = env_helper.max_distance_to_local_center_of_mass

    distance_sum = 0.0

    for particle_id in 1:n_particles
        neighbor_count = env_helper.n_neighbors[particle_id]

        if neighbor_count == 0
            # Isolated particle: counts with the maximum distance, gets the last state.
            distance_sum += max_distance
            helper_shared.states_id[particle_id] = env.shared.n_states
            continue
        end

        to_local_com = env_helper.vec_to_neighbor_sums[particle_id] / neighbor_count

        local_com_distance = ReCo.norm2d(to_local_com)
        env_helper.distances_to_local_center_of_mass[particle_id] = local_com_distance
        distance_sum += local_com_distance

        distance_bin = find_state_ind(local_com_distance, env.distance_state_space)

        sin_φ, cos_φ = sincos(particles[particle_id].φ)
        angle_to_local_com = ReCo.angle2(SVector(cos_φ, sin_φ), to_local_com)
        angle_bin = find_state_ind(angle_to_local_com, env.direction_angle_state_space)

        helper_shared.states_id[particle_id] =
            env.shared.state_id_tensor[distance_bin, angle_bin]
    end

    mean_distance = distance_sum / n_particles
    normalized_mean_distance = mean_distance / max_distance
    env_helper.add_shape_reward_term = normalized_mean_distance < env_helper.trigger

    # Write a `*` to stdout whenever the shape reward term is active.
    env_helper.add_shape_reward_term && print("*")

    env_helper.center_of_mass = ReCo.center_of_mass(particles, env_helper.half_box_len)

    smaller_eigvec, bigger_eigvec = ReCo.gyration_tensor_eigvecs(
        particles, env_helper.half_box_len, env_helper.center_of_mass
    )
    env_helper.gyration_tensor_eigvec_to_smaller_eigval = smaller_eigvec
    env_helper.gyration_tensor_eigvec_to_bigger_eigval = bigger_eigvec

    return nothing
end
|
|
|
|
|
2022-01-29 15:48:13 +01:00
|
|
|
"""
    update_reward!(env, env_helper, particle)

Set the reward of `env` for `particle`.

A particle without neighbors gets the reward `0.0` directly. Otherwise the reward
is `minimizing_reward` of the particle's distance to its local center of mass, plus
`minimizing_reward` of its elliptical distance when
`env_helper.add_shape_reward_term` is set. The result is halved (whether or not the
shape term was added) and passed to `set_normalized_reward!`.
"""
function update_reward!(
    env::LocalCOMWithAdditionalShapeRewardEnv,
    env_helper::LocalCOMWithAdditionalShapeRewardEnvHelper,
    particle::ReCo.Particle,
)
    particle_id = particle.id

    if env_helper.n_neighbors[particle_id] == 0
        env.shared.reward = 0.0
        return nothing
    end

    total_reward = minimizing_reward(
        env_helper.distances_to_local_center_of_mass[particle_id],
        env_helper.max_distance_to_local_center_of_mass,
    )

    if env_helper.add_shape_reward_term
        particle_elliptical_distance = ReCo.elliptical_distance(
            particle.c,
            env_helper.center_of_mass,
            env_helper.gyration_tensor_eigvec_to_smaller_eigval,
            env_helper.gyration_tensor_eigvec_to_bigger_eigval,
            env_helper.shared.elliptical_b_a_ratio,
            env_helper.half_box_len,
        )

        total_reward += minimizing_reward(
            particle_elliptical_distance, env_helper.max_elliptical_distance
        )
    end

    set_normalized_reward!(env, total_reward / 2, env_helper)

    return nothing
end
|