1
0
Fork 0
mirror of https://gitlab.rlp.net/mobitar/ReCo.jl.git synced 2025-03-15 20:18:19 +00:00
ReCo.jl/src/RL/LocalCOMEnv.jl

222 lines
6.9 KiB
Julia
Raw Normal View History

export LocalCOMEnv
2022-01-14 13:01:14 +01:00
using ..ReCo: Particle
"""
    LocalCOMEnv(; n_distance_states=3, n_direction_angle_states=3, args)

Environment whose regular states are all pairs of one distance interval and one
direction-angle interval. One additional state index (the last one, `n_states`) is
reserved for the case where no particle is inside the skin radius.

# Keywords
- `n_distance_states::Int64`: number of intervals the range `0.0 .. args.skin_r` is
  split into by `gen_distance_state_space`.
- `n_direction_angle_states::Int64`: number of angle intervals produced by
  `gen_angle_state_space`; must be greater than 1.
- `args`: object that provides at least the property `skin_r`.
"""
struct LocalCOMEnv <: Env
    shared::EnvSharedProps

    distance_state_space::Vector{Interval}
    direction_angle_state_space::Vector{Interval}

    function LocalCOMEnv(;
        n_distance_states::Int64=3, n_direction_angle_states::Int64=3, args
    )
        @assert n_direction_angle_states > 1

        angle_intervals = gen_angle_state_space(n_direction_angle_states)

        # Distances are binned between zero and the skin radius.
        distance_intervals = gen_distance_state_space(
            0.0, args.skin_r, n_distance_states
        )

        n_pair_states = n_distance_states * n_direction_angle_states
        # The last state is when no particle is in the skin radius, hence the +1.
        n_states = n_pair_states + 1

        state_space = Vector{SVector{2,Interval}}(undef, n_pair_states)

        # `product` varies its first iterator fastest, so the angle interval is the
        # inner index and the distance interval the outer one.
        interval_pairs = Iterators.product(angle_intervals, distance_intervals)
        for (ind, (angle_interval, distance_interval)) in enumerate(interval_pairs)
            state_space[ind] = SVector(distance_interval, angle_interval)
        end

        shared = EnvSharedProps(n_states, state_space)

        return new(shared, distance_intervals, angle_intervals)
    end
end
2022-01-14 12:28:47 +01:00
"""
    LocalCOMEnvHelper(shared, half_box_len, skin_r)

Mutable working state that accompanies a `LocalCOMEnv`.

The per-particle buffers are allocated with `shared.n_particles` entries and start at
zero. `max_distance_to_local_center_of_mass` is set to `skin_r`, and
`max_elliptical_distance` to
`sqrt(2) * half_box_len / shared.goal_gyration_tensor_eigvals_ratio`.
"""
mutable struct LocalCOMEnvHelper <: EnvHelper
    shared::EnvHelperSharedProps

    # Per particle: accumulated vectors to neighbours and the neighbour count.
    vec_to_neighbour_sums::Vector{SVector{2,Float64}}
    n_neighbours::Vector{Int64}

    # Per particle: distance to the mean of the accumulated neighbour vectors.
    distances_to_local_center_of_mass::Vector{Float64}
    max_distance_to_local_center_of_mass::Float64

    # Whether the elliptical (shape) term is added to the reward.
    add_shape_reward_term::Bool

    center_of_mass::SVector{2,Float64}
    gyration_tensor_eigvec_to_smaller_eigval::SVector{2,Float64}
    gyration_tensor_eigvec_to_bigger_eigval::SVector{2,Float64}

    half_box_len::Float64
    max_elliptical_distance::Float64

    function LocalCOMEnvHelper(
        shared::EnvHelperSharedProps, half_box_len::Float64, skin_r
    )
        eigvals_ratio = shared.goal_gyration_tensor_eigvals_ratio
        max_elliptical_distance = sqrt(2) * half_box_len / eigvals_ratio

        n = shared.n_particles
        zero_vec = SVector(0.0, 0.0)

        # Positional arguments follow the field declaration order above.
        return new(
            shared,
            fill(zero_vec, n),
            zeros(Int64, n),
            fill(0.0, n),
            skin_r,
            false,
            zero_vec,
            zero_vec,
            zero_vec,
            half_box_len,
            max_elliptical_distance,
        )
    end
end
2022-01-14 12:28:47 +01:00
"""
    gen_env_helper(::LocalCOMEnv, env_helper_shared::EnvHelperSharedProps; args)

Build the `LocalCOMEnvHelper` for a `LocalCOMEnv`, reading `half_box_len` and `skin_r`
from `args`.
"""
function gen_env_helper(::LocalCOMEnv, env_helper_shared::EnvHelperSharedProps; args)
    half_box_len = args.half_box_len
    skin_r = args.skin_r

    return LocalCOMEnvHelper(env_helper_shared, half_box_len, skin_r)
end
"""
    pre_integration_hook(env_helper::LocalCOMEnvHelper)

Reset the neighbour vector sum and the neighbour count of every particle
(indices `1:env_helper.shared.n_particles`) to zero.
"""
function pre_integration_hook(env_helper::LocalCOMEnvHelper)
    neighbour_sums = env_helper.vec_to_neighbour_sums
    neighbour_counts = env_helper.n_neighbours
    zero_vec = SVector(0.0, 0.0)

    @simd for id in 1:(env_helper.shared.n_particles)
        neighbour_sums[id] = zero_vec
        neighbour_counts[id] = 0
    end

    return nothing
end
"""
    state_update_helper_hook(env_helper, id1, id2, r⃗₁₂)

Register particles `id1` and `id2` as neighbours of each other: `r⃗₁₂` is added to the
vector sum of `id1`, subtracted from the vector sum of `id2`, and both neighbour
counts are incremented by one.
"""
function state_update_helper_hook(
    env_helper::LocalCOMEnvHelper, id1::Int64, id2::Int64, r⃗₁₂::SVector{2,Float64}
)
    neighbour_sums = env_helper.vec_to_neighbour_sums
    neighbour_counts = env_helper.n_neighbours

    # The same vector is seen with opposite sign from the two particles.
    neighbour_sums[id1] = neighbour_sums[id1] + r⃗₁₂
    neighbour_sums[id2] = neighbour_sums[id2] - r⃗₁₂

    neighbour_counts[id1] = neighbour_counts[id1] + 1
    neighbour_counts[id2] = neighbour_counts[id2] + 1

    return nothing
end
"""
    state_update_hook(env_helper::LocalCOMEnvHelper, particles::Vector{Particle})

Recompute the state index of every particle from the accumulated neighbour data.

The current state indices are first copied into `old_states_ind`. A particle without
neighbours gets the last state index (`env.shared.n_states`) and contributes the
maximum distance to the mean. Every other particle is binned by its distance to the
mean neighbour vector and by the angle between its orientation `φ` and that vector.

Afterwards `add_shape_reward_term` is set depending on the mean normalized distance,
and the global center of mass and gyration tensor eigenvectors are refreshed.
"""
function state_update_hook(env_helper::LocalCOMEnvHelper, particles::Vector{Particle})
    shared = env_helper.shared
    n_particles = shared.n_particles

    # Remember the previous state of every particle before overwriting it.
    old_states_ind = shared.old_states_ind
    states_ind = shared.states_ind
    @turbo for id in 1:n_particles
        old_states_ind[id] = states_ind[id]
    end

    env = shared.env
    max_distance = env_helper.max_distance_to_local_center_of_mass
    distance_sum = 0.0

    for id in 1:n_particles
        n_neighbours = env_helper.n_neighbours[id]

        if n_neighbours == 0
            # Isolated particle: dedicated last state, counted with the maximum
            # distance in the mean.
            distance_sum += max_distance
            states_ind[id] = env.shared.n_states
            continue
        end

        vec_to_local_com = env_helper.vec_to_neighbour_sums[id] / n_neighbours
        distance = ReCo.norm2d(vec_to_local_com)

        env_helper.distances_to_local_center_of_mass[id] = distance
        distance_sum += distance

        distance_state = find_state_interval(distance, env.distance_state_space)

        # Angle between the particle's orientation unit vector and the vector to the
        # local center of mass.
        si, co = sincos(particles[id].φ)
        direction_angle = ReCo.angle2(SVector(co, si), vec_to_local_com)
        direction_angle_state = find_state_interval(
            direction_angle, env.direction_angle_state_space
        )

        state = SVector{2,Interval}(distance_state, direction_angle_state)
        states_ind[id] = find_state_ind(state, env.shared.state_space)
    end

    mean_distance = distance_sum / n_particles

    # The shape term is only enabled once the mean normalized distance is below 0.32.
    env_helper.add_shape_reward_term = mean_distance / max_distance < 0.32

    env_helper.center_of_mass = ReCo.center_of_mass(particles, env_helper.half_box_len)

    eigvecs = ReCo.gyration_tensor_eigvecs(
        particles, env_helper.half_box_len, env_helper.center_of_mass
    )
    eigvec_smaller, eigvec_bigger = eigvecs

    env_helper.gyration_tensor_eigvec_to_smaller_eigval = eigvec_smaller
    env_helper.gyration_tensor_eigvec_to_bigger_eigval = eigvec_bigger

    return nothing
end
2022-01-14 12:28:47 +01:00
"""
    minimizing_reward(value::Float64, max_value::Float64)

Return a reward that is 1 for `value == 0` and 0 for `value == max_value`.
"""
function minimizing_reward(value::Float64, max_value::Float64)
    # Remaining margin relative to the combined magnitude: 1 at value == 0,
    # 0 at value == max_value.
    margin = max_value - value
    total = max_value + value

    return margin / total
end
"""
    update_reward!(env::LocalCOMEnv, env_helper::LocalCOMEnvHelper, particle::Particle)

Write the reward of `particle` into `env.shared.reward`.

A particle without neighbours gets a reward of `0.0`. Otherwise the reward is the
`minimizing_reward` of its distance to the local center of mass, plus an elliptical
distance term when `env_helper.add_shape_reward_term` is set, divided by the number
of particles.
"""
function update_reward!(env::LocalCOMEnv, env_helper::LocalCOMEnvHelper, particle::Particle)
    id = particle.id

    # No neighbours: nothing to be rewarded for.
    if env_helper.n_neighbours[id] == 0
        env.shared.reward = 0.0
        return nothing
    end

    reward = minimizing_reward(
        env_helper.distances_to_local_center_of_mass[id],
        env_helper.max_distance_to_local_center_of_mass,
    )

    if env_helper.add_shape_reward_term
        elliptical_distance = ReCo.elliptical_distance(
            particle.c,
            env_helper.center_of_mass,
            env_helper.gyration_tensor_eigvec_to_smaller_eigval,
            env_helper.gyration_tensor_eigvec_to_bigger_eigval,
            env_helper.shared.goal_gyration_tensor_eigvals_ratio,
            env_helper.half_box_len,
        )

        # NOTE(review): `unnormalized_reward` is not defined in this file, while the
        # distance term above uses `minimizing_reward` - confirm this is intended.
        reward += unnormalized_reward(
            elliptical_distance,
            env_helper.max_elliptical_distance, # TODO: Fix sq
        )
    end

    normalization = env_helper.shared.n_particles # TODO: Add factor from steps
    env.shared.reward = reward / normalization

    return nothing
end