From 42de1a46094ed46d1733a2508af4fd9ea521101c Mon Sep 17 00:00:00 2001
From: Mo8it
Date: Mon, 31 Jan 2022 02:46:15 +0100
Subject: [PATCH] Added LocalCOMWithAdditionalShapeRewardEnv2

---
Review notes (this area is discarded by `git am`; not part of the commit message):

* Line structure restored. The patch had been flattened onto four physical
  lines, so neither the mail headers nor the hunks could be parsed. The added
  and removed lines are unchanged token-for-token; leading indentation and the
  position of blank context lines were re-derived from the hunk line counts
  and the 4-space layout of the code, so confirm them against the target tree
  before applying.

* What the patch does:
  - LocalCOMWithAdditionalShapeRewardEnv.jl: the threshold that enables
    add_shape_reward_term changes from `< 0.3` to `< 0.32`.
  - LocalCOMWithAdditionalShapeRewardEnv2.jl (new): an Env/EnvHelper pair
    whose per-particle state is the (distance, direction-angle) bin of the
    mean neighbour offset (named vec_to_local_center_of_mass), plus one extra
    last state for particles with no neighbours.
  - RL.jl includes and exports the new env; ReCo.jl re-exports it.

* In the new file `current_κ` is set to 1.0 by the helper constructor and is
  never assigned again. With goal_κ = 0.4 and
  max_distance_to_goal_κ = max(1 - goal_κ, goal_κ), the additional term in
  update_reward! is therefore always called with
  abs(current_κ - goal_κ) == max_distance_to_goal_κ. Presumably the κ update
  is still to be added in state_update_hook! -- confirm.

* `half_box_len` is stored on the helper but not read anywhere in the new
  file, and `distance²` is accepted by state_update_helper_hook! but unused.

 .../LocalCOMWithAdditionalShapeRewardEnv.jl   |   2 +-
 .../LocalCOMWithAdditionalShapeRewardEnv2.jl  | 187 ++++++++++++++++++
 src/RL/RL.jl                                  |   7 +-
 src/ReCo.jl                                   |   1 +
 4 files changed, 195 insertions(+), 2 deletions(-)
 create mode 100644 src/RL/Envs/LocalCOMWithAdditionalShapeRewardEnv2.jl

diff --git a/src/RL/Envs/LocalCOMWithAdditionalShapeRewardEnv.jl b/src/RL/Envs/LocalCOMWithAdditionalShapeRewardEnv.jl
index 29d825f..fe1a604 100644
--- a/src/RL/Envs/LocalCOMWithAdditionalShapeRewardEnv.jl
+++ b/src/RL/Envs/LocalCOMWithAdditionalShapeRewardEnv.jl
@@ -153,7 +153,7 @@ function state_update_hook!(
         distance_to_local_center_of_mass_sum / n_particles
     env_helper.add_shape_reward_term =
         mean_distance_to_local_center_of_mass /
-        env_helper.max_distance_to_local_center_of_mass < 0.3
+        env_helper.max_distance_to_local_center_of_mass < 0.32
 
     env_helper.center_of_mass = ReCo.center_of_mass(particles, env_helper.half_box_len)
 
diff --git a/src/RL/Envs/LocalCOMWithAdditionalShapeRewardEnv2.jl b/src/RL/Envs/LocalCOMWithAdditionalShapeRewardEnv2.jl
new file mode 100644
index 0000000..b0b0f7e
--- /dev/null
+++ b/src/RL/Envs/LocalCOMWithAdditionalShapeRewardEnv2.jl
@@ -0,0 +1,187 @@
+export LocalCOMWithAdditionalShapeRewardEnv2
+
+using ..ReCo: ReCo
+
+struct LocalCOMWithAdditionalShapeRewardEnv2 <: Env
+    shared::EnvSharedProps
+
+    distance_state_space::Vector{Interval}
+    direction_angle_state_space::Vector{Interval}
+
+    function LocalCOMWithAdditionalShapeRewardEnv2(;
+        n_distance_states::Int64=3, n_direction_angle_states::Int64=3, args
+    )
+        @assert n_distance_states > 1
+        @assert n_direction_angle_states > 1
+
+        direction_angle_state_space = gen_angle_state_space(n_direction_angle_states)
+
+        min_distance = 0.0
+        max_distance = args.skin_radius
+
+        distance_state_space = gen_distance_state_space(
+            min_distance, max_distance, n_distance_states
+        )
+
+        n_states = n_distance_states * n_direction_angle_states + 1
+        # Last state is when no particle is in the skin radius
+
+        state_spaces_labels = gen_state_spaces_labels(
+            ("d", "\\theta"), (distance_state_space, direction_angle_state_space)
+        )
+
+        shared = EnvSharedProps(
+            n_states, (n_distance_states, n_direction_angle_states), state_spaces_labels
+        )
+
+        return new(shared, distance_state_space, direction_angle_state_space)
+    end
+end
+
+mutable struct LocalCOMWithAdditionalShapeRewardEnv2Helper <: EnvHelper
+    shared::EnvHelperSharedProps
+
+    vec_to_neighbour_sums::Vector{SVector{2,Float64}}
+    n_neighbours::Vector{Int64}
+
+    distances_to_local_center_of_mass::Vector{Float64}
+    max_distance_to_local_center_of_mass::Float64
+
+    add_shape_reward_term::Bool
+
+    current_κ::Float64
+    goal_κ::Float64
+    max_distance_to_goal_κ::Float64
+
+    half_box_len::Float64
+
+    function LocalCOMWithAdditionalShapeRewardEnv2Helper(
+        shared::EnvHelperSharedProps, half_box_len::Float64, skin_radius::Float64
+    )
+        goal_κ = 0.4
+        max_distance_to_goal_κ = max(1 - goal_κ, goal_κ)
+
+        max_distance_to_local_center_of_mass = skin_radius
+
+        return new(
+            shared,
+            fill(SVector(0.0, 0.0), shared.n_particles),
+            fill(0, shared.n_particles),
+            zeros(shared.n_particles),
+            max_distance_to_local_center_of_mass,
+            false,
+            1.0,
+            goal_κ,
+            max_distance_to_goal_κ,
+            half_box_len,
+        )
+    end
+end
+
+function gen_env_helper(
+    ::LocalCOMWithAdditionalShapeRewardEnv2, env_helper_shared::EnvHelperSharedProps; args
+)
+    return LocalCOMWithAdditionalShapeRewardEnv2Helper(
+        env_helper_shared, args.half_box_len, args.skin_radius
+    )
+end
+
+function pre_integration_hook!(env_helper::LocalCOMWithAdditionalShapeRewardEnv2Helper)
+    @simd for id in 1:(env_helper.shared.n_particles)
+        env_helper.vec_to_neighbour_sums[id] = SVector(0.0, 0.0)
+        env_helper.n_neighbours[id] = 0
+    end
+
+    return nothing
+end
+
+function state_update_helper_hook!(
+    env_helper::LocalCOMWithAdditionalShapeRewardEnv2Helper,
+    id1::Int64,
+    id2::Int64,
+    r⃗₁₂::SVector{2,Float64},
+    distance²::Float64,
+)
+    env_helper.vec_to_neighbour_sums[id1] += r⃗₁₂
+    env_helper.vec_to_neighbour_sums[id2] -= r⃗₁₂
+
+    env_helper.n_neighbours[id1] += 1
+    env_helper.n_neighbours[id2] += 1
+
+    return nothing
+end
+
+function state_update_hook!(
+    env_helper::LocalCOMWithAdditionalShapeRewardEnv2Helper,
+    particles::Vector{ReCo.Particle},
+)
+    n_particles = env_helper.shared.n_particles
+
+    env = env_helper.shared.env
+
+    distance_to_local_center_of_mass_sum = 0.0
+
+    for particle_id in 1:n_particles
+        n_neighbours = env_helper.n_neighbours[particle_id]
+
+        if n_neighbours == 0
+            state_id = env.shared.n_states
+
+            distance_to_local_center_of_mass_sum +=
+                env_helper.max_distance_to_local_center_of_mass
+        else
+            vec_to_local_center_of_mass =
+                env_helper.vec_to_neighbour_sums[particle_id] / n_neighbours
+            distance = ReCo.norm2d(vec_to_local_center_of_mass)
+            env_helper.distances_to_local_center_of_mass[particle_id] = distance
+            distance_to_local_center_of_mass_sum += distance
+            distance_state_ind = find_state_ind(distance, env.distance_state_space)
+
+            si, co = sincos(particles[particle_id].φ)
+            direction_angle = ReCo.angle2(SVector(co, si), vec_to_local_center_of_mass)
+            direction_state_ind = find_state_ind(
+                direction_angle, env.direction_angle_state_space
+            )
+
+            state_id = env.shared.state_id_tensor[distance_state_ind, direction_state_ind]
+        end
+
+        env_helper.shared.states_id[particle_id] = state_id
+    end
+
+    mean_distance_to_local_center_of_mass =
+        distance_to_local_center_of_mass_sum / n_particles
+    env_helper.add_shape_reward_term =
+        mean_distance_to_local_center_of_mass /
+        env_helper.max_distance_to_local_center_of_mass < 0.32
+
+    return nothing
+end
+
+function update_reward!(
+    env::LocalCOMWithAdditionalShapeRewardEnv2,
+    env_helper::LocalCOMWithAdditionalShapeRewardEnv2Helper,
+    particle::ReCo.Particle,
+)
+    n_neighbours = env_helper.n_neighbours[particle.id]
+
+    if n_neighbours == 0
+        env.shared.reward = 0.0
+    else
+        reward = minimizing_reward(
+            env_helper.distances_to_local_center_of_mass[particle.id],
+            env_helper.max_distance_to_local_center_of_mass,
+        )
+
+        if env_helper.add_shape_reward_term
+            reward += minimizing_reward(
+                abs(env_helper.current_κ - env_helper.goal_κ),
+                env_helper.max_distance_to_goal_κ,
+            )
+        end
+
+        set_normalized_reward!(env, reward, env_helper)
+    end
+
+    return nothing
+end
\ No newline at end of file
diff --git a/src/RL/RL.jl b/src/RL/RL.jl
index d373f83..977e545 100644
--- a/src/RL/RL.jl
+++ b/src/RL/RL.jl
@@ -1,7 +1,11 @@
 module RL
 
 export run_rl,
-    LocalCOMWithAdditionalShapeRewardEnv, OriginEnv, NearestNeighbourEnv, LocalCOMEnv
+    LocalCOMWithAdditionalShapeRewardEnv,
+    LocalCOMWithAdditionalShapeRewardEnv2,
+    OriginEnv,
+    NearestNeighbourEnv,
+    LocalCOMEnv
 
 using Base: OneTo
 
@@ -190,6 +194,7 @@ function gen_state_spaces_labels(
 end
 
 include("Envs/LocalCOMWithAdditionalShapeRewardEnv.jl")
+include("Envs/LocalCOMWithAdditionalShapeRewardEnv2.jl")
 include("Envs/OriginEnv.jl")
 include("Envs/NearestNeighbourEnv.jl")
 include("Envs/LocalCOMEnv.jl")
diff --git a/src/ReCo.jl b/src/ReCo.jl
index f7a3bab..e51d7cd 100644
--- a/src/ReCo.jl
+++ b/src/ReCo.jl
@@ -6,6 +6,7 @@ export init_sim,
     animate,
     plot_snapshot,
     LocalCOMWithAdditionalShapeRewardEnv,
+    LocalCOMWithAdditionalShapeRewardEnv2,
     OriginEnv,
     NearestNeighbourEnv,
     LocalCOMEnv