From 9c00da84ea9dad809119127be1c56b84a5ca83c5 Mon Sep 17 00:00:00 2001
From: Mo8it <mo8it@protonmail.com>
Date: Sat, 8 Jan 2022 22:44:20 +0100
Subject: [PATCH] Return to local center of mass; reward is negative squared distance to it

---
 src/RL.jl | 126 ++++++++++++++++++++++++++----------------------------
 1 file changed, 61 insertions(+), 65 deletions(-)

diff --git a/src/RL.jl b/src/RL.jl
index 5aa17da..e78155c 100644
--- a/src/RL.jl
+++ b/src/RL.jl
@@ -44,20 +44,15 @@ mutable struct Env <: AbstractEnv
 
     distance_state_space::Vector{Interval}
     direction_angle_state_space::Vector{Interval}
-    position_angle_state_space::Vector{Interval}
 
     n_states::Int64
-    state_space::Vector{SVector{3,Interval}}
+    state_space::Vector{SVector{2,Interval}}
     state_ind_space::OneTo{Int64}
     state_ind::Int64
 
     reward::Float64
     terminated::Bool
 
-    center_of_mass::SVector{2,Float64} # TODO: Use or remove
-    gyration_tensor_eigvec_to_smaller_eigval::SVector{2,Float64}
-    gyration_tensor_eigvec_to_bigger_eigval::SVector{2,Float64}
-
     function Env(;
         max_distance::Float64,
         min_distance::Float64=0.0,
@@ -65,9 +60,8 @@ mutable struct Env <: AbstractEnv
         n_ω_actions::Int64=3,
         max_v::Float64=40.0,
         max_ω::Float64=π / 2,
-        n_distance_states::Int64=4,
+        n_distance_states::Int64=3,
         n_direction_angle_states::Int64=3,
-        n_position_angle_states::Int64=8,
     )
         @assert min_distance >= 0.0
         @assert max_distance > min_distance
@@ -77,7 +71,6 @@ mutable struct Env <: AbstractEnv
         @assert max_ω > 0
         @assert n_distance_states > 1
         @assert n_direction_angle_states > 1
-        @assert n_position_angle_states > 1
 
         v_action_space = range(; start=0.0, stop=max_v, length=n_v_actions)
         ω_action_space = range(; start=-max_ω, stop=max_ω, length=n_ω_actions)
@@ -115,23 +108,19 @@ mutable struct Env <: AbstractEnv
         end
 
         direction_angle_state_space = angle_state_space(n_direction_angle_states)
-        position_angle_state_space = angle_state_space(n_position_angle_states)
 
-        n_states = n_distance_states * n_direction_angle_states * n_position_angle_states
+        n_states = n_distance_states * n_direction_angle_states + 1
 
-        state_space = Vector{SVector{3,Interval}}(undef, n_states)
+        state_space = Vector{SVector{2,Interval}}(undef, n_states - 1)
 
         ind = 1
         for distance_state in distance_state_space
             for direction_angle_state in direction_angle_state_space
-                for position_angle_state in position_angle_state_space
-                    state_space[ind] = SVector(
-                        distance_state, direction_angle_state, position_angle_state
-                    )
-                    ind += 1
-                end
+                state_space[ind] = SVector(distance_state, direction_angle_state)
+                ind += 1
             end
         end
+        # Index n_states has no state_space entry: no particle within the skin radius
 
         state_ind_space = OneTo(n_states)
 
@@ -141,21 +130,18 @@ mutable struct Env <: AbstractEnv
             action_ind_space,
             distance_state_space,
             direction_angle_state_space,
-            position_angle_state_space,
             n_states,
             state_space,
             state_ind_space,
             INITIAL_STATE_IND,
             INITIAL_REWARD,
             false,
-            SVector(0.0, 0.0),
         )
     end
 end
 
 function reset!(env::Env)
     env.state_ind = env.n_states
-    env.reward = INITIAL_REWARD
     env.terminated = false
 
     return nothing
@@ -187,8 +173,10 @@ struct Params{H<:AbstractHook}
     goal_gyration_tensor_eigvals_ratio::Float64
 
     n_particles::Int64
-    half_box_len::Float64
-    max_elliptic_distance::Float64
+    max_distance::Float64
+
+    vec_to_neighbour_sums::Vector{SVector{2,Float64}}
+    n_neighbours::Vector{Int64}
 
     function Params(
         env::Env,
@@ -197,10 +185,8 @@ struct Params{H<:AbstractHook}
         n_steps_before_actions_update::Int64,
         goal_gyration_tensor_eigvals_ratio::Float64,
         n_particles::Int64,
-        half_box_len::Float64,
+        max_distance::Float64,
     ) where {H<:AbstractHook}
-        max_elliptic_distance = sqrt(2) * half_box_len
-
         n_states = env.n_states
 
         return new{H}(
@@ -214,23 +200,35 @@ struct Params{H<:AbstractHook}
             n_steps_before_actions_update,
             goal_gyration_tensor_eigvals_ratio,
             n_particles,
-            half_box_len,
-            max_elliptic_distance,
+            max_distance,
+            fill(SVector(0.0, 0.0), n_particles),
+            fill(0, n_particles),
         )
     end
 end
 
 function pre_integration_hook(rl_params::Params)
+    # Zero every particle's neighbour sum and count so accumulation restarts at zero
+    sums, counts = rl_params.vec_to_neighbour_sums, rl_params.n_neighbours
+    @simd for id in 1:(rl_params.n_particles)
+        sums[id], counts[id] = SVector(0.0, 0.0), 0
+    end
     return nothing
 end
 
 function state_update_helper_hook(
     rl_params::Params, id1::Int64, id2::Int64, r⃗₁₂::SVector{2,Float64}
 )
+    # id1 accumulates +r⃗₁₂ and id2 accumulates -r⃗₁₂; each gains one neighbour
+    for (id, sgn) in ((id1, 1), (id2, -1))
+        rl_params.vec_to_neighbour_sums[id] += sgn * r⃗₁₂
+        rl_params.n_neighbours[id] += 1
+    end
+
     return nothing
 end
 
-function find_state_ind(state::S, state_space::Vector{S}) where {S<:SVector{3,Interval}}
+function find_state_ind(state::S, state_space::Vector{S}) where {S<:SVector{2,Interval}}
     return findfirst(x -> x == state, state_space)
 end
 
@@ -249,44 +247,35 @@ function state_update_hook(rl_params::Params, particles::Vector{Particle})
 
     env = rl_params.env
 
-    env.center_of_mass = Shape.center_of_mass(particles, rl_params.half_box_len)
-
     for id in 1:(rl_params.n_particles)
-        particle = particles[id]
+        n_neighbours = rl_params.n_neighbours[id]
 
-        vec_to_center_of_mass = ReCo.minimum_image(
-            env.center_of_mass - particle.c, rl_params.half_box_len
-        )
+        if n_neighbours == 0
+            state_ind = env.n_states
+        else
+            vec_to_local_center_of_mass = rl_params.vec_to_neighbour_sums[id] / n_neighbours
 
-        distance = sqrt(vec_to_center_of_mass[1]^2 + vec_to_center_of_mass[2]^2)
+            distance = sqrt(
+                vec_to_local_center_of_mass[1]^2 + vec_to_local_center_of_mass[2]^2
+            )
 
-        distance_state = find_state_interval(distance, env.distance_state_space)
+            distance_state = find_state_interval(distance, env.distance_state_space)
 
-        si, co = sincos(particles[id].φ)
+            si, co = sincos(particles[id].φ)
 
-        direction_angle = angle2(SVector(co, si), vec_to_center_of_mass)
-        position_angle = atan(-vec_to_center_of_mass[2], -vec_to_center_of_mass[1])
+            direction_angle = angle2(SVector(co, si), vec_to_local_center_of_mass)
 
-        direction_angle_state = find_state_interval(
-            direction_angle, env.direction_angle_state_space
-        )
-        position_angle_state = find_state_interval(
-            position_angle, env.position_angle_state_space
-        )
+            direction_angle_state = find_state_interval(
+                direction_angle, env.direction_angle_state_space
+            )
 
-        state = SVector{3,Interval}(
-            distance_state, direction_angle_state, position_angle_state
-        )
-        state_ind = find_state_ind(state, env.state_space)
+            state = SVector{2,Interval}(distance_state, direction_angle_state)
+            state_ind = find_state_ind(state, env.state_space)
+        end
 
         rl_params.states_ind[id] = state_ind
     end
 
-    v1, v2 = Shape.gyration_tensor_eigvecs(particles, rl_params.half_box_len) # TODO: Reuse center_of_mass
-
-    env.gyration_tensor_eigvec_to_smaller_eigval = v1
-    env.gyration_tensor_eigvec_to_bigger_eigval = v2
-
     return nothing
 end
 
@@ -295,13 +284,19 @@ function get_env_agent_hook(rl_params::Params)
 end
 
 function update_reward!(env::Env, rl_params::Params, particle::Particle)
-    env.reward =
-        -Shape.elliptical_distance(
-            particle,
-            env.gyration_tensor_eigvec_to_smaller_eigval,
-            env.gyration_tensor_eigvec_to_bigger_eigval,
-            rl_params.goal_gyration_tensor_eigvals_ratio,
-        ) / (rl_params.max_elliptic_distance^2 * rl_params.n_particles)
+    # Reward: -(squared distance to the local center of mass) / normalization,
+    # with max_distance^2 as the squared distance when there are no neighbours.
+    # NOTE(review): the numerator is squared but the normalization is linear in
+    # max_distance — confirm that this scaling is intended.
+    # TODO: Reuse vec_to_local_center_of_mass from state_update_hook
+    n_neighbours = rl_params.n_neighbours[particle.id]
+    squared_distance = if n_neighbours == 0
+        rl_params.max_distance^2
+    else
+        offset = rl_params.vec_to_neighbour_sums[particle.id] / n_neighbours
+        offset[1]^2 + offset[2]^2
+    end
+    env.reward = -squared_distance / (rl_params.max_distance * rl_params.n_particles)
 
     return nothing
 end
@@ -403,11 +398,12 @@ function run_rl(;
     Random.seed!(seed)
 
     sim_consts = ReCo.gen_sim_consts(
-        n_particles, 0.0; skin_to_interaction_r_ratio=1.5, packing_ratio=0.22
+        n_particles, 0.0; skin_to_interaction_r_ratio=2.0, packing_ratio=0.22
     )
     n_particles = sim_consts.n_particles
 
-    env = Env(; max_distance=sqrt(2) * sim_consts.half_box_len)
+    max_distance = sim_consts.skin_r
+    env = Env(; max_distance=max_distance)
 
     agent = gen_agent(env.n_states, env.n_actions, ϵ_stable)
 
@@ -422,7 +418,7 @@ function run_rl(;
         n_steps_before_actions_update,
         goal_gyration_tensor_eigvals_ratio,
         n_particles,
-        sim_consts.half_box_len,
+        max_distance,
     )
 
     parent_dir = "RL" * parent_dir