Switch back to the local center of mass, with the reward based on the distance to it

2024-12-30 17:43:34 +00:00 · 2022-01-08 22:44:20 +01:00 · 2022-01-08 22:44:20 +01:00 · 9c00da84ea
commit 9c00da84ea
parent 275b69c928
1 changed file with 61 additions and 65 deletions
--- a/src/RL.jl
+++ b/src/RL.jl
@@ -44,20 +44,15 @@ mutable struct Env <: AbstractEnv

    distance_state_space::Vector{Interval}
    direction_angle_state_space::Vector{Interval}
-    position_angle_state_space::Vector{Interval}

    n_states::Int64
-    state_space::Vector{SVector{3,Interval}}
+    state_space::Vector{SVector{2,Interval}}
    state_ind_space::OneTo{Int64}
    state_ind::Int64

    reward::Float64
    terminated::Bool

-    center_of_mass::SVector{2,Float64} # TODO: Use or remove
-    gyration_tensor_eigvec_to_smaller_eigval::SVector{2,Float64}
-    gyration_tensor_eigvec_to_bigger_eigval::SVector{2,Float64}
-
    function Env(;
        max_distance::Float64,
        min_distance::Float64=0.0,
@@ -65,9 +60,8 @@ mutable struct Env <: AbstractEnv
        n_ω_actions::Int64=3,
        max_v::Float64=40.0,
        max_ω::Float64=π / 2,
-        n_distance_states::Int64=4,
+        n_distance_states::Int64=3,
        n_direction_angle_states::Int64=3,
-        n_position_angle_states::Int64=8,
    )
        @assert min_distance >= 0.0
        @assert max_distance > min_distance
@@ -77,7 +71,6 @@ mutable struct Env <: AbstractEnv
        @assert max_ω > 0
        @assert n_distance_states > 1
        @assert n_direction_angle_states > 1
-        @assert n_position_angle_states > 1

        v_action_space = range(; start=0.0, stop=max_v, length=n_v_actions)
        ω_action_space = range(; start=-max_ω, stop=max_ω, length=n_ω_actions)
@@ -115,23 +108,19 @@ mutable struct Env <: AbstractEnv
        end

        direction_angle_state_space = angle_state_space(n_direction_angle_states)
-        position_angle_state_space = angle_state_space(n_position_angle_states)

-        n_states = n_distance_states * n_direction_angle_states * n_position_angle_states
+        n_states = n_distance_states * n_direction_angle_states + 1

-        state_space = Vector{SVector{3,Interval}}(undef, n_states)
+        state_space = Vector{SVector{2,Interval}}(undef, n_states - 1)

        ind = 1
        for distance_state in distance_state_space
            for direction_angle_state in direction_angle_state_space
-                for position_angle_state in position_angle_state_space
-                    state_space[ind] = SVector(
-                        distance_state, direction_angle_state, position_angle_state
-                    )
-                    ind += 1
-                end
+                state_space[ind] = SVector(distance_state, direction_angle_state)
+                ind += 1
            end
        end
+        # Last state is when no particle is in the skin radius

        state_ind_space = OneTo(n_states)

@@ -141,21 +130,18 @@ mutable struct Env <: AbstractEnv
            action_ind_space,
            distance_state_space,
            direction_angle_state_space,
-            position_angle_state_space,
            n_states,
            state_space,
            state_ind_space,
            INITIAL_STATE_IND,
            INITIAL_REWARD,
            false,
-            SVector(0.0, 0.0),
        )
    end
 end

 function reset!(env::Env)
    env.state_ind = env.n_states
-    env.reward = INITIAL_REWARD
    env.terminated = false

    return nothing
@@ -187,8 +173,10 @@ struct Params{H<:AbstractHook}
    goal_gyration_tensor_eigvals_ratio::Float64

    n_particles::Int64
-    half_box_len::Float64
-    max_elliptic_distance::Float64
+    max_distance::Float64
+
+    vec_to_neighbour_sums::Vector{SVector{2,Float64}}
+    n_neighbours::Vector{Int64}

    function Params(
        env::Env,
@@ -197,10 +185,8 @@ struct Params{H<:AbstractHook}
        n_steps_before_actions_update::Int64,
        goal_gyration_tensor_eigvals_ratio::Float64,
        n_particles::Int64,
-        half_box_len::Float64,
+        max_distance::Float64,
    ) where {H<:AbstractHook}
-        max_elliptic_distance = sqrt(2) * half_box_len
-
        n_states = env.n_states

        return new{H}(
@@ -214,23 +200,35 @@ struct Params{H<:AbstractHook}
            n_steps_before_actions_update,
            goal_gyration_tensor_eigvals_ratio,
            n_particles,
-            half_box_len,
-            max_elliptic_distance,
+            max_distance,
+            fill(SVector(0.0, 0.0), n_particles),
+            fill(0, n_particles),
        )
    end
 end

 function pre_integration_hook(rl_params::Params)
+    @simd for id in 1:(rl_params.n_particles)
+        rl_params.vec_to_neighbour_sums[id] = SVector(0.0, 0.0)
+        rl_params.n_neighbours[id] = 0
+    end
+
    return nothing
 end

 function state_update_helper_hook(
    rl_params::Params, id1::Int64, id2::Int64, r⃗₁₂::SVector{2,Float64}
 )
+    rl_params.vec_to_neighbour_sums[id1] += r⃗₁₂
+    rl_params.vec_to_neighbour_sums[id2] -= r⃗₁₂
+
+    rl_params.n_neighbours[id1] += 1
+    rl_params.n_neighbours[id2] += 1
+
    return nothing
 end

-function find_state_ind(state::S, state_space::Vector{S}) where {S<:SVector{3,Interval}}
+function find_state_ind(state::S, state_space::Vector{S}) where {S<:SVector{2,Interval}}
    return findfirst(x -> x == state, state_space)
 end

@@ -249,44 +247,35 @@ function state_update_hook(rl_params::Params, particles::Vector{Particle})

    env = rl_params.env

-    env.center_of_mass = Shape.center_of_mass(particles, rl_params.half_box_len)
-
    for id in 1:(rl_params.n_particles)
-        particle = particles[id]
+        n_neighbours = rl_params.n_neighbours[id]

-        vec_to_center_of_mass = ReCo.minimum_image(
-            env.center_of_mass - particle.c, rl_params.half_box_len
-        )
+        if n_neighbours == 0
+            state_ind = env.n_states
+        else
+            vec_to_local_center_of_mass = rl_params.vec_to_neighbour_sums[id] / n_neighbours

-        distance = sqrt(vec_to_center_of_mass[1]^2 + vec_to_center_of_mass[2]^2)
+            distance = sqrt(
+                vec_to_local_center_of_mass[1]^2 + vec_to_local_center_of_mass[2]^2
+            )

-        distance_state = find_state_interval(distance, env.distance_state_space)
+            distance_state = find_state_interval(distance, env.distance_state_space)

-        si, co = sincos(particles[id].φ)
+            si, co = sincos(particles[id].φ)

-        direction_angle = angle2(SVector(co, si), vec_to_center_of_mass)
-        position_angle = atan(-vec_to_center_of_mass[2], -vec_to_center_of_mass[1])
+            direction_angle = angle2(SVector(co, si), vec_to_local_center_of_mass)

-        direction_angle_state = find_state_interval(
-            direction_angle, env.direction_angle_state_space
-        )
-        position_angle_state = find_state_interval(
-            position_angle, env.position_angle_state_space
-        )
+            direction_angle_state = find_state_interval(
+                direction_angle, env.direction_angle_state_space
+            )

-        state = SVector{3,Interval}(
-            distance_state, direction_angle_state, position_angle_state
-        )
-        state_ind = find_state_ind(state, env.state_space)
+            state = SVector{2,Interval}(distance_state, direction_angle_state)
+            state_ind = find_state_ind(state, env.state_space)
+        end

        rl_params.states_ind[id] = state_ind
    end

-    v1, v2 = Shape.gyration_tensor_eigvecs(particles, rl_params.half_box_len) # TODO: Reuse center_of_mass
-
-    env.gyration_tensor_eigvec_to_smaller_eigval = v1
-    env.gyration_tensor_eigvec_to_bigger_eigval = v2
-
    return nothing
 end

@@ -295,13 +284,19 @@ function get_env_agent_hook(rl_params::Params)
 end

 function update_reward!(env::Env, rl_params::Params, particle::Particle)
-    env.reward =
-        -Shape.elliptical_distance(
-            particle,
-            env.gyration_tensor_eigvec_to_smaller_eigval,
-            env.gyration_tensor_eigvec_to_bigger_eigval,
-            rl_params.goal_gyration_tensor_eigvals_ratio,
-        ) / (rl_params.max_elliptic_distance^2 * rl_params.n_particles)
+    id = particle.id
+
+    normalization = (rl_params.max_distance * rl_params.n_particles)
+
+    n_neighbours = rl_params.n_neighbours[id]
+    if n_neighbours == 0
+        env.reward = -(rl_params.max_distance^2) / normalization
+    else
+        vec_to_local_center_of_mass = rl_params.vec_to_neighbour_sums[id] / n_neighbours # TODO: Reuse vec_to_local_center_of_mass from state_update_hook
+        env.reward =
+            -(vec_to_local_center_of_mass[1]^2 + vec_to_local_center_of_mass[2]^2) /
+            normalization
+    end

    return nothing
 end
@@ -403,11 +398,12 @@ function run_rl(;
    Random.seed!(seed)

    sim_consts = ReCo.gen_sim_consts(
-        n_particles, 0.0; skin_to_interaction_r_ratio=1.5, packing_ratio=0.22
+        n_particles, 0.0; skin_to_interaction_r_ratio=2.0, packing_ratio=0.22
    )
    n_particles = sim_consts.n_particles

-    env = Env(; max_distance=sqrt(2) * sim_consts.half_box_len)
+    max_distance = sim_consts.skin_r
+    env = Env(; max_distance=max_distance)

    agent = gen_agent(env.n_states, env.n_actions, ϵ_stable)

@@ -422,7 +418,7 @@ function run_rl(;
        n_steps_before_actions_update,
        goal_gyration_tensor_eigvals_ratio,
        n_particles,
-        sim_consts.half_box_len,
+        max_distance,
    )

    parent_dir = "RL" * parent_dir