back to local center of mass with reward as distance to it

2025-09-04 09:12:35 +00:00 · 2022-01-08 22:44:20 +01:00 · 2022-01-08 22:44:20 +01:00 · 9c00da84ea
commit 9c00da84ea
parent 275b69c928
1 changed file with 61 additions and 65 deletions
--- a/src/RL.jl
+++ b/src/RL.jl
@@ -44,20 +44,15 @@ mutable struct Env <: AbstractEnv
    distance_state_space::Vector{Interval}
    direction_angle_state_space::Vector{Interval}
    position_angle_state_space::Vector{Interval}
    n_states::Int64
-    state_space::Vector{SVector{3,Interval}}
+    state_space::Vector{SVector{2,Interval}}
    state_ind_space::OneTo{Int64}
    state_ind::Int64
    reward::Float64
    terminated::Bool
    center_of_mass::SVector{2,Float64} # TODO: Use or remove
    gyration_tensor_eigvec_to_smaller_eigval::SVector{2,Float64}
    gyration_tensor_eigvec_to_bigger_eigval::SVector{2,Float64}
    function Env(;
        max_distance::Float64,
        min_distance::Float64=0.0,
@@ -65,9 +60,8 @@ mutable struct Env <: AbstractEnv
        n_ω_actions::Int64=3,
        max_v::Float64=40.0,
        max_ω::Float64=π / 2,
-        n_distance_states::Int64=4,
+        n_distance_states::Int64=3,
        n_direction_angle_states::Int64=3,
        n_position_angle_states::Int64=8,
    )
        @assert min_distance >= 0.0
        @assert max_distance > min_distance
@@ -77,7 +71,6 @@ mutable struct Env <: AbstractEnv
        @assert max_ω > 0
        @assert n_distance_states > 1
        @assert n_direction_angle_states > 1
        @assert n_position_angle_states > 1
        v_action_space = range(; start=0.0, stop=max_v, length=n_v_actions)
        ω_action_space = range(; start=-max_ω, stop=max_ω, length=n_ω_actions)
@@ -115,23 +108,19 @@ mutable struct Env <: AbstractEnv
        end
        direction_angle_state_space = angle_state_space(n_direction_angle_states)
        position_angle_state_space = angle_state_space(n_position_angle_states)
-        n_states = n_distance_states * n_direction_angle_states * n_position_angle_states
+        n_states = n_distance_states * n_direction_angle_states + 1
-        state_space = Vector{SVector{3,Interval}}(undef, n_states)
+        state_space = Vector{SVector{2,Interval}}(undef, n_states - 1)
        ind = 1
        for distance_state in distance_state_space
            for direction_angle_state in direction_angle_state_space
-                for position_angle_state in position_angle_state_space
+                state_space[ind] = SVector(distance_state, direction_angle_state)
                    state_space[ind] = SVector(
                        distance_state, direction_angle_state, position_angle_state
                    )
                ind += 1
            end
        end
-        end
+        # Last state is when no particle is in the skin radius
        state_ind_space = OneTo(n_states)
@@ -141,21 +130,18 @@ mutable struct Env <: AbstractEnv
            action_ind_space,
            distance_state_space,
            direction_angle_state_space,
            position_angle_state_space,
            n_states,
            state_space,
            state_ind_space,
            INITIAL_STATE_IND,
            INITIAL_REWARD,
            false,
            SVector(0.0, 0.0),
        )
    end
 end
 function reset!(env::Env)
    env.state_ind = env.n_states
    env.reward = INITIAL_REWARD
    env.terminated = false
    return nothing
@@ -187,8 +173,10 @@ struct Params{H<:AbstractHook}
    goal_gyration_tensor_eigvals_ratio::Float64
    n_particles::Int64
-    half_box_len::Float64
+    max_distance::Float64
-    max_elliptic_distance::Float64
+
    vec_to_neighbour_sums::Vector{SVector{2,Float64}}
    n_neighbours::Vector{Int64}
    function Params(
        env::Env,
@@ -197,10 +185,8 @@ struct Params{H<:AbstractHook}
        n_steps_before_actions_update::Int64,
        goal_gyration_tensor_eigvals_ratio::Float64,
        n_particles::Int64,
-        half_box_len::Float64,
+        max_distance::Float64,
    ) where {H<:AbstractHook}
        max_elliptic_distance = sqrt(2) * half_box_len
        n_states = env.n_states
        return new{H}(
@@ -214,23 +200,35 @@ struct Params{H<:AbstractHook}
            n_steps_before_actions_update,
            goal_gyration_tensor_eigvals_ratio,
            n_particles,
-            half_box_len,
+            max_distance,
-            max_elliptic_distance,
+            fill(SVector(0.0, 0.0), n_particles),
            fill(0, n_particles),
        )
    end
 end
 # Reset the per-particle neighbour accumulators stored in `rl_params`.
 #
 # For every particle id in 1:rl_params.n_particles, the summed neighbour vector
 # is set back to (0.0, 0.0) and the neighbour counter back to 0.
 # NOTE(review): judging by its name, this is presumably invoked before each
 # integration step - confirm against the caller.
 #
 # Always returns `nothing`; the work is done by mutating the two vectors in place.
 function pre_integration_hook(rl_params::Params)
    neighbour_sums = rl_params.vec_to_neighbour_sums
    neighbour_counts = rl_params.n_neighbours
    zero_vec = SVector(0.0, 0.0)

    @simd for particle_id in 1:(rl_params.n_particles)
        neighbour_sums[particle_id] = zero_vec
        neighbour_counts[particle_id] = 0
    end

    return nothing
 end
 # Record one neighbour pair in the accumulators stored in `rl_params`.
 #
 # `r⃗₁₂` is added to the running vector sum of particle `id1` and subtracted from
 # the running vector sum of particle `id2`; the neighbour counters of both
 # particles are each incremented by one.
 # NOTE(review): `r⃗₁₂` is presumably the displacement between the two particles as
 # seen from particle 1 - confirm the sign convention against the caller.
 #
 # Always returns `nothing`; the work is done by mutating the two vectors in place.
 function state_update_helper_hook(
    rl_params::Params, id1::Int64, id2::Int64, r⃗₁₂::SVector{2,Float64}
 )
    neighbour_sums = rl_params.vec_to_neighbour_sums
    neighbour_counts = rl_params.n_neighbours

    # Opposite signs: the same pair vector points the other way for the partner.
    neighbour_sums[id1] = neighbour_sums[id1] + r⃗₁₂
    neighbour_sums[id2] = neighbour_sums[id2] - r⃗₁₂

    neighbour_counts[id1] = neighbour_counts[id1] + 1
    neighbour_counts[id2] = neighbour_counts[id2] + 1

    return nothing
 end
-function find_state_ind(state::S, state_space::Vector{S}) where {S<:SVector{3,Interval}}
+function find_state_ind(state::S, state_space::Vector{S}) where {S<:SVector{2,Interval}}
    return findfirst(x -> x == state, state_space)
 end
@@ -249,44 +247,35 @@ function state_update_hook(rl_params::Params, particles::Vector{Particle})
    env = rl_params.env
    env.center_of_mass = Shape.center_of_mass(particles, rl_params.half_box_len)
    for id in 1:(rl_params.n_particles)
-        particle = particles[id]
+        n_neighbours = rl_params.n_neighbours[id]
-        vec_to_center_of_mass = ReCo.minimum_image(
+        if n_neighbours == 0
-            env.center_of_mass - particle.c, rl_params.half_box_len
+            state_ind = env.n_states
        else
            vec_to_local_center_of_mass = rl_params.vec_to_neighbour_sums[id] / n_neighbours
            distance = sqrt(
                vec_to_local_center_of_mass[1]^2 + vec_to_local_center_of_mass[2]^2
            )
        distance = sqrt(vec_to_center_of_mass[1]^2 + vec_to_center_of_mass[2]^2)
            distance_state = find_state_interval(distance, env.distance_state_space)
            si, co = sincos(particles[id].φ)
-        direction_angle = angle2(SVector(co, si), vec_to_center_of_mass)
+            direction_angle = angle2(SVector(co, si), vec_to_local_center_of_mass)
        position_angle = atan(-vec_to_center_of_mass[2], -vec_to_center_of_mass[1])
            direction_angle_state = find_state_interval(
                direction_angle, env.direction_angle_state_space
            )
        position_angle_state = find_state_interval(
            position_angle, env.position_angle_state_space
        )
-        state = SVector{3,Interval}(
+            state = SVector{2,Interval}(distance_state, direction_angle_state)
            distance_state, direction_angle_state, position_angle_state
        )
            state_ind = find_state_ind(state, env.state_space)
        end
        rl_params.states_ind[id] = state_ind
    end
    v1, v2 = Shape.gyration_tensor_eigvecs(particles, rl_params.half_box_len) # TODO: Reuse center_of_mass
    env.gyration_tensor_eigvec_to_smaller_eigval = v1
    env.gyration_tensor_eigvec_to_bigger_eigval = v2
    return nothing
 end
@@ -295,13 +284,19 @@ function get_env_agent_hook(rl_params::Params)
 end
 function update_reward!(env::Env, rl_params::Params, particle::Particle)
    id = particle.id
    normalization = (rl_params.max_distance * rl_params.n_particles)
    n_neighbours = rl_params.n_neighbours[id]
    if n_neighbours == 0
        env.reward = -(rl_params.max_distance^2) / normalization
    else
        vec_to_local_center_of_mass = rl_params.vec_to_neighbour_sums[id] / n_neighbours # TODO: Reuse vec_to_local_center_of_mass from state_update_hook
        env.reward =
-        -Shape.elliptical_distance(
+            -(vec_to_local_center_of_mass[1]^2 + vec_to_local_center_of_mass[2]^2) /
-            particle,
+            normalization
-            env.gyration_tensor_eigvec_to_smaller_eigval,
+    end
            env.gyration_tensor_eigvec_to_bigger_eigval,
            rl_params.goal_gyration_tensor_eigvals_ratio,
        ) / (rl_params.max_elliptic_distance^2 * rl_params.n_particles)
    return nothing
 end
@@ -403,11 +398,12 @@ function run_rl(;
    Random.seed!(seed)
    sim_consts = ReCo.gen_sim_consts(
-        n_particles, 0.0; skin_to_interaction_r_ratio=1.5, packing_ratio=0.22
+        n_particles, 0.0; skin_to_interaction_r_ratio=2.0, packing_ratio=0.22
    )
    n_particles = sim_consts.n_particles
-    env = Env(; max_distance=sqrt(2) * sim_consts.half_box_len)
+    max_distance = sim_consts.skin_r
    env = Env(; max_distance=max_distance)
    agent = gen_agent(env.n_states, env.n_actions, ϵ_stable)
@@ -422,7 +418,7 @@ function run_rl(;
        n_steps_before_actions_update,
        goal_gyration_tensor_eigvals_ratio,
        n_particles,
-        sim_consts.half_box_len,
+        max_distance,
    )
    parent_dir = "RL" * parent_dir