Added shape reward term

2025-09-04 09:12:35 +00:00 · 2022-01-14 12:28:47 +01:00 · 2022-01-14 12:28:47 +01:00 · 891af721c0
commit 891af721c0
parent 9b233d53c4
4 changed files with 98 additions and 27 deletions
--- a/Project.toml
+++ b/Project.toml
@@ -1,4 +1,4 @@
-name = "ReCo.jl"
+name = "ReCo"
 uuid = "b25f7548-fcc9-4c91-bc24-841b54f4dd54"
 authors = ["MoBit <mo8it@protonmail.com>"]
 version = "0.2.0"
--- a/src/RL/EnvHelper.jl
+++ b/src/RL/EnvHelper.jl
@@ -40,7 +40,7 @@ struct EnvHelperSharedProps{H<:AbstractHook}
    end
 end

-function gen_env_helper(::Env, env_helper_params::EnvHelperSharedProps)
+function gen_env_helper(::Env, env_helper_params::EnvHelperSharedProps; args) # Env fallback; keyword `args` has no default
    return method_not_implemented()
 end

--- a/src/RL/LocalCOMEnv.jl
+++ b/src/RL/LocalCOMEnv.jl
@@ -6,17 +6,15 @@ struct LocalCOMEnv <: Env
    distance_state_space::Vector{Interval}
    direction_angle_state_space::Vector{Interval}

-    max_distance::Float64
-
-    function LocalCOMEnv(
-        sim_consts; n_distance_states::Int64=3, n_direction_angle_states::Int64=3
+    function LocalCOMEnv(;
+        n_distance_states::Int64=3, n_direction_angle_states::Int64=3, args
    )
        @assert n_direction_angle_states > 1

        direction_angle_state_space = gen_angle_state_space(n_direction_angle_states)

        min_distance = 0.0
-        max_distance = sim_consts.skin_r
+        max_distance = args.skin_r

        distance_state_space = gen_distance_state_space(
            min_distance, max_distance, n_distance_states
@@ -37,29 +35,50 @@ struct LocalCOMEnv <: Env

        shared = EnvSharedProps(n_states, state_space)

-        return new(shared, distance_state_space, direction_angle_state_space, max_distance)
+        return new(shared, distance_state_space, direction_angle_state_space)
    end
 end

-struct LocalCOMEnvHelper <: EnvHelper
+mutable struct LocalCOMEnvHelper <: EnvHelper # fields are reassigned in state_update_hook
    shared::EnvHelperSharedProps

    vec_to_neighbour_sums::Vector{SVector{2,Float64}}
    n_neighbours::Vector{Int64}
-    sq_norm2d_vec_to_local_center_of_mass::Vector{Float64}

-    function LocalCOMEnvHelper(shared::EnvHelperSharedProps)
+    distances_to_local_center_of_mass::Vector{Float64} # one entry per particle
+    max_distance_to_local_center_of_mass::Float64 # initialised from skin_r
+
+    add_shape_reward_term::Bool # starts false; recomputed in state_update_hook
+
+    gyration_tensor_eigvec_to_smaller_eigval::SVector{2,Float64}
+    gyration_tensor_eigvec_to_bigger_eigval::SVector{2,Float64}
+
+    half_box_len::Float64
+    max_elliptical_distance::Float64 # fixed at construction
+
+    function LocalCOMEnvHelper(shared::EnvHelperSharedProps, half_box_len::Float64, skin_r)
+        max_elliptical_distance =
+            sqrt(2) * half_box_len / shared.goal_gyration_tensor_eigvals_ratio
+
+        max_distance_to_local_center_of_mass = skin_r
+
        return new(
            shared,
            fill(SVector(0.0, 0.0), shared.n_particles),
            fill(0, shared.n_particles),
            zeros(shared.n_particles),
+            max_distance_to_local_center_of_mass,
+            false, # add_shape_reward_term
+            SVector(0.0, 0.0), # gyration_tensor_eigvec_to_smaller_eigval
+            SVector(0.0, 0.0), # gyration_tensor_eigvec_to_bigger_eigval
+            half_box_len,
+            max_elliptical_distance,
        )
    end
 end

-function gen_env_helper(::LocalCOMEnv, env_helper_params::EnvHelperSharedProps)
-    return LocalCOMEnvHelper(env_helper_params)
+function gen_env_helper(::LocalCOMEnv, env_helper_shared::EnvHelperSharedProps; args) # reads args.half_box_len, args.skin_r
+    return LocalCOMEnvHelper(env_helper_shared, args.half_box_len, args.skin_r)
 end

 function pre_integration_hook(env_helper::LocalCOMEnvHelper)
@@ -86,26 +105,31 @@ end
 function state_update_hook(env_helper::LocalCOMEnvHelper, particles::Vector{Particle})
    n_particles = env_helper.shared.n_particles

-    @turbo for id in 1:(n_particles)
+    @turbo for id in 1:n_particles
        env_helper.shared.old_states_ind[id] = env_helper.shared.states_ind[id]
    end

    env = env_helper.shared.env

+    distance_to_local_center_of_mass_sum = 0.0
+
    for id in 1:n_particles
        n_neighbours = env_helper.n_neighbours[id]

        if n_neighbours == 0
            state_ind = env.shared.n_states
+
+            distance_to_local_center_of_mass_sum +=
+                env_helper.max_distance_to_local_center_of_mass
        else
            vec_to_local_center_of_mass =
                env_helper.vec_to_neighbour_sums[id] / n_neighbours

-            sq_norm2d_vec_to_local_center_of_mass = sq_norm2d(vec_to_local_center_of_mass)
-            env_helper.sq_norm2d_vec_to_local_center_of_mass[id] =
-                sq_norm2d_vec_to_local_center_of_mass
+            distance = norm2d(vec_to_local_center_of_mass)

-            distance = sqrt(sq_norm2d_vec_to_local_center_of_mass)
+            env_helper.distances_to_local_center_of_mass[id] = distance
+
+            distance_to_local_center_of_mass_sum += distance

            distance_state = find_state_interval(distance, env.distance_state_space)

@@ -124,20 +148,64 @@ function state_update_hook(env_helper::LocalCOMEnvHelper, particles::Vector{Part
        env_helper.shared.states_ind[id] = state_ind
    end

+    mean_distance_to_local_center_of_mass =
+        distance_to_local_center_of_mass_sum / n_particles
+    env_helper.add_shape_reward_term =
+        mean_distance_to_local_center_of_mass /
+        env_helper.max_distance_to_local_center_of_mass < 0.32
+
+    if env_helper.add_shape_reward_term
+        #println(mean_distance_to_local_center_of_mass / env_helper.max_distance_to_local_center_of_mass) # TODO: Remove
+    end
+
+    v1, v2 = Shape.gyration_tensor_eigvecs(particles, env_helper.half_box_len)
+
+    env_helper.gyration_tensor_eigvec_to_smaller_eigval = v1
+    env_helper.gyration_tensor_eigvec_to_bigger_eigval = v2
+
    return nothing
 end

+"""
+    minimizing_reward(value::Real, max_value::Real)
+
+Return `(max_value - value) / (max_value + value)`: 1 at value 0, 0 at `max_value`.
+"""
+function minimizing_reward(value::Real, max_value::Real)
+    return (max_value - value) / (max_value + value)
+end
+
 function update_reward!(env::LocalCOMEnv, env_helper::LocalCOMEnvHelper, particle::Particle)
    id = particle.id

-    normalization = (env.max_distance * env_helper.shared.n_particles)
-
+    normalization = env_helper.shared.n_particles # TODO: Add factor from steps
    n_neighbours = env_helper.n_neighbours[id]
+
    if n_neighbours == 0
-        env.shared.reward = -(env.max_distance^2) / normalization
+        env.shared.reward = 0.0
    else
-        env.shared.reward =
-            -(env_helper.sq_norm2d_vec_to_local_center_of_mass[id]) / normalization # TODO: Add shape term
+        reward = minimizing_reward(
+            env_helper.distances_to_local_center_of_mass[id],
+            env_helper.max_distance_to_local_center_of_mass,
+        )
+
+        if env_helper.add_shape_reward_term
+            elliptical_distance = ReCo.elliptical_distance(
+                particle,
+                env_helper.gyration_tensor_eigvec_to_smaller_eigval,
+                env_helper.gyration_tensor_eigvec_to_bigger_eigval,
+                env_helper.shared.goal_gyration_tensor_eigvals_ratio,
+            )
+
+            reward += unnormalized_reward(
+                elliptical_distance,
+                env_helper.max_elliptical_distance, # TODO: Fix sq
+            )
+
+            # println(elliptical_distance / env_helper.max_elliptical_distance) # TODO: Remove
+        end
+
+        env.shared.reward = reward / normalization
    end

    return nothing
--- a/src/RL/RL.jl
+++ b/src/RL/RL.jl
@@ -17,7 +17,6 @@ using ..ReCo:
    Particle,
    angle2,
    norm2d,
-    sq_norm2d,
    Shape,
    DEFAULT_SKIN_TO_INTERACTION_R_RATIO,
    method_not_implemented
@@ -89,7 +88,9 @@ function run_rl(;
    )
    n_particles = sim_consts.n_particles # Not always equal to the input!

-    env = EnvType(sim_consts)
+    env_args = (skin_r=sim_consts.skin_r,)
+
+    env = EnvType(; args=env_args)

    agent = gen_agent(env.shared.n_states, env.shared.n_actions, ϵ_stable)

@@ -97,7 +98,7 @@ function run_rl(;

    hook = TotalRewardPerEpisode()

-    env_helper_params = EnvHelperSharedProps(
+    env_helper_shared = EnvHelperSharedProps(
        env,
        agent,
        hook,
@@ -106,7 +107,9 @@ function run_rl(;
        n_particles,
    )

-    env_helper = gen_env_helper(env, env_helper_params)
+    env_helper_args = (half_box_len=sim_consts.half_box_len, skin_r=sim_consts.skin_r)
+
+    env_helper = gen_env_helper(env, env_helper_shared; args=env_helper_args)

    parent_dir = "RL_" * parent_dir_appendix