Added shape reward term

2025-09-04 09:12:35 +00:00 · 2022-01-14 12:28:47 +01:00 · 2022-01-14 12:28:47 +01:00 · 891af721c0
commit 891af721c0
parent 9b233d53c4
4 changed files with 98 additions and 27 deletions
--- a/Project.toml
+++ b/Project.toml
@ -1,4 +1,4 @@
-name = "ReCo.jl"
+name = "ReCo"
 uuid = "b25f7548-fcc9-4c91-bc24-841b54f4dd54"
 authors = ["MoBit <mo8it@protonmail.com>"]
 version = "0.2.0"
--- a/src/RL/EnvHelper.jl
+++ b/src/RL/EnvHelper.jl
@ -40,7 +40,7 @@ struct EnvHelperSharedProps{H<:AbstractHook}
    end
 end
-function gen_env_helper(::Env, env_helper_params::EnvHelperSharedProps)
+function gen_env_helper(::Env, env_helper_params::EnvHelperSharedProps; args)
    return method_not_implemented()
 end
--- a/src/RL/LocalCOMEnv.jl
+++ b/src/RL/LocalCOMEnv.jl
@ -6,17 +6,15 @@ struct LocalCOMEnv <: Env
    distance_state_space::Vector{Interval}
    direction_angle_state_space::Vector{Interval}
-    max_distance::Float64
+    function LocalCOMEnv(;
-
+        n_distance_states::Int64=3, n_direction_angle_states::Int64=3, args
    function LocalCOMEnv(
        sim_consts; n_distance_states::Int64=3, n_direction_angle_states::Int64=3
    )
        @assert n_direction_angle_states > 1
        direction_angle_state_space = gen_angle_state_space(n_direction_angle_states)
        min_distance = 0.0
-        max_distance = sim_consts.skin_r
+        max_distance = args.skin_r
        distance_state_space = gen_distance_state_space(
            min_distance, max_distance, n_distance_states
@ -37,29 +35,50 @@ struct LocalCOMEnv <: Env
        shared = EnvSharedProps(n_states, state_space)
-        return new(shared, distance_state_space, direction_angle_state_space, max_distance)
+        return new(shared, distance_state_space, direction_angle_state_space)
    end
 end
-struct LocalCOMEnvHelper <: EnvHelper
+mutable struct LocalCOMEnvHelper <: EnvHelper
    shared::EnvHelperSharedProps
    vec_to_neighbour_sums::Vector{SVector{2,Float64}}
    n_neighbours::Vector{Int64}
    sq_norm2d_vec_to_local_center_of_mass::Vector{Float64}
-    function LocalCOMEnvHelper(shared::EnvHelperSharedProps)
+    distances_to_local_center_of_mass::Vector{Float64}
    max_distance_to_local_center_of_mass::Float64
    add_shape_reward_term::Bool
    gyration_tensor_eigvec_to_smaller_eigval::SVector{2,Float64}
    gyration_tensor_eigvec_to_bigger_eigval::SVector{2,Float64}
    half_box_len::Float64
    max_elliptical_distance::Float64
    function LocalCOMEnvHelper(shared::EnvHelperSharedProps, half_box_len::Float64, skin_r)
        max_elliptical_distance =
            sqrt(2) * half_box_len / shared.goal_gyration_tensor_eigvals_ratio
        max_distance_to_local_center_of_mass = skin_r
        return new(
            shared,
            fill(SVector(0.0, 0.0), shared.n_particles),
            fill(0, shared.n_particles),
            zeros(shared.n_particles),
            max_distance_to_local_center_of_mass,
            false,
            SVector(0.0, 0.0),
            SVector(0.0, 0.0),
            half_box_len,
            max_elliptical_distance,
        )
    end
 end
-function gen_env_helper(::LocalCOMEnv, env_helper_params::EnvHelperSharedProps)
+function gen_env_helper(::LocalCOMEnv, env_helper_shared::EnvHelperSharedProps; args)
-    return LocalCOMEnvHelper(env_helper_params)
+    return LocalCOMEnvHelper(env_helper_shared, args.half_box_len, args.skin_r)
 end
 function pre_integration_hook(env_helper::LocalCOMEnvHelper)
@ -86,26 +105,31 @@ end
 function state_update_hook(env_helper::LocalCOMEnvHelper, particles::Vector{Particle})
    n_particles = env_helper.shared.n_particles
-    @turbo for id in 1:(n_particles)
+    @turbo for id in 1:n_particles
        env_helper.shared.old_states_ind[id] = env_helper.shared.states_ind[id]
    end
    env = env_helper.shared.env
    distance_to_local_center_of_mass_sum = 0.0
    for id in 1:n_particles
        n_neighbours = env_helper.n_neighbours[id]
        if n_neighbours == 0
            state_ind = env.shared.n_states
            distance_to_local_center_of_mass_sum +=
                env_helper.max_distance_to_local_center_of_mass
        else
            vec_to_local_center_of_mass =
                env_helper.vec_to_neighbour_sums[id] / n_neighbours
-            sq_norm2d_vec_to_local_center_of_mass = sq_norm2d(vec_to_local_center_of_mass)
+            distance = norm2d(vec_to_local_center_of_mass)
            env_helper.sq_norm2d_vec_to_local_center_of_mass[id] =
                sq_norm2d_vec_to_local_center_of_mass
-            distance = sqrt(sq_norm2d_vec_to_local_center_of_mass)
+            env_helper.distances_to_local_center_of_mass[id] = distance
            distance_to_local_center_of_mass_sum += distance
            distance_state = find_state_interval(distance, env.distance_state_space)
@ -124,20 +148,64 @@ function state_update_hook(env_helper::LocalCOMEnvHelper, particles::Vector{Part
        env_helper.shared.states_ind[id] = state_ind
    end
    mean_distance_to_local_center_of_mass =
        distance_to_local_center_of_mass_sum / n_particles
    env_helper.add_shape_reward_term =
        mean_distance_to_local_center_of_mass /
        env_helper.max_distance_to_local_center_of_mass < 0.32
    if env_helper.add_shape_reward_term
        #println(mean_distance_to_local_center_of_mass / env_helper.max_distance_to_local_center_of_mass) # TODO: Remove
    end
    v1, v2 = Shape.gyration_tensor_eigvecs(particles, env_helper.half_box_len)
    env_helper.gyration_tensor_eigvec_to_smaller_eigval = v1
    env_helper.gyration_tensor_eigvec_to_bigger_eigval = v2
    return nothing
 end
 """
    minimizing_reward(value::Float64, max_value::Float64)
 Returns the reward such that it is 0 for value=max_value and 1 for value=0.
 """
 function minimizing_reward(value::Float64, max_value::Float64)
    # Ratio form rather than a linear ramp: for max_value > 0 and
    # 0 <= value <= max_value the result falls monotonically (and non-linearly)
    # from 1 at value = 0 to 0 at value = max_value.
    # The denominator is zero when value == -max_value (including both being
    # 0.0), which gives a non-finite result; presumably callers only pass
    # non-negative distances -- TODO confirm.
    return (max_value - value) / (max_value + value)
 end
 function update_reward!(env::LocalCOMEnv, env_helper::LocalCOMEnvHelper, particle::Particle)
    id = particle.id
-    normalization = (env.max_distance * env_helper.shared.n_particles)
+    normalization = env_helper.shared.n_particles # TODO: Add factor from steps
    n_neighbours = env_helper.n_neighbours[id]
    if n_neighbours == 0
-        env.shared.reward = -(env.max_distance^2) / normalization
+        env.shared.reward = 0.0
    else
-        env.shared.reward =
+        reward = minimizing_reward(
-            -(env_helper.sq_norm2d_vec_to_local_center_of_mass[id]) / normalization # TODO: Add shape term
+            env_helper.distances_to_local_center_of_mass[id],
            env_helper.max_distance_to_local_center_of_mass,
        )
        if env_helper.add_shape_reward_term
            elliptical_distance = ReCo.elliptical_distance(
                particle,
                env_helper.gyration_tensor_eigvec_to_smaller_eigval,
                env_helper.gyration_tensor_eigvec_to_bigger_eigval,
                env_helper.shared.goal_gyration_tensor_eigvals_ratio,
            )
            reward += unnormalized_reward(
                elliptical_distance,
                env_helper.max_elliptical_distance, # TODO: Fix sq
            )
            # println(elliptical_distance / env_helper.max_elliptical_distance) # TODO: Remove
        end
        env.shared.reward = reward / normalization
    end
    return nothing
--- a/src/RL/RL.jl
+++ b/src/RL/RL.jl
@ -17,7 +17,6 @@ using ..ReCo:
    Particle,
    angle2,
    norm2d,
    sq_norm2d,
    Shape,
    DEFAULT_SKIN_TO_INTERACTION_R_RATIO,
    method_not_implemented
@ -89,7 +88,9 @@ function run_rl(;
    )
    n_particles = sim_consts.n_particles # Not always equal to the input!
-    env = EnvType(sim_consts)
+    env_args = (skin_r=sim_consts.skin_r,)
    env = EnvType(; args=env_args)
    agent = gen_agent(env.shared.n_states, env.shared.n_actions, ϵ_stable)
@ -97,7 +98,7 @@ function run_rl(;
    hook = TotalRewardPerEpisode()
-    env_helper_params = EnvHelperSharedProps(
+    env_helper_shared = EnvHelperSharedProps(
        env,
        agent,
        hook,
@ -106,7 +107,9 @@ function run_rl(;
        n_particles,
    )
-    env_helper = gen_env_helper(env, env_helper_params)
+    env_helper_args = (half_box_len=sim_consts.half_box_len, skin_r=sim_consts.skin_r)
    env_helper = gen_env_helper(env, env_helper_shared; args=env_helper_args)
    parent_dir = "RL_" * parent_dir_appendix