diff --git a/Project.toml b/Project.toml
index fa9322c..9623ca8 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,4 +1,4 @@
-name = "ReCo.jl"
+name = "ReCo"
 uuid = "b25f7548-fcc9-4c91-bc24-841b54f4dd54"
 authors = ["MoBit "]
 version = "0.2.0"
diff --git a/src/RL/EnvHelper.jl b/src/RL/EnvHelper.jl
index a840987..9eb8d39 100644
--- a/src/RL/EnvHelper.jl
+++ b/src/RL/EnvHelper.jl
@@ -40,7 +40,7 @@ struct EnvHelperSharedProps{H<:AbstractHook}
     end
 end
 
-function gen_env_helper(::Env, env_helper_params::EnvHelperSharedProps)
+function gen_env_helper(::Env, env_helper_params::EnvHelperSharedProps; args)
     return method_not_implemented()
 end
 
diff --git a/src/RL/LocalCOMEnv.jl b/src/RL/LocalCOMEnv.jl
index d2d351f..80b8e5b 100644
--- a/src/RL/LocalCOMEnv.jl
+++ b/src/RL/LocalCOMEnv.jl
@@ -6,17 +6,15 @@ struct LocalCOMEnv <: Env
     distance_state_space::Vector{Interval}
     direction_angle_state_space::Vector{Interval}
 
-    max_distance::Float64
-
-    function LocalCOMEnv(
-        sim_consts; n_distance_states::Int64=3, n_direction_angle_states::Int64=3
+    function LocalCOMEnv(;
+        n_distance_states::Int64=3, n_direction_angle_states::Int64=3, args
     )
         @assert n_direction_angle_states > 1
 
         direction_angle_state_space = gen_angle_state_space(n_direction_angle_states)
 
         min_distance = 0.0
-        max_distance = sim_consts.skin_r
+        max_distance = args.skin_r
 
         distance_state_space = gen_distance_state_space(
             min_distance, max_distance, n_distance_states
@@ -37,29 +35,50 @@ struct LocalCOMEnv <: Env
 
         shared = EnvSharedProps(n_states, state_space)
 
-        return new(shared, distance_state_space, direction_angle_state_space, max_distance)
+        return new(shared, distance_state_space, direction_angle_state_space)
     end
 end
 
-struct LocalCOMEnvHelper <: EnvHelper
+mutable struct LocalCOMEnvHelper <: EnvHelper
     shared::EnvHelperSharedProps
 
     vec_to_neighbour_sums::Vector{SVector{2,Float64}}
     n_neighbours::Vector{Int64}
-    sq_norm2d_vec_to_local_center_of_mass::Vector{Float64}
 
-    function LocalCOMEnvHelper(shared::EnvHelperSharedProps)
+    distances_to_local_center_of_mass::Vector{Float64}
+    max_distance_to_local_center_of_mass::Float64
+
+    add_shape_reward_term::Bool
+
+    gyration_tensor_eigvec_to_smaller_eigval::SVector{2,Float64}
+    gyration_tensor_eigvec_to_bigger_eigval::SVector{2,Float64}
+
+    half_box_len::Float64
+    max_elliptical_distance::Float64
+
+    function LocalCOMEnvHelper(shared::EnvHelperSharedProps, half_box_len::Float64, skin_r)
+        max_elliptical_distance =
+            sqrt(2) * half_box_len / shared.goal_gyration_tensor_eigvals_ratio
+
+        max_distance_to_local_center_of_mass = skin_r
+
         return new(
             shared,
             fill(SVector(0.0, 0.0), shared.n_particles),
             fill(0, shared.n_particles),
             zeros(shared.n_particles),
+            max_distance_to_local_center_of_mass,
+            false,
+            SVector(0.0, 0.0),
+            SVector(0.0, 0.0),
+            half_box_len,
+            max_elliptical_distance,
        )
     end
 end
 
-function gen_env_helper(::LocalCOMEnv, env_helper_params::EnvHelperSharedProps)
-    return LocalCOMEnvHelper(env_helper_params)
+function gen_env_helper(::LocalCOMEnv, env_helper_shared::EnvHelperSharedProps; args)
+    return LocalCOMEnvHelper(env_helper_shared, args.half_box_len, args.skin_r)
 end
 
 function pre_integration_hook(env_helper::LocalCOMEnvHelper)
@@ -86,26 +105,31 @@ end
 function state_update_hook(env_helper::LocalCOMEnvHelper, particles::Vector{Particle})
     n_particles = env_helper.shared.n_particles
 
-    @turbo for id in 1:(n_particles)
+    @turbo for id in 1:n_particles
         env_helper.shared.old_states_ind[id] = env_helper.shared.states_ind[id]
     end
 
     env = env_helper.shared.env
 
+    distance_to_local_center_of_mass_sum = 0.0
+
     for id in 1:n_particles
         n_neighbours = env_helper.n_neighbours[id]
 
         if n_neighbours == 0
             state_ind = env.shared.n_states
+
+            distance_to_local_center_of_mass_sum +=
+                env_helper.max_distance_to_local_center_of_mass
         else
             vec_to_local_center_of_mass =
                 env_helper.vec_to_neighbour_sums[id] / n_neighbours
 
-            sq_norm2d_vec_to_local_center_of_mass = sq_norm2d(vec_to_local_center_of_mass)
-            env_helper.sq_norm2d_vec_to_local_center_of_mass[id] =
-                sq_norm2d_vec_to_local_center_of_mass
+            distance = norm2d(vec_to_local_center_of_mass)
 
-            distance = sqrt(sq_norm2d_vec_to_local_center_of_mass)
+            env_helper.distances_to_local_center_of_mass[id] = distance
+
+            distance_to_local_center_of_mass_sum += distance
 
             distance_state = find_state_interval(distance, env.distance_state_space)
 
@@ -124,20 +148,64 @@ function state_update_hook(env_helper::LocalCOMEnvHelper, particles::Vector{Part
         env_helper.shared.states_ind[id] = state_ind
     end
 
+    mean_distance_to_local_center_of_mass =
+        distance_to_local_center_of_mass_sum / n_particles
+    env_helper.add_shape_reward_term =
+        mean_distance_to_local_center_of_mass /
+        env_helper.max_distance_to_local_center_of_mass < 0.32
+
+    if env_helper.add_shape_reward_term
+        #println(mean_distance_to_local_center_of_mass / env_helper.max_distance_to_local_center_of_mass) # TODO: Remove
+    end
+
+    v1, v2 = Shape.gyration_tensor_eigvecs(particles, env_helper.half_box_len)
+
+    env_helper.gyration_tensor_eigvec_to_smaller_eigval = v1
+    env_helper.gyration_tensor_eigvec_to_bigger_eigval = v2
+
     return nothing
 end
 
+"""
+    minimizing_reward(value::Float64, max_value::Float64)
+
+Returns the reward such that it is 0 for value=max_value and 1 for value=0.
+"""
+function minimizing_reward(value::Float64, max_value::Float64)
+    return (max_value - value) / (max_value + value)
+end
+
 function update_reward!(env::LocalCOMEnv, env_helper::LocalCOMEnvHelper, particle::Particle)
     id = particle.id
 
-    normalization = (env.max_distance * env_helper.shared.n_particles)
-
+    normalization = env_helper.shared.n_particles # TODO: Add factor from steps
     n_neighbours = env_helper.n_neighbours[id]
+
     if n_neighbours == 0
-        env.shared.reward = -(env.max_distance^2) / normalization
+        env.shared.reward = 0.0
     else
-        env.shared.reward =
-            -(env_helper.sq_norm2d_vec_to_local_center_of_mass[id]) / normalization # TODO: Add shape term
+        reward = minimizing_reward(
+            env_helper.distances_to_local_center_of_mass[id],
+            env_helper.max_distance_to_local_center_of_mass,
+        )
+
+        if env_helper.add_shape_reward_term
+            elliptical_distance = ReCo.elliptical_distance(
+                particle,
+                env_helper.gyration_tensor_eigvec_to_smaller_eigval,
+                env_helper.gyration_tensor_eigvec_to_bigger_eigval,
+                env_helper.shared.goal_gyration_tensor_eigvals_ratio,
+            )
+
+            reward += unnormalized_reward(
+                elliptical_distance,
+                env_helper.max_elliptical_distance, # TODO: Fix sq
+            )
+
+            # println(elliptical_distance / env_helper.max_elliptical_distance) # TODO: Remove
+        end
+
+        env.shared.reward = reward / normalization
     end
 
     return nothing
diff --git a/src/RL/RL.jl b/src/RL/RL.jl
index 783c4c9..5287f8e 100644
--- a/src/RL/RL.jl
+++ b/src/RL/RL.jl
@@ -17,7 +17,6 @@ using ..ReCo:
     Particle,
     angle2,
     norm2d,
-    sq_norm2d,
     Shape,
     DEFAULT_SKIN_TO_INTERACTION_R_RATIO,
     method_not_implemented
@@ -89,7 +88,9 @@ function run_rl(;
     )
     n_particles = sim_consts.n_particles # Not always equal to the input!
 
-    env = EnvType(sim_consts)
+    env_args = (skin_r=sim_consts.skin_r,)
+
+    env = EnvType(; args=env_args)
 
     agent = gen_agent(env.shared.n_states, env.shared.n_actions, ϵ_stable)
 
@@ -97,7 +98,7 @@ function run_rl(;
     )
 
     hook = TotalRewardPerEpisode()
-    env_helper_params = EnvHelperSharedProps(
+    env_helper_shared = EnvHelperSharedProps(
         env,
         agent,
         hook,
@@ -106,7 +107,9 @@ function run_rl(;
         n_particles,
     )
 
-    env_helper = gen_env_helper(env, env_helper_params)
+    env_helper_args = (half_box_len=sim_consts.half_box_len, skin_r=sim_consts.skin_r)
+
+    env_helper = gen_env_helper(env, env_helper_shared; args=env_helper_args)
 
     parent_dir = "RL_" * parent_dir_appendix
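
Note: the patch replaces the old negative squared-distance reward with the normalized minimizing_reward defined in src/RL/LocalCOMEnv.jl above. The standalone Julia sketch below only illustrates its endpoint behaviour; minimizing_reward is copied from the diff, while skin_r and the sampled distances are made-up example values, not simulation output.

    # Standalone sketch: endpoint behaviour of minimizing_reward from this patch.
    # skin_r plays the role of max_distance_to_local_center_of_mass and is an
    # arbitrary example value (assumption), as are the sampled distances.
    function minimizing_reward(value::Float64, max_value::Float64)
        return (max_value - value) / (max_value + value)
    end

    skin_r = 2.5

    for distance in (0.0, skin_r / 2, skin_r)
        println("distance = ", distance, " -> reward = ", minimizing_reward(distance, skin_r))
    end
    # distance = 0.0  -> reward = 1.0    (particle sits on its local center of mass)
    # distance = 1.25 -> reward = 0.333… (1/3, not 0.5: the decay is nonlinear)
    # distance = 2.5  -> reward = 0.0    (particle at the maximal distance skin_r)

Compared to the removed -(distance^2) term, this keeps every per-particle contribution in [0, 1] before it is divided by the n_particles normalization in update_reward!.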