diff --git a/src/RL/Env.jl b/src/RL/Env.jl index cf5d2d9..7a874ca 100644 --- a/src/RL/Env.jl +++ b/src/RL/Env.jl @@ -10,12 +10,16 @@ mutable struct EnvSharedProps{n_state_dims} state_id_space::OneTo{Int64} state_id::Int64 + action_spaces_labels::Vector{Vector{LaTeXStrings.LaTeXString}} + state_spaces_labels::Vector{Vector{LaTeXStrings.LaTeXString}} + reward::Float64 terminated::Bool function EnvSharedProps( n_states::Int64, # Can be different from the sum of state_id_tensor_dims - state_id_tensor_dims::NTuple{n_state_dims,Int64}; + state_id_tensor_dims::NTuple{n_state_dims,Int64}, + state_spaces_labels::Vector{Vector{LaTeXStrings.LaTeXString}}; n_v_actions::Int64=2, n_ω_actions::Int64=3, max_v::Float64=40.0, @@ -29,6 +33,10 @@ mutable struct EnvSharedProps{n_state_dims} v_action_space = range(; start=0.0, stop=max_v, length=n_v_actions) ω_action_space = range(; start=-max_ω, stop=max_ω, length=n_ω_actions) + action_spaces_labels = gen_action_spaces_labels( + ("v", "\\omega"), (v_action_space, ω_action_space) + ) + n_actions = n_v_actions * n_ω_actions action_space = Vector{SVector{2,Float64}}(undef, n_actions) @@ -61,6 +69,8 @@ mutable struct EnvSharedProps{n_state_dims} state_id_tensor, state_id_space, INITIAL_STATE_IND, + action_spaces_labels, + state_spaces_labels, INITIAL_REWARD, false, ) @@ -91,4 +101,21 @@ end function RLBase.is_terminated(env::Env) return env.shared.terminated +end + +function gen_action_space_labels(action_label::String, action_space::AbstractRange) + labels = Vector{LaTeXStrings.LaTeXString}(undef, length(action_space)) + for (action_ind, action) in enumerate(action_space) + labels[action_ind] = LaTeXStrings.latexstring( + action_label * " = $(round(action; digits=2))" + ) + end + + return labels +end + +function gen_action_spaces_labels( + actions_labels::NTuple{N,String}, action_spaces::NTuple{N,AbstractRange} +) where {N} + return [gen_action_space_labels(actions_labels[i], action_spaces[i]) for i in 1:N] end \ No newline at end of file diff --git a/src/RL/Envs/OriginEnv.jl b/src/RL/Envs/OriginEnv.jl index 272a3fe..4a12e72 100644 --- a/src/RL/Envs/OriginEnv.jl +++ b/src/RL/Envs/OriginEnv.jl @@ -9,7 +9,7 @@ struct OriginEnv <: Env direction_angle_state_space::Vector{Interval} function OriginEnv(; - n_distance_states::Int64=4, n_direction_angle_states::Int64=3, args + n_distance_states::Int64=3, n_direction_angle_states::Int64=3, args ) @assert n_distance_states > 1 @assert n_direction_angle_states > 1 @@ -25,7 +25,13 @@ struct OriginEnv <: Env n_states = n_distance_states * n_direction_angle_states - shared = EnvSharedProps(n_states, (n_distance_states, n_direction_angle_states)) + state_spaces_labels = gen_state_spaces_labels( + ("d", "\\theta"), (distance_state_space, direction_angle_state_space) + ) + + shared = EnvSharedProps( + n_states, (n_distance_states, n_direction_angle_states), state_spaces_labels + ) return new(shared, distance_state_space, direction_angle_state_space) end diff --git a/src/RL/RL.jl b/src/RL/RL.jl index d72b284..fe1aea7 100644 --- a/src/RL/RL.jl +++ b/src/RL/RL.jl @@ -11,6 +11,8 @@ using StaticArrays: SVector using LoopVectorization: @turbo using Random: Random using ProgressMeter: ProgressMeter +using JLD2: JLD2 +using LaTeXStrings: LaTeXStrings, @L_str using ..ReCo: ReCo @@ -54,7 +56,7 @@ end function run_rl(; EnvType::Type{E}, - parent_dir_appendix::String, + process_dir::String, elliptical_a_b_ratio::Float64=1.0, n_episodes::Int64=200, episode_duration::Float64=50.0, @@ -85,7 +87,6 @@ function run_rl(; n_particles = sim_consts.n_particles # Not always equal to the input! env_args = (skin_radius=sim_consts.skin_radius, half_box_len=sim_consts.half_box_len) - env = EnvType(; args=env_args) agent = gen_agent(env.shared.n_states, env.shared.n_actions, ϵ_stable) @@ -104,7 +105,7 @@ function run_rl(; env_helper = gen_env_helper(env, env_helper_shared; args=env_helper_args) - parent_dir = "RL_" * parent_dir_appendix + parent_dir = "RL/" * process_dir # Pre experiment hook(PRE_EXPERIMENT_STAGE, agent, env) @@ -137,9 +138,31 @@ function run_rl(; # Post experiment hook(POST_EXPERIMENT_STAGE, agent, env) + process_dir = ReCo.DEFAULT_EXPORTS_DIR * "/$parent_dir" + + JLD2.save_object("$process_dir/env_helper.jld2", env_helper) + return env_helper end +function gen_state_space_labels(state_label::String, state_space::Vector{Interval}) + labels = Vector{LaTeXStrings.LaTeXString}(undef, length(state_space)) + for (state_ind, state) in enumerate(state_space) + labels[state_ind] = LaTeXStrings.latexstring( + state_label * + " = $(round(state.first; digits=2)):$(round(state.last, digits=2))", + ) + end + + return labels +end + +function gen_state_spaces_labels( + states_labels::NTuple{N,String}, state_spaces::NTuple{N,Vector{Interval}} +) where {N} + return [gen_state_space_labels(states_labels[i], state_spaces[i]) for i in 1:N] +end + include("Envs/LocalCOMWithAdditionalShapeRewardEnv.jl") include("Envs/OriginEnv.jl") diff --git a/src/setup.jl b/src/setup.jl index a15f755..82c84ce 100644 --- a/src/setup.jl +++ b/src/setup.jl @@ -1,6 +1,6 @@ const DEFAULT_PACKING_RATIO = 0.5 const DEFAULT_δt = 1e-5 -const DEFAULT_SKIN_TO_INTERACTION_R_RATIO = 2.0 +const DEFAULT_SKIN_TO_INTERACTION_R_RATIO = 2.5 const DEFAULT_EXPORTS_DIR = "exports" const DEFAULT_PARENT_DIR = "" const DEFAULT_COMMENT = "" @@ -69,7 +69,7 @@ function gen_sim_consts( skin_radius = skin_to_interaction_radius_ratio * interaction_radius - buffer = 2.5 + buffer = 3 max_approach_after_one_integration_step = buffer * (2 * v₀ * δt) @assert skin_radius >= interaction_radius + max_approach_after_one_integration_step