Add RL documentation

2024-12-21 00:51:21 +00:00 · 2022-02-07 17:50:57 +01:00 · 2022-02-07 17:50:57 +01:00 · 0325ee36f9
commit 0325ee36f9
parent 98b2d673f7
5 changed files with 74 additions and 26 deletions
--- a/README.adoc
+++ b/README.adoc
@ -48,9 +48,7 @@ Import the package:
 using ReCo
 ----
-It might take a moment to precompile.
+Initialize a simulation with 100 particles having a self-propulsion velocity of 40.0 and return the relative path to the simulation directory:
 Initialize the simulation with 100 particles having a self-propulsion velocity of 40.0 and return the relative path to the simulation directory:
 [source, julia]
 ----
@ -67,7 +65,22 @@ run_sim(sim_dir, duration=20.0)
 The values for the number of particles, self-propulsion velocity and simulation duration are used here as an example. For more information about possible values and other optional arguments, press `?` in the REPL after running `using ReCo`. Then type `init_sim` or `run_sim` followed by pressing enter. This will show the method's documentation.
 == Run reinforcement learning
-// TODO
+Import the package:
 [source, julia]
 ----
 using ReCo
 ----
 Run a reinforcement learning process and return the environment helper:
 [source, julia]
 ----
 env_helper = run_rl(ENVTYPE)
 ----
 ENVTYPE has to be replaced by one of the environments named after the file names in the directory `ReCo.jl/RL/Envs`, for example: `LocalCOMEnv`. A description of an environment is included at the beginning of the corresponding file.
 For more information about all possible optional arguments, press `?` in the REPL after running `using ReCo`. Then type `run_rl` followed by pressing enter.
 == Run analysis
--- a/src/RL/Envs/COMCompassEnv.jl
+++ b/src/RL/Envs/COMCompassEnv.jl
--- a/src/RL/Envs/OriginCompassEnv.jl
+++ b/src/RL/Envs/OriginCompassEnv.jl
--- a/src/RL/RL.jl
+++ b/src/RL/RL.jl
@ -26,6 +26,19 @@ using ..ReCo: ReCo
 const INITIAL_STATE_IND = 1
 const INITIAL_REWARD = 0.0
 const DEFAULT_ELLIPTICAL_b_a_RATIO = 1.0
 const DEFAULT_N_EPISODES = 200
 const DEFAULT_EPISODE_DURATION = 20.0
 const DEFAULT_UPDATE_ACTIONS_AT = 0.1
 const DEFAULT_N_PARTICLES = 100
 const DEFAULT_SEED = 42
 const DEFAULT_ϵ_STABLE = 0.00001
 const DEFAULT_PACKING_FRACTION = 0.15
 const DEFAULT_SHOW_PROGRESS = true
 const DEFAULT_SHOW_SIMULATION_PROGRESS = true
 const DEFAULT_REWARD_DISCOUNT = 0.1
 const DEFAULT_N_EPISODES_BEFORE_ENV_HELPER_SAVING = 10
 include("Env.jl")
 include("EnvHelper.jl")
@ -63,23 +76,45 @@ function gen_agent(
    return Agent(; policy=policy, trajectory=trajectory)
 end
-function run_rl(;
+"""
-    EnvType::Type{E},
+    run_rl(EnvType; <keyword arguments>)
-    process_dir::String,
+
-    elliptical_b_a_ratio::Float64=1.0,
+Run a reinforcement learning process and return the environment helper.
-    n_episodes::Int64=200,
+
-    episode_duration::Float64=50.0,
+# Arguments
-    update_actions_at::Float64=0.1,
+- `EnvType::Type{<:Env}`: Environment type. It has to be one of the environments named after the file names in the directory `ReCo.jl/RL/Envs`, for example: `LocalCOMEnv`. A description of an environment is included at the beginning of the corresponding file.
-    n_particles::Int64=100,
+- `process_dir::String=string(EnvType)`: Path to the reinforcement learning process directory relative to `ReCo.jl/exports/RL`.
-    seed::Int64=42,
+- `elliptical_b_a_ratio::Float64=$DEFAULT_ELLIPTICAL_b_a_RATIO`: Goal elliptical b to a ratio.
-    ϵ_stable::Float64=0.00001,
+- `n_episodes::Int64=$DEFAULT_N_EPISODES`: Number of episodes.
 - `episode_duration::Float64=$DEFAULT_EPISODE_DURATION`: Duration of each episode.
 - `update_actions_at::Float64=$DEFAULT_UPDATE_ACTIONS_AT`: Actions update interval. It can be described as the reinforcement learning step duration in an episode.
 - `n_particles::Int64=$DEFAULT_N_PARTICLES`: Number of particles.
 - `seed::Int64=$DEFAULT_SEED`: Random number generator seed.
 - `ϵ_stable::Float64=$DEFAULT_ϵ_STABLE`: Stable value of ϵ after its decay. ϵ is the parameter of the epsilon-greedy method that balances exploration and exploitation.
 - `skin_to_interaction_radius_ratio::Float64=$(ReCo.DEFAULT_SKIN_TO_INTERACTION_RADIUS_RATIO)`: Ratio of the skin radius to the interaction radius of a particle.
 - `packing_fraction::Float64=$DEFAULT_PACKING_FRACTION`: Particles packing fraction.
 - `show_progress::Bool=$DEFAULT_SHOW_PROGRESS`: Show a progress bar for the reinforcement learning process.
 - `show_simulation_progress::Bool=$DEFAULT_SHOW_SIMULATION_PROGRESS`: Show a progress bar for each episode as a simulation.
 - `reward_discount::Float64=$DEFAULT_REWARD_DISCOUNT`: Reward discount.
 - `n_episodes_before_env_helper_saving::Int64=$DEFAULT_N_EPISODES_BEFORE_ENV_HELPER_SAVING`: Number of episodes before saving the environment helper for the case that the process is stopped while still running.
 """
 function run_rl(
    EnvType::Type{<:Env};
    process_dir::String=string(EnvType),
    elliptical_b_a_ratio::Float64=DEFAULT_ELLIPTICAL_b_a_RATIO,
    n_episodes::Int64=DEFAULT_N_EPISODES,
    episode_duration::Float64=DEFAULT_EPISODE_DURATION,
    update_actions_at::Float64=DEFAULT_UPDATE_ACTIONS_AT,
    n_particles::Int64=DEFAULT_N_PARTICLES,
    seed::Int64=DEFAULT_SEED,
    ϵ_stable::Float64=DEFAULT_ϵ_STABLE,
    skin_to_interaction_radius_ratio::Float64=ReCo.DEFAULT_SKIN_TO_INTERACTION_RADIUS_RATIO,
-    packing_fraction::Float64=0.15,
+    packing_fraction::Float64=DEFAULT_PACKING_FRACTION,
-    show_progress::Bool=true,
+    show_progress::Bool=DEFAULT_SHOW_PROGRESS,
-    reward_discount::Float64=0.1,
+    show_simulation_progress::Bool=DEFAULT_SHOW_SIMULATION_PROGRESS,
-    show_simulation_progress::Bool=true,
+    reward_discount::Float64=DEFAULT_REWARD_DISCOUNT,
-    n_episodes_before_env_helper_saving::Int64=10,
+    n_episodes_before_env_helper_saving::Int64=DEFAULT_N_EPISODES_BEFORE_ENV_HELPER_SAVING,
-) where {E<:Env}
+)
    @assert 0.0 <= elliptical_b_a_ratio <= 1.0
    @assert n_episodes > 0
    @assert episode_duration > 0
@ -200,7 +235,7 @@ include("Envs/LocalCOMWithAdditionalShapeRewardEnv2.jl")
 include("Envs/OriginEnv.jl")
 include("Envs/NearestNeighbourEnv.jl")
 include("Envs/LocalCOMEnv.jl")
-include("Envs/OriginCompass.jl")
+include("Envs/OriginCompassEnv.jl")
-include("Envs/COMCompass.jl")
+include("Envs/COMCompassEnv.jl")
 end # module
--- a/src/setup.jl
+++ b/src/setup.jl
@ -161,7 +161,7 @@ end
 """
    init_sim(n_particles::Int64, v₀::Float64; <keyword arguments>)
-Initialize simulation and return the relative path of the simulation directory.
+Initialize a simulation and return the relative path of the simulation directory.
 # Arguments
 - `n_particles::Int64`: Number of particles.
@ -169,9 +169,9 @@ Initialize simulation and return the relative path of the simulation directory.
 - `δt::Float64=$DEFAULT_δt`: Integration time step.
 - `packing_fraction::Float64=$DEFAULT_PACKING_FRACTION`: Packing fraction.
 - `skin_to_interaction_radius_ratio::Float64=$DEFAULT_SKIN_TO_INTERACTION_RADIUS_RATIO`: Ratio of skin radius to interaction radius.
- `exports_dir::String="$DEFAULT_EXPORTS_DIR"`: Relative path to exports directory.
+- `exports_dir::String="$DEFAULT_EXPORTS_DIR"`: Path to exports directory relative to the directory `ReCo.jl`.
- `parent_dir::String="$DEFAULT_PARENT_DIR"`: Name of the directory within the simulation directory is placed.
+- `parent_dir::String="$DEFAULT_PARENT_DIR"`: Directory relative to `exports_dir` where the simulation directory is placed.
- `comment::String="$DEFAULT_COMMENT"`: Comment to append to the directory name.
+- `comment::String="$DEFAULT_COMMENT"`: Comment to append to the simulation directory name.
 - `half_box_len::Float64=$DEFAULT_HALF_BOX_LEN`: Half box length. The default of 0.0 means that the half box length will be calculated from the packing fraction. Otherwise, the provided half box length will be used. It is not possible to provide a half box length and a packing fraction at the same time.
 """
 function init_sim(;