1
0
Fork 0
mirror of https://gitlab.rlp.net/mobitar/ReCo.jl.git synced 2024-12-21 00:51:21 +00:00

Add RL documentation

This commit is contained in:
Mo8it 2022-02-07 17:50:57 +01:00
parent 98b2d673f7
commit 0325ee36f9
5 changed files with 74 additions and 26 deletions

View file

@ -48,9 +48,7 @@ Import the package:
using ReCo
----
It might take a moment to precompile.
Initialize the simulation with 100 particles having a self-propulsion velocity of 40.0 and return the relative path to the simulation directory:
Initialize a simulation with 100 particles having a self-propulsion velocity of 40.0 and return the relative path to the simulation directory:
[source, julia]
----
@ -67,7 +65,22 @@ run_sim(sim_dir, duration=20.0)
The values for the number of particles, self-propulsion velocity and simulation duration are used here as an example. For more information about possible values and other optional arguments, press `?` in the REPL after running `using ReCo`. Then type `init_sim` or `run_sim` followed by pressing enter. This will show the method's documentation.
== Run reinforcement learning
// TODO
Import the package:
[source, julia]
----
using ReCo
----
Run a reinforcement learning process and return the environment helper:
[source, julia]
----
env_helper = run_rl(ENVTYPE)
----
ENVTYPE has to be replaced by one of the environments named after the file names in the directory `ReCo.jl/RL/Envs`, for example: `LocalCOMEnv`. A description of an environment is included at the beginning of the corresponding file.
For more information about all possible optional arguments, press `?` in the REPL after running `using ReCo`. Then type `run_rl` followed by pressing enter.
== Run analysis

View file

@ -26,6 +26,19 @@ using ..ReCo: ReCo
const INITIAL_STATE_IND = 1
const INITIAL_REWARD = 0.0
const DEFAULT_ELLIPTICAL_b_a_RATIO = 1.0
const DEFAULT_N_EPISODES = 200
const DEFAULT_EPISODE_DURATION = 20.0
const DEFAULT_UPDATE_ACTIONS_AT = 0.1
const DEFAULT_N_PARTICLES = 100
const DEFAULT_SEED = 42
const DEFAULT_ϵ_STABLE = 0.00001
const DEFAULT_PACKING_FRACTION = 0.15
const DEFAULT_SHOW_PROGRESS = true
const DEFAULT_SHOW_SIMULATION_PROGRESS = true
const DEFAULT_REWARD_DISCOUNT = 0.1
const DEFAULT_N_EPISODES_BEFORE_ENV_HELPER_SAVING = 10
include("Env.jl")
include("EnvHelper.jl")
@ -63,23 +76,45 @@ function gen_agent(
return Agent(; policy=policy, trajectory=trajectory)
end
function run_rl(;
EnvType::Type{E},
process_dir::String,
elliptical_b_a_ratio::Float64=1.0,
n_episodes::Int64=200,
episode_duration::Float64=50.0,
update_actions_at::Float64=0.1,
n_particles::Int64=100,
seed::Int64=42,
ϵ_stable::Float64=0.00001,
"""
run_rl(EnvType; <keyword arguments>)
Run a reinforcement learning process and return the environment helper.
# Arguments
- `EnvType::Type{<:Env}`: Environment type. It has to be one of the environments named after the file names in the directory `ReCo.jl/RL/Envs`, for example: `LocalCOMEnv`. A description of an environment is included at the beginning of the corresponding file.
- `process_dir::String=string(EnvType)`: Path to the reinforcement learning process directory relative to `ReCo.jl/exports/RL`.
- `elliptical_b_a_ratio::Float64=$DEFAULT_ELLIPTICAL_b_a_RATIO`: Goal elliptical b to a ratio.
- `n_episodes::Int64=$DEFAULT_N_EPISODES`: Number of episodes.
- `episode_duration::Float64=$DEFAULT_EPISODE_DURATION`: Duration of each episode.
- `update_actions_at::Float64=$DEFAULT_UPDATE_ACTIONS_AT`: Actions update interval. It can be described as the reinforcement learning step duration in an episode.
- `n_particles::Int64=$DEFAULT_N_PARTICLES`: Number of particles.
- `seed::Int64=$DEFAULT_SEED`: Random number generator seed.
- `ϵ_stable::Float64=$DEFAULT_ϵ_STABLE`: Stable value of ϵ after its decay. ϵ is the one from the epsilon greedy method for exploration and exploitation.
- `skin_to_interaction_radius_ratio::Float64=$(ReCo.DEFAULT_SKIN_TO_INTERACTION_RADIUS_RATIO)`: Ratio of the skin radius to the interaction radius of a particle.
- `packing_fraction::Float64=$DEFAULT_PACKING_FRACTION`: Particles packing fraction.
- `show_progress::Bool=$DEFAULT_SHOW_PROGRESS`: Show a progress bar for the reinforcement learning process.
- `show_simulation_progress::Bool=$DEFAULT_SHOW_SIMULATION_PROGRESS`: Show a progress bar for each episode as a simulation.
- `reward_discount::Float64=$DEFAULT_REWARD_DISCOUNT`: Reward discount.
- `n_episodes_before_env_helper_saving::Int64=$DEFAULT_N_EPISODES_BEFORE_ENV_HELPER_SAVING`: Number of episodes between saves of the environment helper, so that progress is preserved in case the process is stopped before completion.
"""
function run_rl(
EnvType::Type{<:Env};
process_dir::String=string(EnvType),
elliptical_b_a_ratio::Float64=DEFAULT_ELLIPTICAL_b_a_RATIO,
n_episodes::Int64=DEFAULT_N_EPISODES,
episode_duration::Float64=DEFAULT_EPISODE_DURATION,
update_actions_at::Float64=DEFAULT_UPDATE_ACTIONS_AT,
n_particles::Int64=DEFAULT_N_PARTICLES,
seed::Int64=DEFAULT_SEED,
ϵ_stable::Float64=DEFAULT_ϵ_STABLE,
skin_to_interaction_radius_ratio::Float64=ReCo.DEFAULT_SKIN_TO_INTERACTION_RADIUS_RATIO,
packing_fraction::Float64=0.15,
show_progress::Bool=true,
reward_discount::Float64=0.1,
show_simulation_progress::Bool=true,
n_episodes_before_env_helper_saving::Int64=10,
) where {E<:Env}
packing_fraction::Float64=DEFAULT_PACKING_FRACTION,
show_progress::Bool=DEFAULT_SHOW_PROGRESS,
show_simulation_progress::Bool=DEFAULT_SHOW_SIMULATION_PROGRESS,
reward_discount::Float64=DEFAULT_REWARD_DISCOUNT,
n_episodes_before_env_helper_saving::Int64=DEFAULT_N_EPISODES_BEFORE_ENV_HELPER_SAVING,
)
@assert 0.0 <= elliptical_b_a_ratio <= 1.0
@assert n_episodes > 0
@assert episode_duration > 0
@ -200,7 +235,7 @@ include("Envs/LocalCOMWithAdditionalShapeRewardEnv2.jl")
include("Envs/OriginEnv.jl")
include("Envs/NearestNeighbourEnv.jl")
include("Envs/LocalCOMEnv.jl")
include("Envs/OriginCompass.jl")
include("Envs/COMCompass.jl")
include("Envs/OriginCompassEnv.jl")
include("Envs/COMCompassEnv.jl")
end # module

View file

@ -161,7 +161,7 @@ end
"""
init_sim(n_particles::Int64, v₀::Float64; <keyword arguments>)
Initialize simulation and return the relative path of the simulation directory.
Initialize a simulation and return the relative path of the simulation directory.
# Arguments
- `n_particles::Int64`: Number of particles.
@ -169,9 +169,9 @@ Initialize simulation and return the relative path of the simulation directory.
- `δt::Float64=$DEFAULT_δt`: Integration time step.
- `packing_fraction::Float64=$DEFAULT_PACKING_FRACTION`: Packing fraction.
- `skin_to_interaction_radius_ratio::Float64=$DEFAULT_SKIN_TO_INTERACTION_RADIUS_RATIO`: Ratio of skin radius to interaction radius.
- `exports_dir::String="$DEFAULT_EXPORTS_DIR"`: Relative path to exports directory.
- `parent_dir::String="$DEFAULT_PARENT_DIR"`: Name of the directory within the simulation directory is placed.
- `comment::String="$DEFAULT_COMMENT"`: Comment to append to the directory name.
- `exports_dir::String="$DEFAULT_EXPORTS_DIR"`: Path to exports directory relative to the directory `ReCo.jl`.
- `parent_dir::String="$DEFAULT_PARENT_DIR"`: Directory relative to `exports_dir` where the simulation directory is placed.
- `comment::String="$DEFAULT_COMMENT"`: Comment to append to the simulation directory name.
- `half_box_len::Float64=$DEFAULT_HALF_BOX_LEN` Half box length. The default of 0.0 means that the half box length will be calculated from the packing fraction. Otherwise, the provided half box length will be used. It is not possible to provide a half box length and a packing fraction at the same time.
"""
function init_sim(;