mirror of
https://gitlab.rlp.net/mobitar/ReCo.jl.git
synced 2024-11-08 22:21:08 +00:00
Add RL documentation
This commit is contained in:
parent
98b2d673f7
commit
0325ee36f9
5 changed files with 74 additions and 26 deletions
21
README.adoc
21
README.adoc
|
@ -48,9 +48,7 @@ Import the package:
|
||||||
using ReCo
|
using ReCo
|
||||||
----
|
----
|
||||||
|
|
||||||
It might take a moment to precompile.
|
Initialize a simulation with 100 particles having a self-propulsion velocity of 40.0 and return the relative path to the simulation directory:
|
||||||
|
|
||||||
Initialize the simulation with 100 particles having a self-propulsion velocity of 40.0 and return the relative path to the simulation directory:
|
|
||||||
|
|
||||||
[source, julia]
|
[source, julia]
|
||||||
----
|
----
|
||||||
|
@ -67,7 +65,22 @@ run_sim(sim_dir, duration=20.0)
|
||||||
The values for the number of particles, self-propulsion velocity and simulation duration are used here as an example. For more information about possible values and other optional arguments, press `?` in the REPL after running `using ReCo`. Then type `init_sim` or `run_sim` followed by pressing enter. This will show the method's documentation.
|
The values for the number of particles, self-propulsion velocity and simulation duration are used here as an example. For more information about possible values and other optional arguments, press `?` in the REPL after running `using ReCo`. Then type `init_sim` or `run_sim` followed by pressing enter. This will show the method's documentation.
|
||||||
|
|
||||||
== Run reinforcement learning
|
== Run reinforcement learning
|
||||||
// TODO
|
Import the package:
|
||||||
|
|
||||||
|
[source, julia]
|
||||||
|
----
|
||||||
|
using ReCo
|
||||||
|
----
|
||||||
|
|
||||||
|
Run a reinforcement learning process and return the environment helper:
|
||||||
|
[source, julia]
|
||||||
|
----
|
||||||
|
env_helper = run_rl(ENVTYPE)
|
||||||
|
----
|
||||||
|
|
||||||
|
`ENVTYPE` has to be replaced by one of the environment types, which are named after the files in the directory `ReCo.jl/src/RL/Envs`, for example `LocalCOMEnv`. A description of each environment is included at the beginning of the corresponding file.
|
||||||
|
|
||||||
|
For more information about all possible optional arguments, press `?` in the REPL after running `using ReCo`. Then type `run_rl` followed by pressing enter.
|
||||||
|
|
||||||
== Run analysis
|
== Run analysis
|
||||||
|
|
||||||
|
|
71
src/RL/RL.jl
71
src/RL/RL.jl
|
@ -26,6 +26,19 @@ using ..ReCo: ReCo
|
||||||
const INITIAL_STATE_IND = 1
|
const INITIAL_STATE_IND = 1
|
||||||
const INITIAL_REWARD = 0.0
|
const INITIAL_REWARD = 0.0
|
||||||
|
|
||||||
|
const DEFAULT_ELLIPTICAL_b_a_RATIO = 1.0
|
||||||
|
const DEFAULT_N_EPISODES = 200
|
||||||
|
const DEFAULT_EPISODE_DURATION = 20.0
|
||||||
|
const DEFAULT_UPDATE_ACTIONS_AT = 0.1
|
||||||
|
const DEFAULT_N_PARTICLES = 100
|
||||||
|
const DEFAULT_SEED = 42
|
||||||
|
const DEFAULT_ϵ_STABLE = 0.00001
|
||||||
|
const DEFAULT_PACKING_FRACTION = 0.15
|
||||||
|
const DEFAULT_SHOW_PROGRESS = true
|
||||||
|
const DEFAULT_SHOW_SIMULATION_PROGRESS = true
|
||||||
|
const DEFAULT_REWARD_DISCOUNT = 0.1
|
||||||
|
const DEFAULT_N_EPISODES_BEFORE_ENV_HELPER_SAVING = 10
|
||||||
|
|
||||||
include("Env.jl")
|
include("Env.jl")
|
||||||
include("EnvHelper.jl")
|
include("EnvHelper.jl")
|
||||||
|
|
||||||
|
@ -63,23 +76,45 @@ function gen_agent(
|
||||||
return Agent(; policy=policy, trajectory=trajectory)
|
return Agent(; policy=policy, trajectory=trajectory)
|
||||||
end
|
end
|
||||||
|
|
||||||
function run_rl(;
|
"""
|
||||||
EnvType::Type{E},
|
run_rl(EnvType; <keyword arguments>)
|
||||||
process_dir::String,
|
|
||||||
elliptical_b_a_ratio::Float64=1.0,
|
Run a reinforcement learning process and return the environment helper.
|
||||||
n_episodes::Int64=200,
|
|
||||||
episode_duration::Float64=50.0,
|
# Arguments
|
||||||
update_actions_at::Float64=0.1,
|
- `EnvType::Type{<:Env}`: Environment type. It has to be one of the environment types, which are named after the files in the directory `ReCo.jl/src/RL/Envs`, for example `LocalCOMEnv`. A description of each environment is included at the beginning of the corresponding file.
|
||||||
n_particles::Int64=100,
|
- `process_dir::String=string(EnvType)`: Path to the reinforcement learning process directory relative to `ReCo.jl/exports/RL`.
|
||||||
seed::Int64=42,
|
- `elliptical_b_a_ratio::Float64=$DEFAULT_ELLIPTICAL_b_a_RATIO`: Goal elliptical b to a ratio.
|
||||||
ϵ_stable::Float64=0.00001,
|
- `n_episodes::Int64=$DEFAULT_N_EPISODES`: Number of episodes.
|
||||||
|
- `episode_duration::Float64=$DEFAULT_EPISODE_DURATION`: Duration of each episode.
|
||||||
|
- `update_actions_at::Float64=$DEFAULT_UPDATE_ACTIONS_AT`: Actions update interval. It can be described as the reinforcement learning step duration in an episode.
|
||||||
|
- `n_particles::Int64=$DEFAULT_N_PARTICLES`: Number of particles.
|
||||||
|
- `seed::Int64=$DEFAULT_SEED`: Random number generator seed.
|
||||||
|
- `ϵ_stable::Float64=$DEFAULT_ϵ_STABLE`: Stable value of ϵ after its decay. ϵ is the one from the epsilon greedy method for exploration and exploitation.
|
||||||
|
- `skin_to_interaction_radius_ratio::Float64=$(ReCo.DEFAULT_SKIN_TO_INTERACTION_RADIUS_RATIO)`: Ratio of the skin radius to the interaction radius of a particle.
|
||||||
|
- `packing_fraction::Float64=$DEFAULT_PACKING_FRACTION`: Particles packing fraction.
|
||||||
|
- `show_progress::Bool=$DEFAULT_SHOW_PROGRESS`: Show a progress bar for the reinforcement learning process.
|
||||||
|
- `show_simulation_progress::Bool=$DEFAULT_SHOW_SIMULATION_PROGRESS`: Show a progress bar for each episode as a simulation.
|
||||||
|
- `reward_discount::Float64=$DEFAULT_REWARD_DISCOUNT`: Reward discount.
|
||||||
|
- `n_episodes_before_env_helper_saving::Int64=$DEFAULT_N_EPISODES_BEFORE_ENV_HELPER_SAVING`: Number of episodes before saving the environment helper for the case that the process is stopped while still running.
|
||||||
|
"""
|
||||||
|
function run_rl(
|
||||||
|
EnvType::Type{<:Env};
|
||||||
|
process_dir::String=string(EnvType),
|
||||||
|
elliptical_b_a_ratio::Float64=DEFAULT_ELLIPTICAL_b_a_RATIO,
|
||||||
|
n_episodes::Int64=DEFAULT_N_EPISODES,
|
||||||
|
episode_duration::Float64=DEFAULT_EPISODE_DURATION,
|
||||||
|
update_actions_at::Float64=DEFAULT_UPDATE_ACTIONS_AT,
|
||||||
|
n_particles::Int64=DEFAULT_N_PARTICLES,
|
||||||
|
seed::Int64=DEFAULT_SEED,
|
||||||
|
ϵ_stable::Float64=DEFAULT_ϵ_STABLE,
|
||||||
skin_to_interaction_radius_ratio::Float64=ReCo.DEFAULT_SKIN_TO_INTERACTION_RADIUS_RATIO,
|
skin_to_interaction_radius_ratio::Float64=ReCo.DEFAULT_SKIN_TO_INTERACTION_RADIUS_RATIO,
|
||||||
packing_fraction::Float64=0.15,
|
packing_fraction::Float64=DEFAULT_PACKING_FRACTION,
|
||||||
show_progress::Bool=true,
|
show_progress::Bool=DEFAULT_SHOW_PROGRESS,
|
||||||
reward_discount::Float64=0.1,
|
show_simulation_progress::Bool=DEFAULT_SHOW_SIMULATION_PROGRESS,
|
||||||
show_simulation_progress::Bool=true,
|
reward_discount::Float64=DEFAULT_REWARD_DISCOUNT,
|
||||||
n_episodes_before_env_helper_saving::Int64=10,
|
n_episodes_before_env_helper_saving::Int64=DEFAULT_N_EPISODES_BEFORE_ENV_HELPER_SAVING,
|
||||||
) where {E<:Env}
|
)
|
||||||
@assert 0.0 <= elliptical_b_a_ratio <= 1.0
|
@assert 0.0 <= elliptical_b_a_ratio <= 1.0
|
||||||
@assert n_episodes > 0
|
@assert n_episodes > 0
|
||||||
@assert episode_duration > 0
|
@assert episode_duration > 0
|
||||||
|
@ -200,7 +235,7 @@ include("Envs/LocalCOMWithAdditionalShapeRewardEnv2.jl")
|
||||||
include("Envs/OriginEnv.jl")
|
include("Envs/OriginEnv.jl")
|
||||||
include("Envs/NearestNeighbourEnv.jl")
|
include("Envs/NearestNeighbourEnv.jl")
|
||||||
include("Envs/LocalCOMEnv.jl")
|
include("Envs/LocalCOMEnv.jl")
|
||||||
include("Envs/OriginCompass.jl")
|
include("Envs/OriginCompassEnv.jl")
|
||||||
include("Envs/COMCompass.jl")
|
include("Envs/COMCompassEnv.jl")
|
||||||
|
|
||||||
end # module
|
end # module
|
|
@ -161,7 +161,7 @@ end
|
||||||
"""
|
"""
|
||||||
init_sim(n_particles::Int64, v₀::Float64; <keyword arguments>)
|
init_sim(n_particles::Int64, v₀::Float64; <keyword arguments>)
|
||||||
|
|
||||||
Initialize simulation and return the relative path of the simulation directory.
|
Initialize a simulation and return the relative path of the simulation directory.
|
||||||
|
|
||||||
# Arguments
|
# Arguments
|
||||||
- `n_particles::Int64`: Number of particles.
|
- `n_particles::Int64`: Number of particles.
|
||||||
|
@ -169,9 +169,9 @@ Initialize simulation and return the relative path of the simulation directory.
|
||||||
- `δt::Float64=$DEFAULT_δt`: Integration time step.
|
- `δt::Float64=$DEFAULT_δt`: Integration time step.
|
||||||
- `packing_fraction::Float64=$DEFAULT_PACKING_FRACTION`: Packing fraction.
|
- `packing_fraction::Float64=$DEFAULT_PACKING_FRACTION`: Packing fraction.
|
||||||
- `skin_to_interaction_radius_ratio::Float64=$DEFAULT_SKIN_TO_INTERACTION_RADIUS_RATIO`: Ratio of skin radius to interaction radius.
|
- `skin_to_interaction_radius_ratio::Float64=$DEFAULT_SKIN_TO_INTERACTION_RADIUS_RATIO`: Ratio of skin radius to interaction radius.
|
||||||
- `exports_dir::String="$DEFAULT_EXPORTS_DIR"`: Relative path to exports directory.
|
- `exports_dir::String="$DEFAULT_EXPORTS_DIR"`: Path to exports directory relative to the directory `ReCo.jl`.
|
||||||
- `parent_dir::String="$DEFAULT_PARENT_DIR"`: Name of the directory within the simulation directory is placed.
|
- `parent_dir::String="$DEFAULT_PARENT_DIR"`: Directory relative to `exports_dir` where the simulation directory is placed.
|
||||||
- `comment::String="$DEFAULT_COMMENT"`: Comment to append to the directory name.
|
- `comment::String="$DEFAULT_COMMENT"`: Comment to append to the simulation directory name.
|
||||||
- `half_box_len::Float64=$DEFAULT_HALF_BOX_LEN`: Half box length. The default of 0.0 means that the half box length will be calculated from the packing fraction. Otherwise, the provided half box length will be used. It is not possible to provide a half box length and a packing fraction at the same time.
|
- `half_box_len::Float64=$DEFAULT_HALF_BOX_LEN`: Half box length. The default of 0.0 means that the half box length will be calculated from the packing fraction. Otherwise, the provided half box length will be used. It is not possible to provide a half box length and a packing fraction at the same time.
|
||||||
"""
|
"""
|
||||||
function init_sim(;
|
function init_sim(;
|
||||||
|
|
Loading…
Reference in a new issue