ReCo.jl/src/RL/RL.jl

module RL

export run_rl, LocalCOMEnv

using Base: OneTo

using ReinforcementLearning
using Flux: InvDecay
using Intervals
using StaticArrays: SVector
using LoopVectorization: @turbo
using Random: Random
using ProgressMeter: ProgressMeter

using ..ReCo: ReCo

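# Initial values for the state index and the reward at the start of an episode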
const INITIAL_STATE_IND = 1
const INITIAL_REWARD = 0.0

include("Env.jl")
include("EnvHelper.jl")

include("States.jl")
include("Hooks.jl")

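# Construct the agent: a Monte Carlo learner with a tabular Q-value approximator
# and an ε-greedy explorer whose ε decays linearly from 1.0 to ϵ_stable after a
# warmup phase, plus a SART trajectory (state, action, reward, terminal).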
function gen_agent(n_states::Int64, n_actions::Int64, ϵ_stable::Float64)
    # TODO: Optimize warmup and decay
    warmup_steps = 500_000
    decay_steps = 5_000_000

    policy = QBasedPolicy(;
        learner=MonteCarloLearner(;
            approximator=TabularQApproximator(;
                n_state=n_states, n_action=n_actions, opt=InvDecay(1.0)
            ),
        ),
        explorer=EpsilonGreedyExplorer(;
            kind=:linear,
            ϵ_init=1.0,
            ϵ_stable=ϵ_stable,
            warmup_steps=warmup_steps,
            decay_steps=decay_steps,
        ),
    )

    trajectory = VectorSARTTrajectory(;
        state=Int64, action=Int64, reward=Float64, terminal=Bool
    )

    return Agent(; policy=policy, trajectory=trajectory)
end

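# Run the reinforcement learning loop: set up the simulation constants, the
# environment of type `EnvType` and the agent, then run `n_episodes` simulation
# episodes of duration `episode_duration`, with actions updated every
# `update_actions_at` time units. Returns the environment helper.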
function run_rl(;
    EnvType::Type{E},
    parent_dir_appendix::String,
    goal_gyration_tensor_eigvals_ratio::Float64,
    n_episodes::Int64=200,
    episode_duration::Float64=50.0,
    update_actions_at::Float64=0.1,
    n_particles::Int64=100,
    seed::Int64=42,
    ϵ_stable::Float64=0.0001,
    skin_to_interaction_r_ratio::Float64=ReCo.DEFAULT_SKIN_TO_INTERACTION_R_RATIO,
    packing_ratio::Float64=0.22,
    show_progress::Bool=true,
) where {E<:Env}
    @assert 0.0 <= goal_gyration_tensor_eigvals_ratio <= 1.0
    @assert n_episodes > 0
    @assert episode_duration > 0
    @assert update_actions_at in 0.001:0.001:episode_duration
    @assert n_particles > 0
    @assert 0.0 < ϵ_stable < 1.0

    # Setup
    Random.seed!(seed)

    sim_consts = ReCo.gen_sim_consts(
        n_particles,
        0.0;
        skin_to_interaction_r_ratio=skin_to_interaction_r_ratio,
        packing_ratio=packing_ratio,
    )
    n_particles = sim_consts.n_particles # Not always equal to the input!

    env_args = (skin_r=sim_consts.skin_r,)
    env = EnvType(; args=env_args)

    agent = gen_agent(env.shared.n_states, env.shared.n_actions, ϵ_stable)

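    # Number of simulation time steps between two consecutive action updates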
    n_steps_before_actions_update = round(Int64, update_actions_at / sim_consts.δt)

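    # ReinforcementLearning.jl hook that records the total reward of every episode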
    hook = TotalRewardPerEpisode()

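    # Bundle the environment, agent, hook and run parameters for the environment helper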
    env_helper_shared = EnvHelperSharedProps(
        env,
        agent,
        hook,
        n_steps_before_actions_update,
        goal_gyration_tensor_eigvals_ratio,
        n_particles,
    )

    env_helper_args = (half_box_len=sim_consts.half_box_len, skin_r=sim_consts.skin_r)
    env_helper = gen_env_helper(env, env_helper_shared; args=env_helper_args)

    parent_dir = "RL_" * parent_dir_appendix

    # Pre experiment
    hook(PRE_EXPERIMENT_STAGE, agent, env)
    agent(PRE_EXPERIMENT_STAGE, env)

    progress = ProgressMeter.Progress(n_episodes; dt=2, enabled=show_progress, desc="RL: ")

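    # Each episode initializes a fresh simulation directory and runs the simulation,
    # with the environment helper coupling the simulation to the agent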
    for episode in 1:n_episodes
        dir = ReCo.init_sim_with_sim_consts(sim_consts; parent_dir=parent_dir)

        # Reset
        reset!(env)

        # Pre episode
        hook(PRE_EPISODE_STAGE, agent, env)
        agent(PRE_EPISODE_STAGE, env)

        # Episode
        ReCo.run_sim(dir; duration=episode_duration, seed=episode, env_helper=env_helper)

        env.shared.terminated = true

        # Post episode
        hook(POST_EPISODE_STAGE, agent, env)
        agent(POST_EPISODE_STAGE, env)

        ProgressMeter.next!(progress; showvalues=[(:rewards, hook.rewards)])
    end

    # Post experiment
    hook(POST_EXPERIMENT_STAGE, agent, env)

    return env_helper
end

include("LocalCOMEnv.jl")
end # module