ReCo.jl/src/RL/RL.jl

module RL

export run_rl, LocalCOMWithAdditionalShapeRewardEnv, OriginEnv

using Base: OneTo

using ReinforcementLearning
using Flux: Flux
using Intervals
using StaticArrays: SVector
using LoopVectorization: @turbo
using Random: Random
using ProgressMeter: ProgressMeter
using JLD2: JLD2
using LaTeXStrings: LaTeXStrings, @L_str

using ..ReCo: ReCo

const INITIAL_STATE_IND = 1
const INITIAL_REWARD = 0.0

include("Env.jl")
include("EnvHelper.jl")
include("States.jl")
include("Hooks.jl")
include("Reward.jl")

function gen_agent(
    n_states::Int64, n_actions::Int64, ϵ_stable::Float64, reward_discount::Float64
)
    # TODO: Optimize warmup and decay
    warmup_steps = 500_000
    decay_steps = 5_000_000

    policy = QBasedPolicy(;
        learner=MonteCarloLearner(;
            approximator=TabularQApproximator(;
                n_state=n_states, n_action=n_actions, opt=Flux.InvDecay(1.0)
            ),
            γ=reward_discount,
        ),
        explorer=EpsilonGreedyExplorer(;
            kind=:linear,
            ϵ_init=1.0,
            ϵ_stable=ϵ_stable,
            warmup_steps=warmup_steps,
            decay_steps=decay_steps,
        ),
    )

    trajectory = VectorSARTTrajectory(;
        state=Int64, action=Int64, reward=Float64, terminal=Bool
    )

    return Agent(; policy=policy, trajectory=trajectory)
end
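# How the agent above is typically used (illustrative sketch; the first call mirrors the
# one in `run_rl` below, while `agent.policy(env)` is an assumed ReinforcementLearning.jl
# usage, not taken from this file):
#
#   agent = gen_agent(env.shared.n_states, env.shared.n_actions, ϵ_stable, reward_discount)
#   action = agent.policy(env)  # ϵ-greedy pick over the tabular Q-values of the current state
#
# With Flux.InvDecay(1.0), the tabular approximator effectively averages the Monte Carlo
# returns per state-action pair, and ϵ decays linearly from 1.0 to ϵ_stable after the
# warmup steps.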
function run_rl(;
    EnvType::Type{E},
    process_dir::String,
    elliptical_a_b_ratio::Float64=1.0,
    n_episodes::Int64=200,
    episode_duration::Float64=50.0,
    update_actions_at::Float64=0.1,
    n_particles::Int64=100,
    seed::Int64=42,
    ϵ_stable::Float64=0.0001,
    skin_to_interaction_radius_ratio::Float64=ReCo.DEFAULT_SKIN_TO_INTERACTION_RADIUS_RATIO,
    packing_ratio::Float64=0.15,
    show_progress::Bool=true,
    reward_discount::Float64=0.1,
    show_simulation_progress::Bool=true,
    n_episodes_before_env_helper_saving::Int64=10,
) where {E<:Env}
    @assert 0.0 <= elliptical_a_b_ratio <= 1.0
    @assert n_episodes > 0
    @assert episode_duration > 0
    @assert update_actions_at in 0.001:0.001:episode_duration
    @assert n_particles > 0
    @assert 0.0 < ϵ_stable < 1.0
    @assert 0.0 <= reward_discount <= 1.0
    @assert n_episodes_before_env_helper_saving > 0

    # Setup
    Random.seed!(seed)

    sim_consts = ReCo.gen_sim_consts(
        n_particles,
        0.0;
        skin_to_interaction_radius_ratio=skin_to_interaction_radius_ratio,
        packing_ratio=packing_ratio,
    )
    n_particles = sim_consts.n_particles # Not always equal to the input!

    env_args = (skin_radius=sim_consts.skin_radius, half_box_len=sim_consts.half_box_len)
    env = EnvType(; args=env_args)

    agent = gen_agent(env.shared.n_states, env.shared.n_actions, ϵ_stable, reward_discount)

    n_steps_before_actions_update = round(Int64, update_actions_at / sim_consts.δt)

    hook = TotalRewardPerEpisode()

    n_actions_updates_per_episode = ceil(Int64, episode_duration / update_actions_at)

    env_helper_shared = EnvHelperSharedProps(
        env,
        agent,
        hook,
        n_steps_before_actions_update,
        n_actions_updates_per_episode,
        elliptical_a_b_ratio,
        n_particles,
    )

    env_helper_args = (
        half_box_len=sim_consts.half_box_len, skin_radius=sim_consts.skin_radius
    )
    env_helper = gen_env_helper(env, env_helper_shared; args=env_helper_args)

    parent_dir = "RL/" * process_dir
    env_helper_path = ReCo.DEFAULT_EXPORTS_DIR * "/$parent_dir/env_helper.jld2"

    # Pre experiment
    hook(PRE_EXPERIMENT_STAGE, agent, env)
    agent(PRE_EXPERIMENT_STAGE, env)

    progress = ProgressMeter.Progress(n_episodes; dt=2, enabled=show_progress, desc="RL: ")

    for episode in 1:n_episodes
        dir = ReCo.init_sim_with_sim_consts(sim_consts; parent_dir=parent_dir)

        # Reset
        reset!(env)

        # Pre episode
        hook(PRE_EPISODE_STAGE, agent, env)
        agent(PRE_EPISODE_STAGE, env)

        # Episode
        ReCo.run_sim(
            dir;
            duration=episode_duration,
            seed=episode,
            env_helper=env_helper,
            show_progress=show_simulation_progress,
        )

        env.shared.terminated = true

        # Post episode
        hook(POST_EPISODE_STAGE, agent, env)
        agent(POST_EPISODE_STAGE, env)

        if episode % n_episodes_before_env_helper_saving == 0
            JLD2.save_object(env_helper_path, env_helper)
        end

        ProgressMeter.next!(progress; showvalues=[(:rewards, hook.rewards)])
    end

    # Post experiment
    hook(POST_EXPERIMENT_STAGE, agent, env)

    return env_helper
end
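# Hedged usage sketch (the keyword values below are illustrative assumptions, not taken
# from this file): train in one of the bundled environments and keep the returned helper,
# which holds the agent, the reward hook, and the shared environment properties.
#
#   env_helper = run_rl(;
#       EnvType=OriginEnv,
#       process_dir="origin_test",
#       n_episodes=100,
#       n_particles=50,
#   )
#
# During training, the helper is also saved every `n_episodes_before_env_helper_saving`
# episodes to env_helper.jld2 under ReCo.DEFAULT_EXPORTS_DIR.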
function gen_state_space_labels(state_label::String, state_space::Vector{Interval})
    labels = Vector{LaTeXStrings.LaTeXString}(undef, length(state_space))

    for (state_ind, state) in enumerate(state_space)
        labels[state_ind] = LaTeXStrings.latexstring(
            "\$" *
            state_label *
            "\$=$(round(state.first; digits=2)):$(round(state.last, digits=2))",
        )
    end

    return labels
end

function gen_state_spaces_labels(
    states_labels::NTuple{N,String}, state_spaces::NTuple{N,Vector{Interval}}
) where {N}
    return [gen_state_space_labels(states_labels[i], state_spaces[i]) for i in 1:N]
end
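# Hedged example (the intervals below are assumptions for illustration): labels for a
# single state space can be generated as
#
#   state_space = Interval[Interval(0.0, 0.5), Interval(0.5, 1.0)]
#   labels = gen_state_space_labels("d", state_space)
#
# which yields LaTeX strings of the form $d$=0.0:0.5, one per interval.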
include("Envs/LocalCOMWithAdditionalShapeRewardEnv.jl")
include("Envs/OriginEnv.jl")
end # module