# ReCo.jl/analysis/reward_discount_analysis.jl

using CairoMakie
using LaTeXStrings: @L_str

using ReCo: ReCo

include("../src/Visualization/common_CairoMakie.jl")

"""
    run_rl_prcesses_reward_discount(γs::AbstractVector)

Start one `ReCo.run_rl` process per entry of `γs`, passing that entry as the
`reward_discount` keyword, and return the resulting environment helpers in the same
order as `γs`.

The runs are distributed with `Threads.@threads`. Every iteration writes only to its own
slot of the result vector and uses its own `process_dir`
(`"reward_discount_analysis/<index>"`).
"""
function run_rl_prcesses_reward_discount(γs::AbstractVector)
    n_runs = length(γs)
    helpers = Vector{ReCo.RL.EnvHelper}(undef, n_runs)

    Threads.@threads for γ_ind in 1:n_runs
        # `run_rl` returns two values; only the first (the env helper) is kept here.
        helper, _ = ReCo.run_rl(;
            EnvType=ReCo.OriginEnv,
            n_episodes=400,
            episode_duration=15.0,
            n_particles=150,
            update_actions_at=0.08,
            ϵ_stable=0.00001,
            process_dir="reward_discount_analysis/$γ_ind",
            reward_discount=γs[γ_ind],
            show_simulation_progress=false,
        )

        helpers[γ_ind] = helper
    end

    return helpers
end

"""
    plot_reward_discount_analysis(γs, env_helpers, linestyles)

Plot the rewards stored in each environment helper against the episode index, one line
per reward discount, and save the figure as `reward_discount_analysis.pdf`.

# Arguments
- `γs::AbstractVector`: reward discounts; used for the legend labels.
- `env_helpers::Vector{ReCo.RL.EnvHelper}`: one helper per entry of `γs`; the plotted data
  is read from `env_helper.shared.hook.rewards`.
- `linestyles::NTuple{N,Symbol}`: one line style per entry of `γs`.

# Throws
- `DimensionMismatch`: if `γs`, `env_helpers` and `linestyles` do not all have the same
  length.

Returns `nothing`.
"""
function plot_reward_discount_analysis(
    γs::AbstractVector,
    env_helpers::Vector{ReCo.RL.EnvHelper},
    linestyles::NTuple{N,Symbol},
) where {N}
    n_γs = length(γs)
    n_helpers = length(env_helpers)

    # Explicit check instead of `@assert`: assertions may be disabled, and the `zip`
    # below would then silently drop the surplus entries of the longer inputs.
    if !(n_γs == n_helpers == N)
        throw(
            DimensionMismatch(
                "γs ($n_γs), env_helpers ($n_helpers) and linestyles ($N) " *
                "must have the same length",
            ),
        )
    end

    # The typed comprehension converts each rewards collection to `Vector{Float64}`.
    γ_rewards = Vector{Float64}[
        env_helper.shared.hook.rewards for env_helper in env_helpers
    ]

    init_cairomakie!()

    fig = gen_figure()

    ax = Axis(fig[1, 1]; xlabel="Episode", ylabel="Reward")

    # `lines!` adds each curve to `ax`; the returned plot objects feed the legend.
    rewards_plots = [
        lines!(ax, eachindex(rewards), rewards; linestyle=linestyle, linewidth=0.6) for
        (rewards, linestyle) in zip(γ_rewards, linestyles)
    ]

    Legend(fig[1, 2], rewards_plots, [L"\gamma = %$γ" for γ in γs])

    set_gaps!(fig)

    save_fig("reward_discount_analysis.pdf", fig)

    return nothing
end

"""
    run_reward_discount_analysis()

Run the reward discount analysis end to end: start the RL processes for the discounts
`0.0:0.25:1.0` and then plot their rewards.

Returns `nothing`.
"""
function run_reward_discount_analysis()
    discounts = 0.0:0.25:1.0
    # One line style per discount, in the same order as `discounts`.
    styles = (:solid, :dash, :dashdot, :solid, :solid)

    helpers = run_rl_prcesses_reward_discount(discounts)
    plot_reward_discount_analysis(discounts, helpers, styles)

    return nothing
end
-												Added reward discount analysis

											
										
										
											2022-01-30 01:28:34 +00:00
+								using CairoMakie
 								using LaTeXStrings: @L_str
 								using ReCo: ReCo
-												Remove includet

											
										
										
											2022-02-08 22:06:22 +00:00
+								include("../src/Visualization/common_CairoMakie.jl")
-												Added reward discount analysis

											
										
										
											2022-01-30 01:28:34 +00:00
-												Seperated processes and plotting for reward discount analysis

											
										
										
											2022-01-30 13:45:51 +00:00
+								function run_rl_prcesses_reward_discount(γs::AbstractVector)
-												Added reward discount analysis

											
										
										
											2022-01-30 01:28:34 +00:00
+								    n_γs = length(γs)
-												Seperated processes and plotting for reward discount analysis

											
										
										
											2022-01-30 13:45:51 +00:00
+								    env_helpers = Vector{ReCo.RL.EnvHelper}(undef, n_γs)
-												Added reward discount analysis

											
										
										
											2022-01-30 01:28:34 +00:00
 								    Threads.@threads for γ_ind in 1:n_γs
 								        γ = γs[γ_ind]
-												Add rl_dir

											
										
										
											2022-02-07 17:41:25 +00:00
+								        env_helper, rl_dir = ReCo.run_rl(;
-												Added reward discount analysis

											
										
										
											2022-01-30 01:28:34 +00:00
+								            EnvType=ReCo.OriginEnv,
-												Added reward shaping plot

											
										
										
											2022-01-30 03:38:57 +00:00
+								            n_episodes=400,
-												Fixed normalization

											
										
										
											2022-01-30 02:32:47 +00:00
+								            episode_duration=15.0,
 								            n_particles=150,
-												Added reward discount analysis

											
										
										
											2022-01-30 01:28:34 +00:00
+								            update_actions_at=0.08,
 								            ϵ_stable=0.00001,
 								            process_dir="reward_discount_analysis/$γ_ind",
 								            reward_discount=γ,
-												Fixed normalization

											
										
										
											2022-01-30 02:32:47 +00:00
+								            show_simulation_progress=false,
-												Added reward discount analysis

											
										
										
											2022-01-30 01:28:34 +00:00
+								        )
-												Seperated processes and plotting for reward discount analysis

											
										
										
											2022-01-30 13:45:51 +00:00
+								        env_helpers[γ_ind] = env_helper
 								    end
 								    return env_helpers
 								end
 								function plot_reward_discount_analysis(
 								    γs::AbstractVector, env_helpers::Vector{ReCo.RL.EnvHelper}, linestyles::NTuple{N,Symbol}
 								) where {N}
 								    n_γs = length(γs)
 								    @assert n_γs == length(env_helpers) == N
 								    γ_rewards = Vector{Vector{Float64}}(undef, n_γs)
 								    for (env_helper_ind, env_helper) in enumerate(env_helpers)
-												Added reward discount analysis

											
										
										
											2022-01-30 01:28:34 +00:00
+								        rewards = env_helper.shared.hook.rewards
-												Seperated processes and plotting for reward discount analysis

											
										
										
											2022-01-30 13:45:51 +00:00
+								        γ_rewards[env_helper_ind] = rewards
-												Added reward discount analysis

											
										
										
											2022-01-30 01:28:34 +00:00
+								    end
 								    init_cairomakie!()
 								    fig = gen_figure()
 								    ax = Axis(fig[1, 1]; xlabel="Episode", ylabel="Reward")
 								    rewards_plots = []
-												Seperated processes and plotting for reward discount analysis

											
										
										
											2022-01-30 13:45:51 +00:00
+								    for (rewards, linestyle) in zip(γ_rewards, linestyles)
 								        rewards_plot = lines!(
 								            ax, 1:length(rewards), rewards; linestyle=linestyle, linewidth=0.6
 								        )
-												Added reward discount analysis

											
										
										
											2022-01-30 01:28:34 +00:00
+								        push!(rewards_plots, rewards_plot)
 								    end
 								    Legend(fig[1, 2], rewards_plots, [L"\gamma = %$γ" for γ in γs])
 								    set_gaps!(fig)
 								    save_fig("reward_discount_analysis.pdf", fig)
-												Seperated processes and plotting for reward discount analysis

											
										
										
											2022-01-30 13:45:51 +00:00
+								    return nothing
 								end
 								function run_reward_discount_analysis()
 								    γs = 0.0:0.25:1.0
 								    env_helpers = run_rl_prcesses_reward_discount(γs)
 								    plot_reward_discount_analysis(
 								        γs, env_helpers, (:solid, :dash, :dashdot, :solid, :solid)
 								    )
-												Added reward discount analysis

											
										
										
											2022-01-30 01:28:34 +00:00
+								    return nothing
 								end