Separated processes and plotting for reward discount analysis

2025-09-04 09:12:35 +00:00 · 2022-01-30 14:45:51 +01:00 · 2022-01-30 14:45:51 +01:00 · 3cd7395d60
commit 3cd7395d60
parent 946e646594
1 changed file with 34 additions and 6 deletions
--- a/analysis/reward_discount_analysis.jl
+++ b/analysis/reward_discount_analysis.jl
@@ -5,10 +5,9 @@ using ReCo: ReCo

 includet("../src/Visualization/common_CairoMakie.jl")

-function run_reward_discount_analysis()
-    γs = 0.0:0.2:1.0
+function run_rl_prcesses_reward_discount(γs::AbstractVector)
    n_γs = length(γs)
-    γ_rewards = Vector{Vector{Float64}}(undef, n_γs)
+    env_helpers = Vector{ReCo.RL.EnvHelper}(undef, n_γs)

    Threads.@threads for γ_ind in 1:n_γs
        γ = γs[γ_ind]
@@ -24,8 +23,23 @@ function run_reward_discount_analysis()
            show_simulation_progress=false,
        )

+        env_helpers[γ_ind] = env_helper
+    end
+
+    return env_helpers
+end
+
+function plot_reward_discount_analysis(
+    γs::AbstractVector, env_helpers::Vector{ReCo.RL.EnvHelper}, linestyles::NTuple{N,Symbol}
+) where {N}
+    n_γs = length(γs)
+    @assert n_γs == length(env_helpers) == N
+
+    γ_rewards = Vector{Vector{Float64}}(undef, n_γs)
+
+    for (env_helper_ind, env_helper) in enumerate(env_helpers)
        rewards = env_helper.shared.hook.rewards
-        γ_rewards[γ_ind] = rewards
+        γ_rewards[env_helper_ind] = rewards
    end

    init_cairomakie!()
@@ -35,8 +49,10 @@ function run_reward_discount_analysis()
    ax = Axis(fig[1, 1]; xlabel="Episode", ylabel="Reward")

    rewards_plots = []
-    for rewards in γ_rewards
-        rewards_plot = lines!(ax, 1:length(rewards), rewards)
+    for (rewards, linestyle) in zip(γ_rewards, linestyles)
+        rewards_plot = lines!(
+            ax, 1:length(rewards), rewards; linestyle=linestyle, linewidth=0.6
+        )
        push!(rewards_plots, rewards_plot)
    end

@@ -48,3 +64,15 @@ function run_reward_discount_analysis()

    return nothing
 end
+
+function run_reward_discount_analysis()
+    γs = 0.0:0.25:1.0
+
+    env_helpers = run_rl_prcesses_reward_discount(γs)
+
+    plot_reward_discount_analysis(
+        γs, env_helpers, (:solid, :dash, :dashdot, :solid, :solid)
+    )
+
+    return nothing
+end