diff --git a/analysis/reward_discount_analysis.jl b/analysis/reward_discount_analysis.jl index 937ed06..a7d6f1a 100644 --- a/analysis/reward_discount_analysis.jl +++ b/analysis/reward_discount_analysis.jl @@ -5,10 +5,9 @@ using ReCo: ReCo includet("../src/Visualization/common_CairoMakie.jl") -function run_reward_discount_analysis() - γs = 0.0:0.2:1.0 +function run_rl_prcesses_reward_discount(γs::AbstractVector) n_γs = length(γs) - γ_rewards = Vector{Vector{Float64}}(undef, n_γs) + env_helpers = Vector{ReCo.RL.EnvHelper}(undef, n_γs) Threads.@threads for γ_ind in 1:n_γs γ = γs[γ_ind] @@ -24,8 +23,23 @@ function run_reward_discount_analysis() show_simulation_progress=false, ) + env_helpers[γ_ind] = env_helper + end + + return env_helpers +end + +function plot_reward_discount_analysis( + γs::AbstractVector, env_helpers::Vector{ReCo.RL.EnvHelper}, linestyles::NTuple{N,Symbol} +) where {N} + n_γs = length(γs) + @assert n_γs == length(env_helpers) == N + + γ_rewards = Vector{Vector{Float64}}(undef, n_γs) + + for (env_helper_ind, env_helper) in enumerate(env_helpers) rewards = env_helper.shared.hook.rewards - γ_rewards[γ_ind] = rewards + γ_rewards[env_helper_ind] = rewards end init_cairomakie!() @@ -35,8 +49,10 @@ function run_reward_discount_analysis() ax = Axis(fig[1, 1]; xlabel="Episode", ylabel="Reward") rewards_plots = [] - for rewards in γ_rewards - rewards_plot = lines!(ax, 1:length(rewards), rewards) + for (rewards, linestyle) in zip(γ_rewards, linestyles) + rewards_plot = lines!( + ax, 1:length(rewards), rewards; linestyle=linestyle, linewidth=0.6 + ) push!(rewards_plots, rewards_plot) end @@ -46,5 +62,17 @@ function run_reward_discount_analysis() save_fig("reward_discount_analysis.pdf", fig) + return nothing +end + +function run_reward_discount_analysis() + γs = 0.0:0.25:1.0 + + env_helpers = run_rl_prcesses_reward_discount(γs) + + plot_reward_discount_analysis( + γs, env_helpers, (:solid, :dash, :dashdot, :solid, :solid) + ) + return nothing end \ No newline at end of file