diff --git a/analysis/radial_distribution_function/radial_distribution_function.jl b/analysis/radial_distribution_function/radial_distribution_function.jl
index 7cb9957..954420d 100644
--- a/analysis/radial_distribution_function/radial_distribution_function.jl
+++ b/analysis/radial_distribution_function/radial_distribution_function.jl
@@ -160,7 +160,7 @@ function plot_radial_distributions(;
     max_g = maximum(maximum.(gs))
 
     ax = Axis(
-        fig[1:2, 1:2];
+        fig[1, 1];
         xticks=0:(2 * particle_radius):floor(Int64, max_lower_radius),
         yticks=0:ceil(Int64, max_g),
         xlabel=L"r / d",
diff --git a/analysis/reward_discount_analysis.jl b/analysis/reward_discount_analysis.jl
new file mode 100644
index 0000000..c878cfd
--- /dev/null
+++ b/analysis/reward_discount_analysis.jl
@@ -0,0 +1,49 @@
+using CairoMakie
+using LaTeXStrings: @L_str
+
+using ReCo: ReCo
+
+includet("../src/Visualization/common_CairoMakie.jl")
+
+function run_reward_discount_analysis()
+    γs = 0.0:0.2:1.0
+    n_γs = length(γs)
+    γ_rewards = Vector{Vector{Float64}}(undef, n_γs)
+
+    Threads.@threads for γ_ind in 1:n_γs
+        γ = γs[γ_ind]
+        env_helper = ReCo.run_rl(;
+            EnvType=ReCo.OriginEnv,
+            n_episodes=500,
+            episode_duration=8.0,
+            n_particles=200,
+            update_actions_at=0.08,
+            ϵ_stable=0.00001,
+            process_dir="reward_discount_analysis/$γ_ind",
+            reward_discount=γ,
+        )
+
+        rewards = env_helper.shared.hook.rewards
+        γ_rewards[γ_ind] = rewards
+    end
+
+    init_cairomakie!()
+
+    fig = gen_figure()
+
+    ax = Axis(fig[1, 1]; xlabel="Episode", ylabel="Reward")
+
+    rewards_plots = []
+    for (rewards_ind, rewards) in enumerate(γ_rewards)
+        rewards_plot = lines!(ax, 1:length(rewards), rewards)
+        push!(rewards_plots, rewards_plot)
+    end
+
+    Legend(fig[1, 2], rewards_plots, [L"\gamma = %$γ" for γ in γs])
+
+    set_gaps!(fig)
+
+    save_fig("reward_discount_analysis.pdf", fig)
+
+    return nothing
+end
\ No newline at end of file
diff --git a/src/RL/RL.jl b/src/RL/RL.jl
index fe1aea7..f3325b4 100644
--- a/src/RL/RL.jl
+++ b/src/RL/RL.jl
@@ -26,7 +26,9 @@ include("States.jl")
 include("Hooks.jl")
 include("Reward.jl")
 
-function gen_agent(n_states::Int64, n_actions::Int64, ϵ_stable::Float64)
+function gen_agent(
+    n_states::Int64, n_actions::Int64, ϵ_stable::Float64, reward_discount::Float64
+)
     # TODO: Optimize warmup and decay
     warmup_steps = 500_000
     decay_steps = 5_000_000
@@ -36,7 +38,7 @@ function gen_agent(n_states::Int64, n_actions::Int64, ϵ_stable::Float64)
             approximator=TabularQApproximator(;
                 n_state=n_states, n_action=n_actions, opt=Flux.InvDecay(1.0)
             ),
-            γ=0.95, # Reward discount
+            γ=reward_discount,
         ),
         explorer=EpsilonGreedyExplorer(;
             kind=:linear,
@@ -67,6 +69,7 @@ function run_rl(;
     skin_to_interaction_radius_ratio::Float64=ReCo.DEFAULT_SKIN_TO_INTERACTION_R_RATIO,
     packing_ratio::Float64=0.15,
     show_progress::Bool=true,
+    reward_discount::Float64=1.0,
 ) where {E<:Env}
     @assert 0.0 <= elliptical_a_b_ratio <= 1.0
     @assert n_episodes > 0
@@ -89,7 +92,7 @@ function run_rl(;
     env_args = (skin_radius=sim_consts.skin_radius, half_box_len=sim_consts.half_box_len)
 
     env = EnvType(; args=env_args)
 
-    agent = gen_agent(env.shared.n_states, env.shared.n_actions, ϵ_stable)
+    agent = gen_agent(env.shared.n_states, env.shared.n_actions, ϵ_stable, reward_discount)
 
     n_steps_before_actions_update = round(Int64, update_actions_at / sim_consts.δt)
 
diff --git a/src/RL/latex_table.jl b/src/RL/latex_table.jl
index e62e5d7..8c40e0a 100644
--- a/src/RL/latex_table.jl
+++ b/src/RL/latex_table.jl
@@ -1,7 +1,9 @@
-using DataFrames
+using DataFrames: DataFrames
 using PrettyTables: pretty_table
 
-function latex_table(dataframe, filename::String; path="exports/$filename")
+function latex_table(
+    dataframe::DataFrames.DataFrame, filename::String; path::String="exports/$filename"
+)
     open(path, "w") do f
         pretty_table(f, dataframe; backend=:latex, nosubheader=true, alignment=:c)
     end
@@ -9,8 +11,8 @@ function latex_table(dataframe, filename::String; path="exports/$filename")
     return nothing
 end
 
-function latex_rl_table(env_helper, filename)
-    table = env_helper.shared.agent.policy.learner.approximator.table
+function latex_rl_table(env_helper, filename::String)
+    table = copy(env_helper.shared.agent.policy.learner.approximator.table)
 
     for col in 1:size(table)[2]
         table[:, col] ./= sum(table[:, col])
@@ -31,12 +33,12 @@ function latex_rl_table(env_helper, filename)
 
     for i in action_spaces_labels[1]
         for j in action_spaces_labels[2]
-            push!(actions, i * "; " * j)
+            push!(actions, i * " ; " * j)
         end
     end
 
-    df = DataFrame(table, states)
-    insertcols!(df, 1, :Actions => actions)
+    df = DataFrames.DataFrame(table, states)
+    DataFrames.insertcols!(df, 1, :Actions => actions)
 
     latex_table(df, filename)
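Usage sketch (illustrative, not part of the patch): the diff above adds a reward_discount keyword to ReCo.run_rl and forwards it to gen_agent, where it sets the discount factor γ of the tabular Q-learning agent (previously hard-coded to 0.95). Assuming the keyword arguments shown in the diff, a single training run with an explicit discount could look like the following; all numeric values and the process_dir name are placeholders, not values prescribed by the patch.

    using ReCo: ReCo

    # Train with an explicit discount factor. reward_discount defaults to 1.0 and is
    # passed by run_rl to gen_agent, which uses it as γ for the learner built around
    # the TabularQApproximator.
    env_helper = ReCo.run_rl(;
        EnvType=ReCo.OriginEnv,
        n_episodes=100,                           # placeholder value
        episode_duration=8.0,                     # placeholder value
        n_particles=200,                          # placeholder value
        update_actions_at=0.08,                   # placeholder value
        ϵ_stable=0.00001,                         # placeholder value
        process_dir="reward_discount_example",    # placeholder directory
        reward_discount=0.6,                      # keyword introduced by this patch
    )

    # Per-episode rewards recorded by the hook, read the same way as in the
    # reward_discount_analysis.jl script added above.
    rewards = env_helper.shared.hook.rewards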