# mirror of https://gitlab.rlp.net/mobitar/ReCo.jl.git
# synced 2024-09-19 19:01:17 +00:00
using ReinforcementLearning
|
|
|
|
"""
    ReCoEnvParams(n_particles, half_box_len, skin_r, n_v_actions, n_ω_actions;
                  max_v=80.0, max_ω=float(π))

Shared parameters for the ReCo reinforcement-learning environment.

Builds a discrete action space of all `(v, ω)` pairs, with `n_v_actions`
evenly spaced speeds in `[0, max_v]` and `n_ω_actions` evenly spaced angular
velocities in `[-max_ω, max_ω]`, and a discrete state space of all
`(distance, direction)` symbol pairs plus the sentinel `(:none, :none)`.

# Arguments
- `n_particles`: number of particles in the simulation.
- `half_box_len`: half the simulation box edge length; must be positive.
- `skin_r`: skin radius; must be positive. (Presumably the neighbour-search
  cutoff — confirm against the simulation code.)
- `n_v_actions`, `n_ω_actions`: number of discrete speed / angular-velocity
  actions; each must be > 1 so the range step is well defined.

# Keywords
- `max_v`: maximum speed (default `80.0`); must be positive.
- `max_ω`: maximum angular velocity magnitude (default `float(π)`); must be
  positive.

# Throws
- `ArgumentError` if any of the positivity/count requirements is violated.
"""
mutable struct ReCoEnvParams
    n_particles::Int64
    half_box_len::Float64
    skin_r::Float64
    action_space::Vector{Tuple{Float64,Float64}}
    state_space::Vector{Tuple{Symbol,Symbol}}
    reward::Float64

    function ReCoEnvParams(
        n_particles::Int64,
        half_box_len::Float64,
        skin_r::Float64,
        n_v_actions::Int64,
        n_ω_actions::Int64;
        max_v::Float64=80.0,
        max_ω::Float64=float(π),
    )
        # Validate at the API boundary with ArgumentError instead of @assert:
        # asserts may be disabled at higher optimization levels. The original
        # also misspelled `n_ω_actoins`, which was an UndefVarError at runtime.
        half_box_len > 0 || throw(ArgumentError("half_box_len must be positive"))
        skin_r > 0 || throw(ArgumentError("skin_r must be positive"))
        n_v_actions > 1 || throw(ArgumentError("n_v_actions must be > 1"))
        n_ω_actions > 1 || throw(ArgumentError("n_ω_actions must be > 1"))
        max_v > 0 || throw(ArgumentError("max_v must be positive"))
        max_ω > 0 || throw(ArgumentError("max_ω must be positive"))

        # Evenly spaced grids; `n - 1` steps so both endpoints are included.
        v_action_space = 0.0:(max_v / (n_v_actions - 1)):max_v
        ω_action_space = (-max_ω):(2 * max_ω / (n_ω_actions - 1)):max_ω

        n_actions = n_v_actions * n_ω_actions
        action_space = Vector{Tuple{Float64,Float64}}(undef, n_actions)

        ind = 1
        for v in v_action_space
            for ω in ω_action_space
                action_space[ind] = (v, ω)
                ind += 1
            end
        end

        distance_state_space = (:big, :medium, :small)
        direction_state_space = (:before, :behind, :left, :right)

        # Bug fix: the original wrote `n_states = undef, length(...) + 1`,
        # binding a (UndefInitializer, Int) tuple, and then called
        # `Vector{...}(n_states)` without `undef` — a MethodError.
        # +1 accounts for the sentinel (:none, :none) state appended below.
        n_states = length(distance_state_space) * length(direction_state_space) + 1
        state_space = Vector{Tuple{Symbol,Symbol}}(undef, n_states)

        ind = 1
        for distance in distance_state_space
            for direction in direction_state_space
                state_space[ind] = (distance, direction)
                ind += 1
            end
        end
        # Sentinel state — presumably "no neighbour observed"; confirm against
        # the environment's state-update logic.
        state_space[ind] = (:none, :none)

        return new(n_particles, half_box_len, skin_r, action_space, state_space, 0.0)
    end
end
|
|
|
|
"""
    ReCoEnv(params::ReCoEnvParams, particle::Particle)

Reinforcement-learning environment wrapping a single `Particle`.

Holds the shared `ReCoEnvParams`, the controlled particle, and the current
`(distance, direction)` state, which starts at the sentinel `(:none, :none)`.
"""
mutable struct ReCoEnv <: AbstractEnv
    params::ReCoEnvParams
    particle::Particle
    state::Tuple{Symbol,Symbol}

    function ReCoEnv(params::ReCoEnvParams, particle::Particle)
        # Every environment begins in the sentinel state.
        return new(params, particle, (:none, :none))
    end
end
|
|
|
|
# Bug fix: `ReCoEnv` has no `state_space` field (its fields are `params`,
# `particle`, `state`), so `env.state_space` errors at runtime. The state
# space lives in the shared params — consistent with `action_space`/`reward`.
RLBase.state_space(env::ReCoEnv) = env.params.state_space
|
|
|
|
# Current (distance, direction) state of this environment instance.
function RLBase.state(env::ReCoEnv)
    return env.state
end
|
|
|
|
# Discrete (v, ω) action space, shared across environments via the params.
function RLBase.action_space(env::ReCoEnv)
    return env.params.action_space
end
|
|
|
|
# Latest reward value, stored on the shared params.
function RLBase.reward(env::ReCoEnv)
    return env.params.reward
end