ReCo.jl/src/RL/Env.jl

# Abstract supertype for the environments handled in this file. The generic methods
# defined below (`reset!` and the `RLBase` accessors) all read `env.shared`, so a
# concrete subtype presumably carries a `shared` field holding an `EnvSharedProps`
# -- TODO confirm against the concrete environment definitions.
abstract type Env <: AbstractEnv end

"""
    EnvSharedProps{n_state_dims}

Mutable bundle of action/state bookkeeping that the `Env` methods in this file read
through `env.shared`.

# Fields
- `n_actions`: number of discrete actions, `n_v_actions * n_ω_actions`.
- `action_space`: every `(v, ω)` pair as an `SVector`; `ω` varies fastest.
- `action_id_space`: `OneTo(n_actions)`.
- `n_states`: number of states, exactly as passed to the constructor.
- `state_id_tensor`: array of size `state_id_tensor_dims` holding `1, 2, 3, ...` in
  linear-index order.
- `state_id_space`: `OneTo(n_states)`.
- `state_id`: current state id, initialised to `INITIAL_STATE_IND`.
- `action_spaces_labels`: labels produced by `gen_action_spaces_labels` for the `v`
  and `ω` ranges.
- `state_spaces_labels`: labels supplied by the caller, stored unchanged.
- `reward`: initialised to `INITIAL_REWARD`.
- `terminated`: initialised to `false`.
"""
mutable struct EnvSharedProps{n_state_dims}
    n_actions::Int64
    action_space::Vector{SVector{2,Float64}}
    action_id_space::OneTo{Int64}

    n_states::Int64
    state_id_tensor::Array{Int64,n_state_dims}
    state_id_space::OneTo{Int64}
    state_id::Int64

    action_spaces_labels::Vector{Vector{LaTeXStrings.LaTeXString}}
    state_spaces_labels::Vector{Vector{LaTeXStrings.LaTeXString}}

    reward::Float64
    terminated::Bool

    # Build the shared properties from the state layout and the action discretisation.
    #
    # Keywords:
    # - `n_v_actions`, `n_ω_actions`: number of grid points for `v` and `ω` (each > 1).
    # - `max_v`: the `v` grid spans `0.0 .. max_v` (must be > 0).
    # - `max_ω`: the `ω` grid spans `-max_ω .. max_ω` (must be > 0).
    function EnvSharedProps(
        n_states::Int64, # Not derived from state_id_tensor_dims; the two need not agree
        state_id_tensor_dims::NTuple{n_state_dims,Int64},
        state_spaces_labels::Vector{Vector{LaTeXStrings.LaTeXString}};
        n_v_actions::Int64=2,
        n_ω_actions::Int64=3,
        max_v::Float64=40.0,
        max_ω::Float64=π / 2,
    ) where {n_state_dims}
        @assert n_v_actions > 1
        @assert n_ω_actions > 1
        @assert max_v > 0
        @assert max_ω > 0

        # Evenly spaced grids for the two action components.
        v_values = range(; start=0.0, stop=max_v, length=n_v_actions)
        ω_values = range(; start=-max_ω, stop=max_ω, length=n_ω_actions)

        action_spaces_labels = gen_action_spaces_labels(
            ("v", "\\omega"), (v_values, ω_values)
        )

        n_actions = n_v_actions * n_ω_actions

        # Flattened Cartesian product: `v` is the outer loop, `ω` the inner one.
        action_space = SVector{2,Float64}[SVector(v, ω) for v in v_values for ω in ω_values]

        # Each tensor entry holds its own linear index (1, 2, 3, ...).
        state_id_tensor = Array{Int64,n_state_dims}(undef, state_id_tensor_dims)
        copyto!(state_id_tensor, 1:length(state_id_tensor))

        return new{n_state_dims}(
            n_actions,
            action_space,
            OneTo(n_actions),
            n_states,
            state_id_tensor,
            OneTo(n_states),
            INITIAL_STATE_IND,
            action_spaces_labels,
            state_spaces_labels,
            INITIAL_REWARD,
            false,
        )
    end
end

"""
    reset!(env::Env)

Clear the `terminated` flag stored in `env.shared`. No other shared field is modified
by this method. Returns `nothing`.
"""
function reset!(env::Env)
    shared = env.shared
    shared.terminated = false

    return nothing
end

"""
    RLBase.state_space(env::Env)

Return the state-id space held in `env.shared.state_id_space`.
"""
function RLBase.state_space(env::Env)
    shared = env.shared
    return shared.state_id_space
end

"""
    RLBase.state(env::Env)

Return the current state id held in `env.shared.state_id`.
"""
function RLBase.state(env::Env)
    shared = env.shared
    return shared.state_id
end

"""
    RLBase.action_space(env::Env)

Return the action-id space held in `env.shared.action_id_space`. Note that this is the
space of action ids, not the vector of `(v, ω)` actions stored in
`env.shared.action_space`.
"""
function RLBase.action_space(env::Env)
    shared = env.shared
    return shared.action_id_space
end

"""
    RLBase.reward(env::Env)

Return the reward currently stored in `env.shared.reward`.
"""
function RLBase.reward(env::Env)
    shared = env.shared
    return shared.reward
end

"""
    RLBase.is_terminated(env::Env)

Return the `terminated` flag stored in `env.shared`.
"""
function RLBase.is_terminated(env::Env)
    shared = env.shared
    return shared.terminated
end

"""
    gen_action_space_labels(action_label::String, action_space::AbstractRange)

Build one `LaTeXString` label per value in `action_space`.

Each label is `action_label` wrapped in LaTeX math delimiters, followed by `=` and the
value rounded to two digits. The returned vector has the same length and order as
`action_space`.
"""
function gen_action_space_labels(action_label::String, action_space::AbstractRange)
    # Label for a single action value.
    make_label(action) = LaTeXStrings.latexstring(
        "\$" * action_label * "\$=$(round(action; digits=2))"
    )

    return LaTeXStrings.LaTeXString[make_label(action) for action in action_space]
end

"""
    gen_action_spaces_labels(actions_labels, action_spaces)

Call `gen_action_space_labels` on each `(label, range)` pair and collect the results in
a vector, in the same order as the input tuples. Both tuples must have the same
length `N`.
"""
function gen_action_spaces_labels(
    actions_labels::NTuple{N,String}, action_spaces::NTuple{N,AbstractRange}
) where {N}
    label_space_pairs = zip(actions_labels, action_spaces)

    return [gen_action_space_labels(label, space) for (label, space) in label_space_pairs]
end
RL code organization 2022-01-11 18:00:41 +00:00			`abstract type Env <: AbstractEnv end`

Added state_id_tensor 2022-01-15 20:27:15 +00:00			`mutable struct EnvSharedProps{n_state_dims}`
RL code organization 2022-01-11 18:00:41 +00:00			`n_actions::Int64`
			`action_space::Vector{SVector{2,Float64}}`
Added state_id_tensor 2022-01-15 20:27:15 +00:00			`action_id_space::OneTo{Int64}`
RL code organization 2022-01-11 18:00:41 +00:00
			`n_states::Int64`
Added state_id_tensor 2022-01-15 20:27:15 +00:00			`state_id_tensor::Array{Int64,n_state_dims}`
			`state_id_space::OneTo{Int64}`
			`state_id::Int64`
RL code organization 2022-01-11 18:00:41 +00:00
Save env_helper and added states and actions labels 2022-01-30 00:22:37 +00:00			`action_spaces_labels::Vector{Vector{LaTeXStrings.LaTeXString}}`
			`state_spaces_labels::Vector{Vector{LaTeXStrings.LaTeXString}}`

RL code organization 2022-01-11 18:00:41 +00:00			`reward::Float64`
			`terminated::Bool`

			`function EnvSharedProps(`
Added state_id_tensor 2022-01-15 20:27:15 +00:00			`n_states::Int64, # Can be different from the sum of state_id_tensor_dims`
Save env_helper and added states and actions labels 2022-01-30 00:22:37 +00:00			`state_id_tensor_dims::NTuple{n_state_dims,Int64},`
			`state_spaces_labels::Vector{Vector{LaTeXStrings.LaTeXString}};`
RL code organization 2022-01-11 18:00:41 +00:00			`n_v_actions::Int64=2,`
			`n_ω_actions::Int64=3,`
			`max_v::Float64=40.0,`
			`max_ω::Float64=π / 2,`
Added state_id_tensor 2022-01-15 20:27:15 +00:00			`) where {n_state_dims}`
RL code organization 2022-01-11 18:00:41 +00:00			`@assert n_v_actions > 1`
			`@assert n_ω_actions > 1`
			`@assert max_v > 0`
			`@assert max_ω > 0`

			`v_action_space = range(; start=0.0, stop=max_v, length=n_v_actions)`
			`ω_action_space = range(; start=-max_ω, stop=max_ω, length=n_ω_actions)`

Save env_helper and added states and actions labels 2022-01-30 00:22:37 +00:00			`action_spaces_labels = gen_action_spaces_labels(`
			`("v", "\\omega"), (v_action_space, ω_action_space)`
			`)`

RL code organization 2022-01-11 18:00:41 +00:00			`n_actions = n_v_actions * n_ω_actions`

			`action_space = Vector{SVector{2,Float64}}(undef, n_actions)`

			`ind = 1`
			`for v in v_action_space`
			`for ω in ω_action_space`
			`action_space[ind] = SVector(v, ω)`
			`ind += 1`
			`end`
			`end`

Added state_id_tensor 2022-01-15 20:27:15 +00:00			`action_id_space = OneTo(n_actions)`
RL code organization 2022-01-11 18:00:41 +00:00
Added state_id_tensor 2022-01-15 20:27:15 +00:00			`state_id_tensor = Array{Int64,n_state_dims}(undef, state_id_tensor_dims)`
RL code organization 2022-01-11 18:00:41 +00:00
Added state_id_tensor 2022-01-15 20:27:15 +00:00			`id = 1`
			`for ind in eachindex(state_id_tensor)`
			`state_id_tensor[ind] = id`
			`id += 1`
			`end`

			`state_id_space = OneTo(n_states)`

			`return new{n_state_dims}(`
RL code organization 2022-01-11 18:00:41 +00:00			`n_actions,`
			`action_space,`
Added state_id_tensor 2022-01-15 20:27:15 +00:00			`action_id_space,`
RL code organization 2022-01-11 18:00:41 +00:00			`n_states,`
Added state_id_tensor 2022-01-15 20:27:15 +00:00			`state_id_tensor,`
			`state_id_space,`
RL code organization 2022-01-11 18:00:41 +00:00			`INITIAL_STATE_IND,`
Save env_helper and added states and actions labels 2022-01-30 00:22:37 +00:00			`action_spaces_labels,`
			`state_spaces_labels,`
RL code organization 2022-01-11 18:00:41 +00:00			`INITIAL_REWARD,`
			`false,`
			`)`
			`end`
			`end`

			`function reset!(env::Env)`
			`env.shared.terminated = false`

			`return nothing`
			`end`

Follow Julian style guide 2022-01-18 01:17:52 +00:00			`function RLBase.state_space(env::Env)`
			`return env.shared.state_id_space`
			`end`
RL code organization 2022-01-11 18:00:41 +00:00
Follow Julian style guide 2022-01-18 01:17:52 +00:00			`function RLBase.state(env::Env)`
			`return env.shared.state_id`
			`end`
RL code organization 2022-01-11 18:00:41 +00:00
Follow Julian style guide 2022-01-18 01:17:52 +00:00			`function RLBase.action_space(env::Env)`
			`return env.shared.action_id_space`
			`end`
RL code organization 2022-01-11 18:00:41 +00:00
Follow Julian style guide 2022-01-18 01:17:52 +00:00			`function RLBase.reward(env::Env)`
			`return env.shared.reward`
			`end`
RL code organization 2022-01-11 18:00:41 +00:00
Follow Julian style guide 2022-01-18 01:17:52 +00:00			`function RLBase.is_terminated(env::Env)`
			`return env.shared.terminated`
Save env_helper and added states and actions labels 2022-01-30 00:22:37 +00:00			`end`

			`function gen_action_space_labels(action_label::String, action_space::AbstractRange)`
			`labels = Vector{LaTeXStrings.LaTeXString}(undef, length(action_space))`
			`for (action_ind, action) in enumerate(action_space)`
			`labels[action_ind] = LaTeXStrings.latexstring(`
Compact latex tables 2022-01-30 19:46:21 +00:00			`"\$" * action_label * "\$=$(round(action; digits=2))"`
Save env_helper and added states and actions labels 2022-01-30 00:22:37 +00:00			`)`
			`end`

			`return labels`
			`end`

			`function gen_action_spaces_labels(`
			`actions_labels::NTuple{N,String}, action_spaces::NTuple{N,AbstractRange}`
			`) where {N}`
			`return [gen_action_space_labels(actions_labels[i], action_spaces[i]) for i in 1:N]`
Follow Julian style guide 2022-01-18 01:17:52 +00:00			`end`