Added state_id_tensor

2025-09-04 09:12:35 +00:00 · 2022-01-15 21:27:15 +01:00 · 2022-01-15 21:27:15 +01:00 · 614311b080
commit 614311b080
parent 28fd6bab95
5 changed files with 47 additions and 53 deletions
--- a/src/RL/Env.jl
+++ b/src/RL/Env.jl
@ -1,26 +1,26 @@
 abstract type Env <: AbstractEnv end
-mutable struct EnvSharedProps{state_dims}
+mutable struct EnvSharedProps{n_state_dims}
    n_actions::Int64
    action_space::Vector{SVector{2,Float64}}
-    action_ind_space::OneTo{Int64}
+    action_id_space::OneTo{Int64}
    n_states::Int64
-    state_space::Vector{SVector{state_dims,Interval}}
+    state_id_tensor::Array{Int64,n_state_dims}
-    state_ind_space::OneTo{Int64}
+    state_id_space::OneTo{Int64}
-    state_ind::Int64
+    state_id::Int64
    reward::Float64
    terminated::Bool
    function EnvSharedProps(
-        n_states::Int64,
+        n_states::Int64, # Can be different from the sum of state_id_tensor_dims
-        state_space::Vector{SVector{state_dims,Interval}};
+        state_id_tensor_dims::NTuple{n_state_dims,Int64};
        n_v_actions::Int64=2,
        n_ω_actions::Int64=3,
        max_v::Float64=40.0,
        max_ω::Float64=π / 2,
-    ) where {state_dims}
+    ) where {n_state_dims}
        @assert n_v_actions > 1
        @assert n_ω_actions > 1
        @assert max_v > 0
@ -41,17 +41,25 @@ mutable struct EnvSharedProps{state_dims}
            end
        end
-        action_ind_space = OneTo(n_actions)
+        action_id_space = OneTo(n_actions)
-        state_ind_space = OneTo(n_states)
+        state_id_tensor = Array{Int64,n_state_dims}(undef, state_id_tensor_dims)
-        return new{state_dims}(
+        id = 1
        for ind in eachindex(state_id_tensor)
            state_id_tensor[ind] = id
            id += 1
        end
        state_id_space = OneTo(n_states)
        return new{n_state_dims}(
            n_actions,
            action_space,
-            action_ind_space,
+            action_id_space,
            n_states,
-            state_space,
+            state_id_tensor,
-            state_ind_space,
+            state_id_space,
            INITIAL_STATE_IND,
            INITIAL_REWARD,
            false,
@ -65,11 +73,11 @@ function reset!(env::Env)
    return nothing
 end
-RLBase.state_space(env::Env) = env.shared.state_ind_space
+RLBase.state_space(env::Env) = env.shared.state_id_space
-RLBase.state(env::Env) = env.shared.state_ind
+RLBase.state(env::Env) = env.shared.state_id
-RLBase.action_space(env::Env) = env.shared.action_ind_space
+RLBase.action_space(env::Env) = env.shared.action_id_space
 RLBase.reward(env::Env) = env.shared.reward
--- a/src/RL/EnvHelper.jl
+++ b/src/RL/EnvHelper.jl
@ -11,11 +11,11 @@ struct EnvHelperSharedProps{H<:AbstractHook}
    n_particles::Int64
-    old_states_ind::Vector{Int64}
+    old_states_id::Vector{Int64}
-    states_ind::Vector{Int64}
+    states_id::Vector{Int64}
    actions::Vector{SVector{2,Float64}}
-    actions_ind::Vector{Int64}
+    actions_id::Vector{Int64}
    function EnvHelperSharedProps(
        env::Env,
--- a/src/RL/Hooks.jl
+++ b/src/RL/Hooks.jl
@ -27,16 +27,16 @@ function update_table_and_actions_hook(
    if !first_integration_step
        # Old state
-        env.shared.state_ind = env_helper.shared.old_states_ind[id]
+        env.shared.state_id = env_helper.shared.old_states_id[id]
-        action_ind = env_helper.shared.actions_ind[id]
+        action_id = env_helper.shared.actions_id[id]
        # Pre act
-        agent(PRE_ACT_STAGE, env, action_ind)
+        agent(PRE_ACT_STAGE, env, action_id)
-        hook(PRE_ACT_STAGE, agent, env, action_ind)
+        hook(PRE_ACT_STAGE, agent, env, action_id)
        # Update to current state
-        env.shared.state_ind = env_helper.shared.states_ind[id]
+        env.shared.state_id = env_helper.shared.states_id[id]
        # Update reward
        update_reward!(env, env_helper, particle)
@ -47,11 +47,11 @@ function update_table_and_actions_hook(
    end
    # Update action
-    action_ind = agent(env)
+    action_id = agent(env)
-    action = env.shared.action_space[action_ind]
+    action = env.shared.action_space[action_id]
    env_helper.shared.actions[id] = action
-    env_helper.shared.actions_ind[id] = action_ind
+    env_helper.shared.actions_id[id] = action_id
    return nothing
 end
--- a/src/RL/LocalCOMEnv.jl
+++ b/src/RL/LocalCOMEnv.jl
@ -11,6 +11,7 @@ struct LocalCOMEnv <: Env
    function LocalCOMEnv(;
        n_distance_states::Int64=3, n_direction_angle_states::Int64=3, args
    )
        @assert n_distance_states > 1
        @assert n_direction_angle_states > 1
        direction_angle_state_space = gen_angle_state_space(n_direction_angle_states)
@ -23,19 +24,9 @@ struct LocalCOMEnv <: Env
        )
        n_states = n_distance_states * n_direction_angle_states + 1
        state_space = Vector{SVector{2,Interval}}(undef, n_states - 1)
        ind = 1
        for distance_state in distance_state_space
            for direction_angle_state in direction_angle_state_space
                state_space[ind] = SVector(distance_state, direction_angle_state)
                ind += 1
            end
        end
        # Last state is when no particle is in the skin radius
-        shared = EnvSharedProps(n_states, state_space)
+        shared = EnvSharedProps(n_states, (n_distance_states, n_direction_angle_states))
        return new(shared, distance_state_space, direction_angle_state_space)
    end
@ -110,7 +101,7 @@ function state_update_hook(env_helper::LocalCOMEnvHelper, particles::Vector{Part
    n_particles = env_helper.shared.n_particles
    @turbo for id in 1:n_particles
-        env_helper.shared.old_states_ind[id] = env_helper.shared.states_ind[id]
+        env_helper.shared.old_states_id[id] = env_helper.shared.states_id[id]
    end
    env = env_helper.shared.env
@ -121,7 +112,7 @@ function state_update_hook(env_helper::LocalCOMEnvHelper, particles::Vector{Part
        n_neighbours = env_helper.n_neighbours[id]
        if n_neighbours == 0
-            state_ind = env.shared.n_states
+            state_id = env.shared.n_states
            distance_to_local_center_of_mass_sum +=
                env_helper.max_distance_to_local_center_of_mass
@ -135,21 +126,20 @@ function state_update_hook(env_helper::LocalCOMEnvHelper, particles::Vector{Part
            distance_to_local_center_of_mass_sum += distance
-            distance_state = find_state_interval(distance, env.distance_state_space)
+            distance_state_ind = find_state_ind(distance, env.distance_state_space)
            si, co = sincos(particles[id].φ)
            direction_angle = ReCo.angle2(SVector(co, si), vec_to_local_center_of_mass)
-            direction_angle_state = find_state_interval(
+            direction_state_ind = find_state_ind(
                direction_angle, env.direction_angle_state_space
            )
-            state = SVector{2,Interval}(distance_state, direction_angle_state)
+            state_id = env.shared.state_id_tensor[distance_state_ind, direction_state_ind]
            state_ind = find_state_ind(state, env.shared.state_space)
        end
-        env_helper.shared.states_ind[id] = state_ind
+        env_helper.shared.states_id[id] = state_id
    end
    mean_distance_to_local_center_of_mass =
--- a/src/RL/States.jl
+++ b/src/RL/States.jl
@ -46,14 +46,10 @@ function gen_distance_state_space(
    return distance_state_space
 end
-function find_state_ind(state::S, state_space::Vector{S}) where {S<:SVector}
+function find_state_ind(value::Float64, state_space::Vector{Interval})::Int64
-    return findfirst(x -> x == state, state_space)
+    for (ind, state) in enumerate(state_space)
 end
 function find_state_interval(value::Float64, state_space::Vector{Interval})::Interval
    for state in state_space
        if value in state
-            return state
+            return ind
        end
    end
 end