ReCo.jl/src/RL/LocalCOMEnv.jl

export LocalCOMEnv

using ..ReCo: Particle

"""
    LocalCOMEnv(; n_distance_states=3, n_direction_angle_states=3, args)

Environment whose state space is built from every pair of a distance interval and a
direction-angle interval, plus one additional state index (the last one) that has no
interval pair and is used when no particle is inside the skin radius.

# Keywords
- `n_distance_states`: number of distance states passed to `gen_distance_state_space`.
- `n_direction_angle_states`: number of direction-angle states passed to
  `gen_angle_state_space`; must be greater than 1.
- `args`: object providing `skin_r`, which is used as the upper distance bound.
"""
struct LocalCOMEnv <: Env
    shared::EnvSharedProps

    # Interval partitions used to discretise the two state components
    distance_state_space::Vector{Interval}
    direction_angle_state_space::Vector{Interval}

    function LocalCOMEnv(;
        n_distance_states::Int64=3, n_direction_angle_states::Int64=3, args
    )
        @assert n_direction_angle_states > 1

        direction_angle_state_space = gen_angle_state_space(n_direction_angle_states)

        # Distances are partitioned between 0 and the skin radius
        distance_state_space = gen_distance_state_space(
            0.0, args.skin_r, n_distance_states
        )

        # Only the (distance, direction angle) pairs are stored as intervals
        n_pair_states = n_distance_states * n_direction_angle_states
        state_space = Vector{SVector{2,Interval}}(undef, n_pair_states)

        # Distance is the outer iteration, direction angle the inner one
        next_ind = 0
        for distance_state in distance_state_space,
            direction_angle_state in direction_angle_state_space

            next_ind += 1
            state_space[next_ind] = SVector(distance_state, direction_angle_state)
        end

        # Last state is when no particle is in the skin radius
        shared = EnvSharedProps(n_pair_states + 1, state_space)

        return new(shared, distance_state_space, direction_angle_state_space)
    end
end

"""
    LocalCOMEnvHelper(shared::EnvHelperSharedProps, half_box_len::Float64, skin_r)

Mutable per-simulation scratch data for `LocalCOMEnv`.

All per-particle buffers are allocated with `shared.n_particles` entries and start at
zero. `skin_r` is stored as `max_distance_to_local_center_of_mass`, and
`max_elliptical_distance` is set to
`sqrt(2) * half_box_len / shared.goal_gyration_tensor_eigvals_ratio`.
"""
mutable struct LocalCOMEnvHelper <: EnvHelper
    shared::EnvHelperSharedProps

    # Per-particle accumulators filled by `state_update_helper_hook`
    vec_to_neighbour_sums::Vector{SVector{2,Float64}}
    n_neighbours::Vector{Int64}

    # Per-particle distances written by `state_update_hook`, and their upper bound
    distances_to_local_center_of_mass::Vector{Float64}
    max_distance_to_local_center_of_mass::Float64

    # Whether `update_reward!` adds the elliptical-distance term
    add_shape_reward_term::Bool

    # Global quantities refreshed by `state_update_hook`
    center_of_mass::SVector{2,Float64}
    gyration_tensor_eigvec_to_smaller_eigval::SVector{2,Float64}
    gyration_tensor_eigvec_to_bigger_eigval::SVector{2,Float64}

    half_box_len::Float64
    max_elliptical_distance::Float64

    function LocalCOMEnvHelper(shared::EnvHelperSharedProps, half_box_len::Float64, skin_r)
        n = shared.n_particles
        zero_vec = SVector(0.0, 0.0)

        max_elliptical_distance =
            sqrt(2) * half_box_len / shared.goal_gyration_tensor_eigvals_ratio

        return new(
            shared,
            fill(zero_vec, n), # vec_to_neighbour_sums
            fill(0, n), # n_neighbours
            zeros(n), # distances_to_local_center_of_mass
            skin_r, # max_distance_to_local_center_of_mass
            false, # add_shape_reward_term
            zero_vec, # center_of_mass
            zero_vec, # gyration_tensor_eigvec_to_smaller_eigval
            zero_vec, # gyration_tensor_eigvec_to_bigger_eigval
            half_box_len,
            max_elliptical_distance,
        )
    end
end

"""
    gen_env_helper(::LocalCOMEnv, env_helper_shared::EnvHelperSharedProps; args)

Construct the `LocalCOMEnvHelper` for a `LocalCOMEnv`, reading `half_box_len` and
`skin_r` from `args`.
"""
function gen_env_helper(::LocalCOMEnv, env_helper_shared::EnvHelperSharedProps; args)
    half_box_len = args.half_box_len
    skin_r = args.skin_r

    return LocalCOMEnvHelper(env_helper_shared, half_box_len, skin_r)
end

"""
    pre_integration_hook(env_helper::LocalCOMEnvHelper)

Reset the per-particle neighbour accumulators (`vec_to_neighbour_sums` and
`n_neighbours`) to zero for the first `env_helper.shared.n_particles` particles.
"""
function pre_integration_hook(env_helper::LocalCOMEnvHelper)
    vec_sums = env_helper.vec_to_neighbour_sums
    neighbour_counts = env_helper.n_neighbours
    zero_vec = SVector(0.0, 0.0)

    @simd for id in 1:(env_helper.shared.n_particles)
        vec_sums[id] = zero_vec
        neighbour_counts[id] = 0
    end

    return nothing
end

"""
    state_update_helper_hook(env_helper, id1, id2, r⃗₁₂)

Record one pair contribution between particles `id1` and `id2`: `r⃗₁₂` is added to the
vector sum of `id1` and subtracted from the vector sum of `id2`, and the neighbour
count of both particles is incremented by one.
"""
function state_update_helper_hook(
    env_helper::LocalCOMEnvHelper, id1::Int64, id2::Int64, r⃗₁₂::SVector{2,Float64}
)
    vec_sums = env_helper.vec_to_neighbour_sums
    neighbour_counts = env_helper.n_neighbours

    # The same vector enters with opposite sign for the two particles of the pair
    vec_sums[id1] = vec_sums[id1] + r⃗₁₂
    vec_sums[id2] = vec_sums[id2] - r⃗₁₂

    neighbour_counts[id1] = neighbour_counts[id1] + 1
    neighbour_counts[id2] = neighbour_counts[id2] + 1

    return nothing
end

"""
    state_update_hook(env_helper::LocalCOMEnvHelper, particles::Vector{Particle})

Refresh the discrete state index of every particle and the global quantities used by
`update_reward!`.

Steps:
1. Copy `states_ind` into `old_states_ind`.
2. For each particle, divide its accumulated neighbour vector sum by its neighbour
   count, discretise the resulting distance and the angle relative to the particle
   orientation `φ`, and store the matching state index. Particles without neighbours
   get the last state index and contribute the maximum distance to the mean.
3. Set `add_shape_reward_term` when the mean distance relative to the maximum distance
   is below `0.32`.
4. Recompute the center of mass and the gyration tensor eigenvectors.
"""
function state_update_hook(env_helper::LocalCOMEnvHelper, particles::Vector{Particle})
    shared = env_helper.shared
    n_particles = shared.n_particles

    states_ind = shared.states_ind
    old_states_ind = shared.old_states_ind

    # Keep the previous state indices before they are overwritten below
    @turbo for id in 1:n_particles
        old_states_ind[id] = states_ind[id]
    end

    env = shared.env
    max_distance = env_helper.max_distance_to_local_center_of_mass

    distance_sum = 0.0

    for id in 1:n_particles
        n_neighbours = env_helper.n_neighbours[id]

        if n_neighbours == 0
            # No neighbours: count the maximum distance and use the last state index
            distance_sum += max_distance
            states_ind[id] = env.shared.n_states
            continue
        end

        vec_to_local_com = env_helper.vec_to_neighbour_sums[id] / n_neighbours
        distance = ReCo.norm2d(vec_to_local_com)

        env_helper.distances_to_local_center_of_mass[id] = distance
        distance_sum += distance

        distance_state = find_state_interval(distance, env.distance_state_space)

        # Unit vector built from the particle orientation angle φ
        si, co = sincos(particles[id].φ)
        orientation = SVector(co, si)

        direction_angle = ReCo.angle2(orientation, vec_to_local_com)
        direction_angle_state = find_state_interval(
            direction_angle, env.direction_angle_state_space
        )

        state = SVector{2,Interval}(distance_state, direction_angle_state)
        states_ind[id] = find_state_ind(state, env.shared.state_space)
    end

    # The shape term is only enabled once the relative mean distance is below 0.32
    mean_distance = distance_sum / n_particles
    env_helper.add_shape_reward_term = mean_distance / max_distance < 0.32

    half_box_len = env_helper.half_box_len
    center_of_mass = ReCo.center_of_mass(particles, half_box_len)
    env_helper.center_of_mass = center_of_mass

    eigvec_smaller, eigvec_bigger = ReCo.gyration_tensor_eigvecs(
        particles, half_box_len, center_of_mass
    )

    env_helper.gyration_tensor_eigvec_to_smaller_eigval = eigvec_smaller
    env_helper.gyration_tensor_eigvec_to_bigger_eigval = eigvec_bigger

    return nothing
end

"""
    minimizing_reward(value::Float64, max_value::Float64)

Returns the reward such that it is 0 for value=max_value and 1 for value=0.
"""
function minimizing_reward(value::Float64, max_value::Float64)
    # Numerator vanishes at value == max_value; the ratio equals 1 at value == 0
    shortfall = max_value - value
    span = max_value + value

    return shortfall / span
end

"""
    update_reward!(env::LocalCOMEnv, env_helper::LocalCOMEnvHelper, particle::Particle)

Store the reward of `particle` in `env.shared.reward`.

A particle without neighbours gets the reward `0.0`. Otherwise the reward is
`minimizing_reward` of the particle's distance to its local center of mass; when
`env_helper.add_shape_reward_term` is set, `minimizing_reward` of the particle's
elliptical distance is added. The sum is divided by the number of particles.
"""
function update_reward!(env::LocalCOMEnv, env_helper::LocalCOMEnvHelper, particle::Particle)
    id = particle.id

    normalization = env_helper.shared.n_particles # TODO: Add factor from steps

    if env_helper.n_neighbours[id] == 0
        env.shared.reward = 0.0

        return nothing
    end

    reward = minimizing_reward(
        env_helper.distances_to_local_center_of_mass[id],
        env_helper.max_distance_to_local_center_of_mass,
    )

    if env_helper.add_shape_reward_term
        elliptical_distance = ReCo.elliptical_distance(
            particle.c,
            env_helper.center_of_mass,
            env_helper.gyration_tensor_eigvec_to_smaller_eigval,
            env_helper.gyration_tensor_eigvec_to_bigger_eigval,
            env_helper.shared.goal_gyration_tensor_eigvals_ratio,
            env_helper.half_box_len,
        )

        # NOTE(review): this call used to name `unnormalized_reward`, which is not
        # defined in this file; `minimizing_reward` has the matching
        # (value, max_value) signature. Confirm no other definition was intended.
        reward += minimizing_reward(
            elliptical_distance,
            env_helper.max_elliptical_distance, # TODO: Fix sq
        )
    end

    env.shared.reward = reward / normalization

    return nothing
end
Restructured code to include all environments 2022-01-11 01:31:30 +01:00			`export LocalCOMEnv`

Fixed elliptical_distance 2022-01-14 13:01:14 +01:00			`using ..ReCo: Particle`

Restructured code to include all environments 2022-01-11 01:31:30 +01:00			`struct LocalCOMEnv <: Env`
Added norm2d and sq_norm2d 2022-01-11 18:39:38 +01:00			`shared::EnvSharedProps`
Restructured code to include all environments 2022-01-11 01:31:30 +01:00
			`distance_state_space::Vector{Interval}`
			`direction_angle_state_space::Vector{Interval}`

Added shape reward term 2022-01-14 12:28:47 +01:00			`function LocalCOMEnv(;`
			`n_distance_states::Int64=3, n_direction_angle_states::Int64=3, args`
Restructured code to include all environments 2022-01-11 01:31:30 +01:00			`)`
			`@assert n_direction_angle_states > 1`

			`direction_angle_state_space = gen_angle_state_space(n_direction_angle_states)`

			`min_distance = 0.0`
Added shape reward term 2022-01-14 12:28:47 +01:00			`max_distance = args.skin_r`
Restructured code to include all environments 2022-01-11 01:31:30 +01:00
			`distance_state_space = gen_distance_state_space(`
			`min_distance, max_distance, n_distance_states`
			`)`

			`n_states = n_distance_states * n_direction_angle_states + 1`

			`state_space = Vector{SVector{2,Interval}}(undef, n_states - 1)`

			`ind = 1`
			`for distance_state in distance_state_space`
			`for direction_angle_state in direction_angle_state_space`
			`state_space[ind] = SVector(distance_state, direction_angle_state)`
			`ind += 1`
			`end`
			`end`
			`# Last state is when no particle is in the skin radius`

Added norm2d and sq_norm2d 2022-01-11 18:39:38 +01:00			`shared = EnvSharedProps(n_states, state_space)`
Restructured code to include all environments 2022-01-11 01:31:30 +01:00
Added shape reward term 2022-01-14 12:28:47 +01:00			`return new(shared, distance_state_space, direction_angle_state_space)`
Restructured code to include all environments 2022-01-11 01:31:30 +01:00			`end`
			`end`

Added shape reward term 2022-01-14 12:28:47 +01:00			`mutable struct LocalCOMEnvHelper <: EnvHelper`
Added norm2d and sq_norm2d 2022-01-11 18:39:38 +01:00			`shared::EnvHelperSharedProps`
Restructured code to include all environments 2022-01-11 01:31:30 +01:00
			`vec_to_neighbour_sums::Vector{SVector{2,Float64}}`
			`n_neighbours::Vector{Int64}`

Added shape reward term 2022-01-14 12:28:47 +01:00			`distances_to_local_center_of_mass::Vector{Float64}`
			`max_distance_to_local_center_of_mass::Float64`

			`add_shape_reward_term::Bool`

Fixed elliptical_distance 2022-01-14 13:01:14 +01:00			`center_of_mass::SVector{2,Float64}`
Added shape reward term 2022-01-14 12:28:47 +01:00			`gyration_tensor_eigvec_to_smaller_eigval::SVector{2,Float64}`
			`gyration_tensor_eigvec_to_bigger_eigval::SVector{2,Float64}`

			`half_box_len::Float64`
			`max_elliptical_distance::Float64`

			`function LocalCOMEnvHelper(shared::EnvHelperSharedProps, half_box_len::Float64, skin_r)`
			`max_elliptical_distance =`
			`sqrt(2) * half_box_len / shared.goal_gyration_tensor_eigvals_ratio`

			`max_distance_to_local_center_of_mass = skin_r`

Restructured code to include all environments 2022-01-11 01:31:30 +01:00			`return new(`
Added norm2d and sq_norm2d 2022-01-11 18:39:38 +01:00			`shared,`
			`fill(SVector(0.0, 0.0), shared.n_particles),`
			`fill(0, shared.n_particles),`
			`zeros(shared.n_particles),`
Added shape reward term 2022-01-14 12:28:47 +01:00			`max_distance_to_local_center_of_mass,`
			`false,`
			`SVector(0.0, 0.0),`
			`SVector(0.0, 0.0),`
Fixed elliptical_distance 2022-01-14 13:01:14 +01:00			`SVector(0.0, 0.0),`
Added shape reward term 2022-01-14 12:28:47 +01:00			`half_box_len,`
			`max_elliptical_distance,`
Restructured code to include all environments 2022-01-11 01:31:30 +01:00			`)`
			`end`
			`end`

Added shape reward term 2022-01-14 12:28:47 +01:00			`function gen_env_helper(::LocalCOMEnv, env_helper_shared::EnvHelperSharedProps; args)`
			`return LocalCOMEnvHelper(env_helper_shared, args.half_box_len, args.skin_r)`
Restructured code to include all environments 2022-01-11 01:31:30 +01:00			`end`

			`function pre_integration_hook(env_helper::LocalCOMEnvHelper)`
Added norm2d and sq_norm2d 2022-01-11 18:39:38 +01:00			`@simd for id in 1:(env_helper.shared.n_particles)`
Restructured code to include all environments 2022-01-11 01:31:30 +01:00			`env_helper.vec_to_neighbour_sums[id] = SVector(0.0, 0.0)`
			`env_helper.n_neighbours[id] = 0`
			`end`

			`return nothing`
			`end`

			`function state_update_helper_hook(`
			`env_helper::LocalCOMEnvHelper, id1::Int64, id2::Int64, r⃗₁₂::SVector{2,Float64}`
			`)`
			`env_helper.vec_to_neighbour_sums[id1] += r⃗₁₂`
			`env_helper.vec_to_neighbour_sums[id2] -= r⃗₁₂`

			`env_helper.n_neighbours[id1] += 1`
			`env_helper.n_neighbours[id2] += 1`

			`return nothing`
			`end`

			`function state_update_hook(env_helper::LocalCOMEnvHelper, particles::Vector{Particle})`
Added norm2d and sq_norm2d 2022-01-11 18:39:38 +01:00			`n_particles = env_helper.shared.n_particles`
Restructured code to include all environments 2022-01-11 01:31:30 +01:00
Added shape reward term 2022-01-14 12:28:47 +01:00			`@turbo for id in 1:n_particles`
Added norm2d and sq_norm2d 2022-01-11 18:39:38 +01:00			`env_helper.shared.old_states_ind[id] = env_helper.shared.states_ind[id]`
Restructured code to include all environments 2022-01-11 01:31:30 +01:00			`end`

Added norm2d and sq_norm2d 2022-01-11 18:39:38 +01:00			`env = env_helper.shared.env`
Restructured code to include all environments 2022-01-11 01:31:30 +01:00
Added shape reward term 2022-01-14 12:28:47 +01:00			`distance_to_local_center_of_mass_sum = 0.0`

Restructured code to include all environments 2022-01-11 01:31:30 +01:00			`for id in 1:n_particles`
			`n_neighbours = env_helper.n_neighbours[id]`

			`if n_neighbours == 0`
Added norm2d and sq_norm2d 2022-01-11 18:39:38 +01:00			`state_ind = env.shared.n_states`
Added shape reward term 2022-01-14 12:28:47 +01:00
			`distance_to_local_center_of_mass_sum +=`
			`env_helper.max_distance_to_local_center_of_mass`
Restructured code to include all environments 2022-01-11 01:31:30 +01:00			`else`
			`vec_to_local_center_of_mass =`
			`env_helper.vec_to_neighbour_sums[id] / n_neighbours`

Fixed elliptical_distance 2022-01-14 13:01:14 +01:00			`distance = ReCo.norm2d(vec_to_local_center_of_mass)`
Added norm2d and sq_norm2d 2022-01-11 18:39:38 +01:00
Added shape reward term 2022-01-14 12:28:47 +01:00			`env_helper.distances_to_local_center_of_mass[id] = distance`

			`distance_to_local_center_of_mass_sum += distance`
Restructured code to include all environments 2022-01-11 01:31:30 +01:00
			`distance_state = find_state_interval(distance, env.distance_state_space)`

			`si, co = sincos(particles[id].φ)`

Fixed elliptical_distance 2022-01-14 13:01:14 +01:00			`direction_angle = ReCo.angle2(SVector(co, si), vec_to_local_center_of_mass)`
Restructured code to include all environments 2022-01-11 01:31:30 +01:00
			`direction_angle_state = find_state_interval(`
			`direction_angle, env.direction_angle_state_space`
			`)`

			`state = SVector{2,Interval}(distance_state, direction_angle_state)`
Added norm2d and sq_norm2d 2022-01-11 18:39:38 +01:00			`state_ind = find_state_ind(state, env.shared.state_space)`
Restructured code to include all environments 2022-01-11 01:31:30 +01:00			`end`

Added norm2d and sq_norm2d 2022-01-11 18:39:38 +01:00			`env_helper.shared.states_ind[id] = state_ind`
Restructured code to include all environments 2022-01-11 01:31:30 +01:00			`end`

Added shape reward term 2022-01-14 12:28:47 +01:00			`mean_distance_to_local_center_of_mass =`
			`distance_to_local_center_of_mass_sum / n_particles`
			`env_helper.add_shape_reward_term =`
			`mean_distance_to_local_center_of_mass /`
			`env_helper.max_distance_to_local_center_of_mass < 0.32`

			`if env_helper.add_shape_reward_term`
			`#println(mean_distance_to_local_center_of_mass / env_helper.max_distance_to_local_center_of_mass) # TODO: Remove`
			`end`

Fixed elliptical_distance 2022-01-14 13:01:14 +01:00			`env_helper.center_of_mass = ReCo.center_of_mass(particles, env_helper.half_box_len)`

			`v1, v2 = ReCo.gyration_tensor_eigvecs(`
			`particles, env_helper.half_box_len, env_helper.center_of_mass`
			`)`
Added shape reward term 2022-01-14 12:28:47 +01:00
			`env_helper.gyration_tensor_eigvec_to_smaller_eigval = v1`
			`env_helper.gyration_tensor_eigvec_to_bigger_eigval = v2`

Restructured code to include all environments 2022-01-11 01:31:30 +01:00			`return nothing`
			`end`

Added shape reward term 2022-01-14 12:28:47 +01:00			`"""`
			`minimizing_reward(value::Float64, max_value::Float64)`

			`Returns the reward such that it is 0 for value=max_value and 1 for value=0.`
			`"""`
			`function minimizing_reward(value::Float64, max_value::Float64)`
			`return (max_value - value) / (max_value + value)`
			`end`

Restructured code to include all environments 2022-01-11 01:31:30 +01:00			`function update_reward!(env::LocalCOMEnv, env_helper::LocalCOMEnvHelper, particle::Particle)`
			`id = particle.id`

Added shape reward term 2022-01-14 12:28:47 +01:00			`normalization = env_helper.shared.n_particles # TODO: Add factor from steps`
Restructured code to include all environments 2022-01-11 01:31:30 +01:00			`n_neighbours = env_helper.n_neighbours[id]`
Added shape reward term 2022-01-14 12:28:47 +01:00
Restructured code to include all environments 2022-01-11 01:31:30 +01:00			`if n_neighbours == 0`
Added shape reward term 2022-01-14 12:28:47 +01:00			`env.shared.reward = 0.0`
Restructured code to include all environments 2022-01-11 01:31:30 +01:00			`else`
Added shape reward term 2022-01-14 12:28:47 +01:00			`reward = minimizing_reward(`
			`env_helper.distances_to_local_center_of_mass[id],`
			`env_helper.max_distance_to_local_center_of_mass,`
			`)`

			`if env_helper.add_shape_reward_term`
			`elliptical_distance = ReCo.elliptical_distance(`
Fixed elliptical_distance 2022-01-14 13:01:14 +01:00			`particle.c,`
			`env_helper.center_of_mass,`
Added shape reward term 2022-01-14 12:28:47 +01:00			`env_helper.gyration_tensor_eigvec_to_smaller_eigval,`
			`env_helper.gyration_tensor_eigvec_to_bigger_eigval,`
			`env_helper.shared.goal_gyration_tensor_eigvals_ratio,`
Fixed elliptical_distance 2022-01-14 13:01:14 +01:00			`env_helper.half_box_len,`
Added shape reward term 2022-01-14 12:28:47 +01:00			`)`

			`reward += unnormalized_reward(`
			`elliptical_distance,`
			`env_helper.max_elliptical_distance, # TODO: Fix sq`
			`)`

			`# println(elliptical_distance / env_helper.max_elliptical_distance) # TODO: Remove`
			`end`

			`env.shared.reward = reward / normalization`
Restructured code to include all environments 2022-01-11 01:31:30 +01:00			`end`

			`return nothing`
			`end`
No results found.