From 6fd1ff4a95d342f35f167d420894641b508b3e6f Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Thu, 13 Mar 2025 16:25:28 +0100
Subject: [PATCH 01/29] initialize the DynamicVehicleScheduling module

---
 src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl

diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
new file mode 100644
index 0000000..ef9cbb9
--- /dev/null
+++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
@@ -0,0 +1,6 @@
+module DynamicVehicleScheduling
+
+using ..Utils
+using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES
+
+end

From c159731c4d9a6507a6ee4ac8d3711e1ec51b4ac7 Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Thu, 3 Apr 2025 16:20:33 +0200
Subject: [PATCH 02/29] update

---
 src/DecisionFocusedLearningBenchmarks.jl      |  28 +-
 .../algorithms/anticipative_solver.jl         | 111 +++++++
 .../algorithms/prize_collecting_vsp.jl        | 215 +++++++++++++
 .../DynamicVSP/environment/environment.jl     | 293 ++++++++++++++++++
 .../DynamicVSP/environment/plot.jl            | 134 ++++++++
 .../DynamicVSP/environment/state.jl           |  89 ++++++
 .../DynamicVSP/learning/2d_features.jl        |  15 +
 .../DynamicVSP/learning/dataset.jl            |  37 +++
 .../DynamicVSP/learning/features.jl           |  75 +++++
 .../DynamicVSP/policy/abstract_vsp_policy.jl  |  33 ++
 .../DynamicVSP/policy/anticipative_policy.jl  |  18 ++
 .../DynamicVSP/policy/greedy_policy.jl        |  34 ++
 .../DynamicVSP/policy/kleopatra_policy.jl     |  53 ++++
 .../DynamicVSP/policy/lazy_policy.jl          |  34 ++
 .../DynamicVSP/utils.jl                       |  11 +
 .../DynamicVSP/vsp/instance.jl                |  65 ++++
 .../DynamicVSP/vsp/parsing.jl                 |  95 ++++++
 .../DynamicVSP/vsp/plot.jl                    |  39 +++
 .../DynamicVSP/vsp/solution.jl                |  50 +++
 .../DynamicVehicleScheduling.jl               |  67 ++++
 .../abstract_policy.jl                        |   5 +
 .../dynamic_config.jl                         |  24 ++
 src/DynamicVehicleScheduling/utils.jl         |  34 ++
 src/Utils/model_builders.jl                   |   1 -
 24 files changed, 1558 insertions(+), 2 deletions(-)
 create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/algorithms/anticipative_solver.jl
 create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/algorithms/prize_collecting_vsp.jl
 create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/environment/environment.jl
 create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/environment/plot.jl
 create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/environment/state.jl
 create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/learning/2d_features.jl
 create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/learning/dataset.jl
 create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/learning/features.jl
 create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/policy/abstract_vsp_policy.jl
 create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/policy/anticipative_policy.jl
 create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/policy/greedy_policy.jl
 create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/policy/kleopatra_policy.jl
 create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/policy/lazy_policy.jl
 create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/utils.jl
 create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/vsp/instance.jl
 create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/vsp/parsing.jl
 create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/vsp/plot.jl
 create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/vsp/solution.jl
 create mode 100644 src/DynamicVehicleScheduling/abstract_policy.jl
 create mode 100644 src/DynamicVehicleScheduling/dynamic_config.jl
 create mode 100644 src/DynamicVehicleScheduling/utils.jl

diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl
index 2122788..18cd94f 100644
--- a/src/DecisionFocusedLearningBenchmarks.jl
+++ b/src/DecisionFocusedLearningBenchmarks.jl
@@ -3,18 +3,42 @@ module DecisionFocusedLearningBenchmarks
 using DataDeps
 using Requires: @require
 
+function _euro_neurips_unpack(local_filepath)
+    # Post-fetch step: extract the archive, keep the `.txt` instances, drop everything else
+    directory = dirname(local_filepath)
+    unpack(local_filepath)
+
+    archive_root = joinpath(directory, "euro-neurips-vrp-2022-quickstart-main")
+    instance_files = readdir(joinpath(archive_root, "instances"); join=true)
+    for filepath in filter(endswith(".txt"), instance_files)
+        # flatten: instance files end up directly next to the downloaded archive
+        mv(filepath, joinpath(directory, basename(filepath)))
+    end
+    rm(archive_root; recursive=true)
+    return nothing
+end
+
 function __init__()
     # Register the Warcraft dataset
     ENV["DATADEPS_ALWAYS_ACCEPT"] = "true"
     register(
         DataDep(
             "warcraft",
-            "This is the warcraft dataset",
+            "Warcraft shortest path dataset",
             "http://cermics.enpc.fr/~bouvierl/warcraft_TP/data.zip";
             post_fetch_method=unpack,
         ),
     )
 
+    register(
+        DataDep(
+            "euro-neurips-2022",
+            "EURO-NeurIPs challenge 2022 dataset",
+            "https://github.com/ortec/euro-neurips-vrp-2022-quickstart/archive/refs/heads/main.zip";
+            post_fetch_method=_euro_neurips_unpack,
+        ),
+    )
+
     # Gurobi setup
     @info "If you have Gurobi installed and want to use it, make sure to `using Gurobi` in order to enable it."
     @require Gurobi = "2e9cd046-0924-5485-92f1-d5272153d98b" include("gurobi_setup.jl")
@@ -30,6 +54,7 @@ include("Warcraft/Warcraft.jl")
 include("FixedSizeShortestPath/FixedSizeShortestPath.jl")
 include("PortfolioOptimization/PortfolioOptimization.jl")
 include("StochasticVehicleScheduling/StochasticVehicleScheduling.jl")
+include("DynamicVehicleScheduling/DynamicVehicleScheduling.jl")
 
 using .Utils
 using .Argmax
@@ -39,6 +64,7 @@ using .Warcraft
 using .FixedSizeShortestPath
 using .PortfolioOptimization
 using .StochasticVehicleScheduling
+using .DynamicVehicleScheduling
 
 # Interface
 export AbstractBenchmark, DataSample
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/algorithms/anticipative_solver.jl b/src/DynamicVehicleScheduling/DynamicVSP/algorithms/anticipative_solver.jl
new file mode 100644
index 0000000..43a9edb
--- /dev/null
+++ b/src/DynamicVehicleScheduling/DynamicVSP/algorithms/anticipative_solver.jl
@@ -0,0 +1,111 @@
+"""
+$TYPEDSIGNATURES
+
+Retrieve anticipative routes solution from the given MIP solution `y`.
+Outputs a set of routes per epoch.
+"""
+function retrieve_routes_anticipative(y::AbstractArray, dvspenv::DVSPEnv)
+    nb_tasks = length(dvspenv.customer_index)
+    (; first_epoch, last_epoch) = dvspenv.config
+    epoch_indices = first_epoch:last_epoch
+
+    # routes[k] holds the routes of the k-th epoch, so this also works if first_epoch != 1
+    routes = [Vector{Int}[] for _ in epoch_indices]
+    for (k, t) in enumerate(epoch_indices)
+        # same tolerance for route starts and successors: solver values are not exact 0/1
+        start = [i for i in 2:nb_tasks if isapprox(y[1, i, t], 1; atol=0.1)]
+        for task in start
+            route = Int[]
+            current_task = task
+            while current_task != 1  # follow selected arcs until back at the depot
+                push!(route, current_task)
+                next_task = nothing
+                for i in 1:nb_tasks
+                    if isapprox(y[current_task, i, t], 1; atol=0.1)
+                        next_task = i
+                        break
+                    end
+                end
+                isnothing(next_task) && error("No arc leaves task $current_task at epoch $t")
+                current_task = next_task
+            end
+            push!(routes[k], route)
+        end
+    end
+    return routes
+end
+
+"""
+$TYPEDSIGNATURES
+
+Solve the anticipative VSP problem for environment `env`.
+For this, it uses the current environment history, so make sure that the environment is terminated before calling this method.
+"""
+function anticipative_solver(env::DVSPEnv; model_builder=highs_model, draw_epochs=true)
+    draw_epochs && draw_all_epochs!(env)
+    (; customer_index, service_time, start_time, request_epoch) = env
+    duration = env.config.static_instance.duration[customer_index, customer_index]
+    (; first_epoch, last_epoch, epoch_duration, Δ_dispatch) = env.config
+
+    @assert first_epoch == 1
+
+    model = model_builder()
+    set_silent(model)
+
+    nb_nodes = length(customer_index)
+    job_indices = 2:nb_nodes
+    epoch_indices = first_epoch:last_epoch
+
+    @variable(model, y[i=1:nb_nodes, j=1:nb_nodes, t=epoch_indices]; binary=true)
+
+    @objective(
+        model,
+        Max,
+        sum(
+            -duration[i, j] * y[i, j, t] for i in 1:nb_nodes, j in 1:nb_nodes,
+            t in epoch_indices
+        )
+    )
+
+    # flow constraint per epoch
+    for t in epoch_indices, i in 1:nb_nodes
+        @constraint(
+            model,
+            sum(y[j, i, t] for j in 1:nb_nodes) == sum(y[i, j, t] for j in 1:nb_nodes)
+        )
+    end
+
+    # each task must be done once along the horizon
+    @constraint(
+        model,
+        demand[i in job_indices],
+        sum(y[j, i, t] for j in 1:nb_nodes, t in epoch_indices) == 1
+    )
+
+    # a trip from i can be planned only after request appeared
+    for i in job_indices, t in epoch_indices, j in 1:nb_nodes
+        if t < request_epoch[i]
+            @constraint(model, y[i, j, t] <= 0)
+        end
+    end
+
+    # a trip from i can be done only before limit date
+    for i in job_indices, t in epoch_indices, j in 1:nb_nodes
+        if (t - 1) * epoch_duration + duration[1, i] + Δ_dispatch > start_time[i]  # ! this only works if first_epoch = 1
+            @constraint(model, y[i, j, t] <= 0)
+        end
+    end
+
+    # trips can be planned if start, service and transport times enable it
+    for i in job_indices, t in epoch_indices, j in job_indices
+        if start_time[i] <= start_time[j]
+            if start_time[i] + service_time[i] + duration[i, j] > start_time[j]
+                @constraint(model, y[i, j, t] <= 0)
+            end
+        else
+            @constraint(model, y[i, j, t] <= 0)
+        end
+    end
+
+    optimize!(model)
+
+    return retrieve_routes_anticipative(value.(y), env)
+end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/algorithms/prize_collecting_vsp.jl b/src/DynamicVehicleScheduling/DynamicVSP/algorithms/prize_collecting_vsp.jl
new file mode 100644
index 0000000..d1fbcce
--- /dev/null
+++ b/src/DynamicVehicleScheduling/DynamicVSP/algorithms/prize_collecting_vsp.jl
@@ -0,0 +1,215 @@
+"""
+$TYPEDSIGNATURES
+
+Create the acyclic digraph associated with the given VSP `instance`.
+"""
+function create_graph(instance::VSPInstance)
+    (; duration, start_time, service_time) = instance
+    # Initialize directed graph
+    nb_vertices = nb_locations(instance)
+    graph = SimpleDiGraph(nb_vertices)
+
+    depot = 1  # depot is always index 1
+    customers = 2:nb_vertices  # other vertices are customers
+
+    # Create existing edges
+    for i₁ in customers
+        # link every task to depot
+        add_edge!(graph, depot, i₁)
+        add_edge!(graph, i₁, depot)
+
+        t₁ = start_time[i₁]
+        for i₂ in (i₁ + 1):nb_vertices
+            t₂ = start_time[i₂]
+
+            if t₁ <= t₂
+                if t₁ + service_time[i₁] + duration[i₁, i₂] <= t₂
+                    add_edge!(graph, i₁, i₂)
+                end
+            else
+                if t₂ + service_time[i₂] + duration[i₂, i₁] <= t₁
+                    add_edge!(graph, i₂, i₁)
+                end
+            end
+        end
+    end
+
+    return graph
+end
+
+"""
+$TYPEDSIGNATURES
+
+Create the acyclic digraph associated with the given VSP `state`.
+"""
+function create_graph(state::VSPState)
+    return create_graph(state.instance)
+end
+
+"""
+$TYPEDSIGNATURES
+
+Retrieve routes solution from the given MIP solution `y` matrix and `graph`.
+"""
+function retrieve_routes(y::AbstractArray, graph::AbstractGraph)
+    nb_tasks = nv(graph)
+    routes = Vector{Int}[]
+
+    # same tolerance for route starts and successors: solver values are not exact 0/1
+    start = [i for i in 2:nb_tasks if isapprox(y[1, i], 1; atol=0.1)]
+    for task in start
+        route = Int[]
+        current_task = task
+        while current_task != 1  # follow selected arcs until back at the depot
+            push!(route, current_task)
+            next_task = nothing
+            for i in outneighbors(graph, current_task)
+                if isapprox(y[current_task, i], 1; atol=0.1)
+                    next_task = i
+                    break
+                end
+            end
+            isnothing(next_task) && error("No selected arc leaves task $current_task")
+            current_task = next_task
+        end
+        push!(routes, route)
+    end
+    return routes
+end
+
+"""
+$TYPEDSIGNATURES
+
+Solve the Prize Collecting Vehicle Scheduling Problem defined by `instance` and prize vector `θ`.
+"""
+function prize_collecting_vsp(
+    θ::AbstractVector; instance::VSPState, model_builder=highs_model, kwargs...
+)
+    (; duration) = instance.instance
+    graph = create_graph(instance)
+
+    model = model_builder()
+    set_silent(model)
+
+    nb_nodes = nv(graph)
+    job_indices = 2:(nb_nodes)
+
+    @variable(model, y[i=1:nb_nodes, j=1:nb_nodes; has_edge(graph, i, j)] >= 0)
+
+    θ_ext = fill(0.0, nb_locations(instance))  # no prize for must dispatch requests, only hard constraints
+    θ_ext[instance.is_postponable] .= θ
+
+    @objective(
+        model,
+        Max,
+        sum(
+            (θ_ext[dst(edge)] - duration[src(edge), dst(edge)]) * y[src(edge), dst(edge)]
+            for edge in edges(graph)
+        )
+    )
+    @constraint(
+        model,
+        flow[i in 2:nb_nodes],
+        sum(y[j, i] for j in inneighbors(graph, i)) ==
+            sum(y[i, j] for j in outneighbors(graph, i))
+    )
+    @constraint(
+        model, demand[i in job_indices], sum(y[j, i] for j in inneighbors(graph, i)) <= 1
+    )
+    # must dispatch constraints
+    @constraint(
+        model,
+        demand_must_dispatch[i in job_indices; instance.is_must_dispatch[i]],
+        sum(y[j, i] for j in inneighbors(graph, i)) == 1
+    )
+
+    optimize!(model)
+
+    return retrieve_routes(value.(y), graph)
+end
+
+# ?
+function prize_collecting_vsp_Q(
+    θ::AbstractVector,
+    vals::AbstractVector;
+    instance::VSPState,
+    model_builder=highs_model,
+    kwargs...,
+)
+    (; duration) = instance.instance
+    graph = create_graph(instance)
+    model = model_builder()
+    set_silent(model)
+    nb_nodes = nv(graph)
+    job_indices = 2:(nb_nodes)
+    @variable(model, y[i=1:nb_nodes, j=1:nb_nodes; has_edge(graph, i, j)] >= 0)
+    θ_ext = fill(0.0, nb_locations(instance.instance))  # no prize for must dispatch requests, only hard constraints
+    θ_ext[instance.is_postponable] .= θ
+    # v_ext = fill(0.0, nb_locations(instance.instance))  # no prize for must dispatch requests, only hard constraints
+    # v_ext[instance.is_postponable] .= vals
+    @objective(
+        model,
+        Max,
+        sum(
+            (θ_ext[dst(edge)] + vals[dst(edge)] - duration[src(edge), dst(edge)]) *
+            y[src(edge), dst(edge)] for edge in edges(graph)
+        )
+    )
+    @constraint(
+        model,
+        flow[i in 2:nb_nodes],
+        sum(y[j, i] for j in inneighbors(graph, i)) ==
+            sum(y[i, j] for j in outneighbors(graph, i))
+    )
+    @constraint(
+        model, demand[i in job_indices], sum(y[j, i] for j in inneighbors(graph, i)) <= 1
+    )
+    # must dispatch constraints
+    @constraint(
+        model,
+        demand_must_dispatch[i in job_indices; instance.is_must_dispatch[i]],
+        sum(y[j, i] for j in inneighbors(graph, i)) == 1
+    )
+    optimize!(model)
+    return retrieve_routes(value.(y), graph)
+end
+
+function my_objective_value(θ, routes; instance)
+    (; duration) = instance.instance
+    total = 0.0
+    θ_ext = fill(0.0, nb_locations(instance))
+    θ_ext[instance.is_postponable] .= θ
+    for route in routes
+        for (u, v) in partition(vcat(1, route), 2, 1)
+            total += θ_ext[v] - duration[u, v]
+        end
+    end
+    return -total
+end
+
+function _objective_value(θ, routes; instance)
+    (; duration) = instance.instance
+    total = 0.0
+    θ_ext = fill(0.0, nb_locations(instance))
+    θ_ext[instance.is_postponable] .= θ
+    mapping = cumsum(instance.is_postponable)
+    g = falses(length(θ))
+    for route in routes
+        for (u, v) in partition(vcat(1, route), 2, 1)
+            total -= duration[u, v]
+            if instance.is_postponable[v]
+                total += θ_ext[v]
+                g[mapping[v]] = 1
+            end
+        end
+    end
+    return -total, g
+end
+
+function ChainRulesCore.rrule(::typeof(my_objective_value), θ, routes; instance)
+    total, g = _objective_value(θ, routes; instance)  # NOTE(review): confirm sign of g
+    function pullback(dy)
+        # Do not reassign the captured `g`: the pullback may be called more than once
+        return NoTangent(), g .* dy, NoTangent()
+    end
+    return total, pullback
+end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/environment/environment.jl b/src/DynamicVehicleScheduling/DynamicVSP/environment/environment.jl
new file mode 100644
index 0000000..189afb4
--- /dev/null
+++ b/src/DynamicVehicleScheduling/DynamicVSP/environment/environment.jl
@@ -0,0 +1,293 @@
+"""
+$TYPEDEF
+
+Environment data structure for the Dynamic Vehicle Scheduling Problem.
+
+# Fields
+$TYPEDFIELDS
+"""
+@kwdef mutable struct DVSPEnv{C<:DynamicConfig,R<:AbstractRNG,T,S<:VSPState}
+    "instance config as a [`DynamicConfig`](@ref)"
+    config::C
+    "current epoch number"
+    current_epoch::Int
+    "random number generator"
+    rng::R
+    "index of each customer in the static instance from the config"
+    customer_index::Vector{Int}
+    "service time values of each customer"
+    service_time::Vector{T}
+    "start time values of each customer"
+    start_time::Vector{T}
+    "1 if the request was already dispatched in a previous epoch, 0 otherwise"
+    request_is_dispatched::BitVector
+    "epoch index at which each request appeared"
+    request_epoch::Vector{Int}
+    "current state of environment"
+    state::S
+end
+
+"""
+$TYPEDSIGNATURES
+
+Constructor for [`DVSPEnv`](@ref).
+"""
+function DVSPEnv(
+    static_instance::VSPInstance;
+    seed=0,
+    max_requests_per_epoch=10,
+    Δ_dispatch=1.0,
+    epoch_duration=1.0,
+)
+    first_epoch = 1
+    last_epoch = trunc(Int, maximum(static_instance.start_time) / epoch_duration) - 1
+
+    config = DynamicConfig(;
+        static_instance,
+        max_requests_per_epoch,
+        Δ_dispatch,
+        epoch_duration,
+        seed,
+        first_epoch,
+        last_epoch,
+    )
+    return DVSPEnv(;
+        config,
+        customer_index=[1],
+        service_time=[0.0],
+        start_time=[0.0],
+        request_is_dispatched=falses(1),
+        state=VSPState(),
+        rng=MersenneTwister(seed),
+        current_epoch=0,
+        request_epoch=[first_epoch - 1],
+    )
+end
+
+"""
+$TYPEDSIGNATURES
+
+Return the indices of the epochs in the environment.
+"""
+function get_epoch_indices(env::DVSPEnv)
+    return (env.config.first_epoch):(env.config.last_epoch)
+end
+
+"""
+$TYPEDSIGNATURES
+
+Return the number of epochs in the environment.
+"""
+function nb_epochs(env::DVSPEnv)
+    return length(get_epoch_indices(env))
+end
+
+"""
+$TYPEDSIGNATURES
+
+Get the current state of the environment.
+"""
+get_state(env::DVSPEnv) = env.state
+
+"""
+$TYPEDSIGNATURES
+
+Get the current time of the environment, i.e. the start time of the current_epoch.
+"""
+get_time(env::DVSPEnv) = (env.current_epoch - 1) * env.config.epoch_duration
+
+"""
+$TYPEDSIGNATURES
+
+Get the planning start time of the environment, i.e. the time at which vehicles routes dispatched in current epoch can depart.
+"""
+get_planning_start_time(env::DVSPEnv) = get_time(env) + env.config.Δ_dispatch
+
+"""
+$TYPEDSIGNATURES
+
+Check if the episode is terminated, i.e. if the current epoch is the last one.
+"""
+is_terminated(env::DVSPEnv) = env.current_epoch >= env.config.last_epoch
+
+"""
+$TYPEDSIGNATURES
+
+Return the total number of locations in the environment history.
+"""
+nb_locations(env::DVSPEnv) = length(env.customer_index)
+
+"""
+$TYPEDSIGNATURES
+
+Return a vector of env location indices that are still undispatched.
+"""
+get_undispatched_indices(env::DVSPEnv) = (1:nb_locations(env))[.!env.request_is_dispatched]
+
+"""
+$TYPEDSIGNATURES
+
+Reset the environment to its initial state.
+Also reset the seed if `reset_seed` is set to true.
+"""
+function reset!(env::DVSPEnv; reset_seed::Bool=true)
+    initial_epoch = env.config.first_epoch - 1
+    env.current_epoch = initial_epoch
+    # Only the depot (index 1) survives a reset; every request is forgotten
+    env.customer_index = env.customer_index[1:1]
+    env.service_time = env.service_time[1:1]
+    env.start_time = env.start_time[1:1]
+    env.request_is_dispatched = falses(1)
+    env.request_epoch = [initial_epoch]
+    reset_seed && seed!(env.rng, env.config.seed)
+    return nothing
+end
+
+"""
+$TYPEDSIGNATURES
+
+Internal method that updates the state of the environment to correspond to env info.
+This is an internal method and should not be called directly.
+"""
+function update_state!(env::DVSPEnv)
+    (; config) = env
+    (; epoch_duration, static_instance, last_epoch) = config
+    (; duration) = static_instance
+    depot = 1
+
+    planning_start_time = get_planning_start_time(env)
+
+    # Must dispatch
+    undispatched_indices = get_undispatched_indices(env)
+    # If it's the last epoch, we must dispatch all remaining requests
+    is_must_dispatch = undispatched_indices .!= depot
+    # Else, only requests unreachable from the depot during next epoch are must dispatch
+    if env.current_epoch < last_epoch
+        is_must_dispatch =
+            planning_start_time .+ epoch_duration .+
+            @view(duration[depot, env.customer_index[undispatched_indices]]) .>
+            @view(env.start_time[undispatched_indices])
+        is_must_dispatch[1] = 0
+    end
+
+    is_postponable = falses(length(is_must_dispatch))
+    is_postponable[2:end] .= .!is_must_dispatch[2:end]
+
+    epoch_instance = VSPState(;
+        instance=VSPInstance(;
+            service_time=env.service_time[undispatched_indices],
+            start_time=env.start_time[undispatched_indices] .- planning_start_time,  # shift start times to planning start time
+            coordinate=static_instance.coordinate[env.customer_index[undispatched_indices]],
+            duration=duration[
+                env.customer_index[undispatched_indices],
+                env.customer_index[undispatched_indices],
+            ],
+        ),
+        is_must_dispatch,
+        is_postponable,
+    )
+
+    env.state = epoch_instance
+    return epoch_instance
+end
+
+"""
+$TYPEDSIGNATURES
+
+Update `env` by drawing the next epoch and returning the corresponding [`VSPState`](@ref).
+"""
+function next_epoch!(env::DVSPEnv)
+    # Increment epoch number
+    env.current_epoch += 1
+
+    # Retrieve useful information
+    (; rng, config) = env
+    (; max_requests_per_epoch, static_instance) = config
+    (; duration, service_time, start_time) = config.static_instance
+    depot = 1
+
+    # Draw new requests uniformly from static instance
+    N = nb_customers(static_instance)
+
+    planning_start_time = get_planning_start_time(env)
+
+    coordinate_indices = sample_indices(rng, max_requests_per_epoch, N)
+    start_time_indices = sample_indices(rng, max_requests_per_epoch, N)
+    service_time_indices = sample_indices(rng, max_requests_per_epoch, N)
+
+    # Only keep requests with feasible start times (rejection sampling)
+    # i.e. that are reachable from the depot before their start time
+    is_feasible =
+        planning_start_time .+ duration[depot, coordinate_indices] .<=
+        start_time[start_time_indices]
+
+    # Update environment state
+    nb_new_requests = sum(is_feasible)
+
+    # Update environment by adding new requests in
+    env.customer_index = vcat(env.customer_index, coordinate_indices[is_feasible])
+    env.service_time = vcat(
+        env.service_time, service_time[service_time_indices[is_feasible]]
+    )
+    env.start_time = vcat(env.start_time, start_time[start_time_indices[is_feasible]])
+    env.request_is_dispatched = vcat(env.request_is_dispatched, falses(nb_new_requests))
+    env.request_epoch = vcat(env.request_epoch, fill(env.current_epoch, nb_new_requests))
+
+    # Finally, update the state of the environment with these new requests
+    return update_state!(env)
+end
+
+"""
+$TYPEDSIGNATURES
+
+Transform state routes indices into env route indices.
+"""
+function env_routes_from_state_routes(env, routes)
+    undispatched_indices = get_undispatched_indices(env)
+    return [undispatched_indices[route] for route in routes]
+end
+
+"""
+$TYPEDSIGNATURES
+
+Transform env route indices into state route indices.
+"""
+function state_route_from_env_routes(env, routes)
+    is_undispatched = .!env.request_is_dispatched
+    # local_index[i] is the position of env location i among the undispatched ones,
+    # or 0 if location i was already dispatched
+    local_index = zeros(Int, length(env.customer_index))
+    local_index[is_undispatched] .= 1:count(is_undispatched)
+
+    return map(route -> local_index[route], routes)
+end
+
+"""
+$TYPEDSIGNATURES
+
+Apply given `routes` as an action to `env`.
+
+Routes should be given with global indexation.
+Use [`env_routes_from_state_routes`](@ref) if needed to convert the indices beforehand.
+"""
+function apply_decision!(env::DVSPEnv, routes::Vector{Vector{Int}})
+    for route in routes
+        env.request_is_dispatched[route] .= true
+    end
+    duration = @view env.config.static_instance.duration[
+        env.customer_index, env.customer_index
+    ]
+    return cost(routes, duration)
+end
+
+"""
+$TYPEDSIGNATURES
+
+Draw all epochs until the end of the environment, without any actions.
+"""
+function draw_all_epochs!(env::DVSPEnv; reset_env=true)
+    reset_env && reset!(env)
+    while !is_terminated(env)
+        next_epoch!(env)
+    end
+end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/environment/plot.jl b/src/DynamicVehicleScheduling/DynamicVSP/environment/plot.jl
new file mode 100644
index 0000000..60b6d90
--- /dev/null
+++ b/src/DynamicVehicleScheduling/DynamicVSP/environment/plot.jl
@@ -0,0 +1,134 @@
+"""
+$TYPEDSIGNATURES
+
+Plot the environment of a DVSPEnv, restricted to the given `epoch_indices` (all epochs if not given).
+"""
+function plot_environment(
+    env::DVSPEnv;
+    customer_markersize=4,
+    depot_markersize=7,
+    alpha_depot=0.8,
+    depot_color=:lightgreen,
+    epoch_indices=nothing,
+    kwargs...,
+)
+    draw_all_epochs!(env)
+
+    epoch_appearance = env.request_epoch
+    coordinates = coordinate(get_state(env))
+
+    epoch_indices = isnothing(epoch_indices) ? get_epoch_indices(env) : epoch_indices
+
+    xlims = (minimum(c.x for c in coordinates), maximum(c.x for c in coordinates))
+    ylims = (minimum(c.y for c in coordinates), maximum(c.y for c in coordinates))
+
+    fig = plot(;
+        legend=:topleft,
+        xlabel="x coordinate",
+        ylabel="y coordinate",
+        xlims,
+        ylims,
+        kwargs...,
+    )
+
+    for epoch in epoch_indices
+        requests = findall(epoch_appearance .== epoch)
+        x = [coordinates[request].x for request in requests]
+        y = [coordinates[request].y for request in requests]
+        scatter!(
+            fig, x, y; label="Epoch $epoch", marker=:circle, markersize=customer_markersize
+        )
+    end
+    scatter!(
+        fig,
+        [coordinates[1].x],
+        [coordinates[1].y];
+        label="Depot",
+        markercolor=depot_color,
+        marker=:rect,
+        markersize=depot_markersize,
+        alpha=alpha_depot,
+    )
+
+    return fig
+end
+
+"""
+$TYPEDSIGNATURES
+
+Plot the given `routes` for a VSP `state`.
+"""
+function plot_epoch(state::VSPState, routes; kwargs...)
+    (; coordinate, start_time) = state.instance
+    x_depot = coordinate[1].x
+    y_depot = coordinate[1].y
+    X = [p.x for p in coordinate]
+    Y = [p.y for p in coordinate]
+    markersize = 5
+    fig = plot(;
+        legend=:topleft, xlabel="x", ylabel="y", clim=(0.0, maximum(start_time)), kwargs...
+    )
+    for route in routes
+        x_points = vcat(x_depot, X[route], x_depot)
+        y_points = vcat(y_depot, Y[route], y_depot)
+        plot!(fig, x_points, y_points; label=nothing)
+    end
+    scatter!(
+        fig,
+        [x_depot],
+        [y_depot];
+        label="depot",
+        markercolor=:lightgreen,
+        markersize,
+        marker=:rect,
+    )
+    if sum(state.is_postponable) > 0
+        scatter!(
+            fig,
+            X[state.is_postponable],
+            Y[state.is_postponable];
+            label="Postponable customers",
+            marker_z=start_time[state.is_postponable],
+            markersize,
+            colormap=:turbo,
+            marker=:utriangle,
+        )
+    end
+    if sum(state.is_must_dispatch) > 0
+        scatter!(
+            fig,
+            X[state.is_must_dispatch],
+            Y[state.is_must_dispatch];
+            label="Must-dispatch customers",
+            marker_z=start_time[state.is_must_dispatch],
+            markersize,
+            colormap=:turbo,
+            marker=:star5,
+        )
+    end
+    return fig
+end
+
+"""
+$TYPEDSIGNATURES
+
+Create a plot of routes for each epoch.
+"""
+function plot_routes(env::DVSPEnv, routes; epoch_indices=nothing, kwargs...)
+    reset!(env)
+    epoch_indices = isnothing(epoch_indices) ? get_epoch_indices(env) : epoch_indices
+
+    coordinates = env.config.static_instance.coordinate
+    xlims = (minimum(c.x for c in coordinates), maximum(c.x for c in coordinates))
+    ylims = (minimum(c.y for c in coordinates), maximum(c.y for c in coordinates))
+
+    figs = map(epoch_indices) do epoch
+        s = next_epoch!(env)
+        fig = plot_epoch(
+            s, state_route_from_env_routes(env, routes[epoch]); xlims, ylims, kwargs...
+        )
+        apply_decision!(env, routes[epoch])
+        return fig
+    end
+    return figs
+end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/environment/state.jl b/src/DynamicVehicleScheduling/DynamicVSP/environment/state.jl
new file mode 100644
index 0000000..dddb076
--- /dev/null
+++ b/src/DynamicVehicleScheduling/DynamicVSP/environment/state.jl
@@ -0,0 +1,89 @@
+"""
+$TYPEDSIGNATURES
+
+State data structure for the Dynamic Vehicle Scheduling Problem.
+"""
+@kwdef struct VSPState{I}
+    "associated (static) vehicle scheduling instance"
+    instance::I = VSPInstance()
+    "for each location, 1 if the request must be dispatched, 0 otherwise. The depot is always 0."
+    is_must_dispatch::BitVector = falses(0)
+    "for each location, 1 if the request can be postponed, 0 otherwise. The depot is always 0."
+    is_postponable::BitVector = falses(0)
+end
+
+"""
+$TYPEDSIGNATURES
+
+Return the number of locations in `state` (customers + depot).
+"""
+nb_locations(state::VSPState) = nb_locations(state.instance)
+
+"""
+$TYPEDSIGNATURES
+
+Return the number of customers in `state`.
+"""
+nb_customers(state::VSPState) = nb_customers(state.instance)
+
+"""
+$TYPEDSIGNATURES
+
+Get the service time vector
+"""
+service_time(state::VSPState) = service_time(state.instance)
+
+"""
+$TYPEDSIGNATURES
+
+Get the coordinates vector.
+"""
+coordinate(state::VSPState) = coordinate(state.instance)
+
+"""
+$TYPEDSIGNATURES
+
+Get the duration matrix.
+"""
+duration(state::VSPState) = duration(state.instance)
+
+"""
+$TYPEDSIGNATURES
+
+Get the start time vector.
+"""
+start_time(state::VSPState) = start_time(state.instance)
+
+"""
+$TYPEDSIGNATURES
+
+Check if the given routes are feasible.
+Routes should be given with state (local) indexation, where the depot has index 1.
+Use [`state_route_from_env_routes`](@ref) if needed to convert env indices beforehand.
+"""
+function is_feasible(state::VSPState, routes::Vector{Vector{Int}}; verbose::Bool=false)
+    (; is_must_dispatch, instance) = state
+    (; duration, start_time, service_time) = instance
+    is_dispatched = falses(length(is_must_dispatch))
+    # Check that routes follow time constraints
+    for route in routes
+        is_dispatched[route] .= true
+        current = 1  # start at the depot
+        current_time = start_time[current]  # NOTE(review): depot start time may be < 0 here
+        for next in route
+            current_time += duration[current, next]
+            if current_time > start_time[next]
+                verbose &&
+                    @warn "Route $route is infeasible: time constraint violated at location $next"
+                return false
+            end
+            current_time += service_time[next]
+            current = next
+        end
+    end
+
+    # Check that all must dispatch requests are dispatched
+    all_dispatched = all(is_dispatched[is_must_dispatch])
+    verbose && !all_dispatched && @warn "Some must-dispatch requests are not dispatched"
+    return all_dispatched
+end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/learning/2d_features.jl b/src/DynamicVehicleScheduling/DynamicVSP/learning/2d_features.jl
new file mode 100644
index 0000000..941468a
--- /dev/null
+++ b/src/DynamicVehicleScheduling/DynamicVSP/learning/2d_features.jl
@@ -0,0 +1,15 @@
+function get_features_meanTimeToRequests(env::DVSPEnv)
+    # Despite its name, this computes the median (0.5-quantile), not the mean.
+    pending = env.customer_index[.!env.request_is_dispatched]
+    # Rows: requests not dispatched yet; columns: all static locations but the first
+    durations = env.config.static_instance.duration[pending, 2:end]
+    median_level = [0.5]
+    return mapslices(row -> quantile(row, median_level), durations; dims=2)
+end
+
+function compute_2D_features(env::DVSPEnv)
+    postponable = env.state.is_postponable
+    to_depot = env.state.instance.duration[postponable, 1]
+    median_time = get_features_meanTimeToRequests(env)[postponable]
+    return hcat(to_depot, median_time)'  # one column per postponable request
+end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/learning/dataset.jl b/src/DynamicVehicleScheduling/DynamicVSP/learning/dataset.jl
new file mode 100644
index 0000000..8f80a44
--- /dev/null
+++ b/src/DynamicVehicleScheduling/DynamicVSP/learning/dataset.jl
@@ -0,0 +1,37 @@
+"""
+$TYPEDSIGNATURES
+
+Build a dataset from the instance files of `datadir` by replaying anticipative routes.
+Return `(X, Y)`: `X[k]` is a `(features, state)` tuple, `Y[k]` the edge matrix of its routes.
+"""
+function load_VSP_dataset(
+    datadir::String; model_builder=highs_model, use_2D_features=false, kwargs...
+)
+    # One (features, state) input and one edge-matrix target per replayed epoch
+    X = Tuple{Matrix{Float32},VSPState{VSPInstance{Float64}}}[]
+    Y = BitMatrix[]
+    featurize = use_2D_features ? compute_2D_features : compute_features
+
+    for (seed, filename) in enumerate(filtered_readdir(datadir))
+        instance_path = joinpath(datadir, filename)
+        env = DVSPEnv(read_vsp_instance(instance_path); seed, kwargs...)
+
+        # Compute the anticipative routes, then replay them on a freshly reset env
+        routes_anticipative = anticipative_solver(env; model_builder)
+        reset!(env)
+        for routes in routes_anticipative
+            next_epoch!(env)
+            # Input: features and state of the current epoch (state stored by reference)
+            state = env.state
+            push!(X, (Matrix(featurize(env)), state))
+            # Target: edge matrix of the routes converted by `state_route_from_env_routes`
+            local_routes = state_route_from_env_routes(env, routes)
+            nb_nodes = nb_locations(state.instance)
+            solution = VSPSolution(local_routes; max_index=nb_nodes)
+            push!(Y, solution.edge_matrix)
+            # Update the environment
+            apply_decision!(env, routes)
+        end
+    end
+    return X, Y
+end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/learning/features.jl b/src/DynamicVehicleScheduling/DynamicVSP/learning/features.jl
new file mode 100644
index 0000000..348b816
--- /dev/null
+++ b/src/DynamicVehicleScheduling/DynamicVSP/learning/features.jl
@@ -0,0 +1,75 @@
+function get_features_quantileTimeToRequests(env::DVSPEnv)
+    # Nine quantile levels, computed as i * 0.1 for i in 1:9
+    levels = map(i -> i * 0.1, 1:9)
+    pending = env.customer_index[.!env.request_is_dispatched]
+    # Rows: requests not dispatched yet; columns: all static locations but the first
+    durations = env.config.static_instance.duration[pending, 2:end]
+    return mapslices(row -> quantile(row, levels), durations; dims=2)
+end
+
+function compute_model_free_features(state::VSPState; env::DVSPEnv)
+    (; instance, is_postponable) = state
+    # NOTE(review): `cost` reads `duration[from, to]`; the two depot names may be swapped
+    startTimes = instance.start_time
+    endTimes = startTimes .+ instance.service_time
+    timeDepotRequest = instance.duration[:, 1]
+    timeRequestDepot = instance.duration[1, :]
+
+    slack_next_epoch = startTimes .- env.config.epoch_duration
+
+    model_free_features = hcat(
+        startTimes[is_postponable], # 1
+        endTimes[is_postponable], # 2
+        timeDepotRequest[is_postponable], # 3
+        timeRequestDepot[is_postponable], # 4
+        slack_next_epoch[is_postponable], # 5 (6-14: quantiles, see compute_features)
+    )
+    return model_free_features
+end
+
+function compute_model_aware_features(state::VSPState; env::DVSPEnv)
+    # Keep only the quantile rows selected by the postponable mask
+    keep = state.is_postponable
+    return get_features_quantileTimeToRequests(env)[keep, :]
+end
+
+function compute_features(env::DVSPEnv)
+    current = env.state  # both feature groups are computed on the same state
+    model_free = compute_model_free_features(current; env)
+    model_aware = compute_model_aware_features(current; env)
+    return hcat(model_free, model_aware)'  # features end up along the first dimension
+end
+
+# ? why is this needed
+function model_free_features_critic(state::VSPState; env::DVSPEnv)
+    # Same five columns as `compute_model_free_features`, without the postponable filter
+    (; start_time, service_time, duration) = state.instance
+    slack_next_epoch = start_time .- env.config.epoch_duration
+    return hcat(
+        start_time,
+        service_time .+ start_time,
+        duration[:, 1],
+        duration[1, :],
+        slack_next_epoch,
+    )
+end
+
+# ?
+function compute_critic_features(env::DVSPEnv)
+    current = env.state
+    static_part = model_free_features_critic(current; env)
+    quantile_part = get_features_quantileTimeToRequests(env)
+    # the postponable flag is appended as the last feature before transposing
+    return hcat(static_part, quantile_part, current.is_postponable)'
+end
+
+# ?
+function compute_critic_2D_features(env::DVSPEnv)
+    current = env.state
+    depot_time = current.instance.duration[:, 1]
+    median_time = get_features_meanTimeToRequests(env)
+    flag = current.is_postponable
+    # The two features are not masked by the flag: `flag` is passed as its own
+    # third feature instead of multiplying the others.
+    return hcat(depot_time, median_time, flag)'
+end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/abstract_vsp_policy.jl b/src/DynamicVehicleScheduling/DynamicVSP/policy/abstract_vsp_policy.jl
new file mode 100644
index 0000000..63d8030
--- /dev/null
+++ b/src/DynamicVehicleScheduling/DynamicVSP/policy/abstract_vsp_policy.jl
@@ -0,0 +1,33 @@
+"""
+$TYPEDEF
+
+Abstract supertype of the dynamic VSP policies accepted by the generic [`run_policy!`](@ref).
+"""
+abstract type AbstractDynamicVSPPolicy <: AbstractDynamicPolicy end
+
+"""
+$TYPEDSIGNATURES
+
+Reset `env`, apply `π` at every epoch, and return `(total_cost, epoch_routes)`.
+"""
+function run_policy!(
+    π::AbstractDynamicVSPPolicy, env::DVSPEnv; check_feasibility=true, kwargs...
+)
+    reset!(env)
+    total_cost = 0
+    epoch_routes = Vector{Vector{Int}}[]  # converted routes, one entry per epoch
+
+    while !is_terminated(env)
+        next_epoch!(env)
+        # `kwargs` are forwarded untouched to the policy call
+        state_routes = π(env; kwargs...)
+        if check_feasibility
+            @assert is_feasible(get_state(env), state_routes)
+        end
+        env_routes = env_routes_from_state_routes(env, state_routes)
+        push!(epoch_routes, env_routes)
+        total_cost += apply_decision!(env, env_routes)
+    end
+
+    return total_cost, epoch_routes
+end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/anticipative_policy.jl b/src/DynamicVehicleScheduling/DynamicVSP/policy/anticipative_policy.jl
new file mode 100644
index 0000000..b6751c7
--- /dev/null
+++ b/src/DynamicVehicleScheduling/DynamicVSP/policy/anticipative_policy.jl
@@ -0,0 +1,18 @@
+"""
+$TYPEDEF
+
+Anticipative policy for the Dynamic Vehicle Scheduling Problem (has its own `run_policy!`).
+"""
+struct AnticipativeVSPPolicy <: AbstractDynamicPolicy end
+
+"""
+$TYPEDSIGNATURES
+
+Run `anticipative_solver` on `env`; return the summed route costs and the anticipative routes.
+"""
+function run_policy!(::AnticipativeVSPPolicy, env::DVSPEnv; model_builder=highs_model)
+    routes_anticipative = anticipative_solver(env; model_builder)
+    customers = env.customer_index
+    duration = env.config.static_instance.duration[customers, customers]
+    return sum(map(r -> cost(r, duration), routes_anticipative)), routes_anticipative
+end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/greedy_policy.jl b/src/DynamicVehicleScheduling/DynamicVSP/policy/greedy_policy.jl
new file mode 100644
index 0000000..f6c1654
--- /dev/null
+++ b/src/DynamicVehicleScheduling/DynamicVSP/policy/greedy_policy.jl
@@ -0,0 +1,34 @@
+"""
+$TYPEDEF
+
+Greedy policy for the Dynamic Vehicle Scheduling Problem.
+Dispatch customers as soon as they appear (every postponable request gets a prize of `1e9`).
+"""
+struct GreedyVSPPolicy <: AbstractDynamicVSPPolicy end
+
+function (π::GreedyVSPPolicy)(env::DVSPEnv; model_builder=highs_model)
+    state = get_state(env)
+    # the same large positive prize for every postponable request
+    θ = fill(1e9, sum(state.is_postponable))
+    return prize_collecting_vsp(θ; instance=state, model_builder)
+end
+
+# function run_policy!(π::GreedyVSPPolicy, env::DVSPEnv; check_feasibility=true, kwargs...)
+#     # reset environment, and initialize variables
+#     reset!(env)
+#     total_cost = 0
+#     epoch_routes = Vector{Vector{Int}}[]
+
+#     # epoch loop
+#     while !is_terminated(env)
+#         next_epoch!(env)
+#         state_routes = π(env; kwargs...)
+#         check_feasibility && @assert is_feasible(get_state(env), state_routes)
+#         env_routes = env_routes_from_state_routes(env, state_routes)
+#         push!(epoch_routes, env_routes)
+#         local_cost = apply_decision!(env, env_routes)
+#         total_cost += local_cost
+#     end
+
+#     return total_cost, epoch_routes
+# end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/kleopatra_policy.jl b/src/DynamicVehicleScheduling/DynamicVSP/policy/kleopatra_policy.jl
new file mode 100644
index 0000000..0a3708f
--- /dev/null
+++ b/src/DynamicVehicleScheduling/DynamicVSP/policy/kleopatra_policy.jl
@@ -0,0 +1,53 @@
+"""
+$TYPEDEF
+
+Kleopatra policy for the Dynamic VSP: `prize_predictor` maps features to request prizes.
+"""
+struct KleopatraVSPPolicy{P} <: AbstractDynamicVSPPolicy
+    prize_predictor::P
+    has_2D_features::Bool
+end
+
+"""
+$TYPEDSIGNATURES
+
+Build a [`KleopatraVSPPolicy`](@ref), inferring `has_2D_features` when it is `nothing`.
+"""
+function KleopatraVSPPolicy(prize_predictor; has_2D_features=nothing)
+    if !isnothing(has_2D_features)
+        return KleopatraVSPPolicy(prize_predictor, has_2D_features)
+    end
+    # 2D features iff the weight of the first predictor element has 2 columns
+    nb_inputs = size(prize_predictor[1].weight, 2)
+    return KleopatraVSPPolicy(prize_predictor, nb_inputs == 2)
+end
+
+function (π::KleopatraVSPPolicy)(env::DVSPEnv; model_builder=highs_model)
+    featurize = π.has_2D_features ? compute_2D_features : compute_features
+    # predicted prizes parameterize the prize-collecting problem on the current state
+    θ = π.prize_predictor(featurize(env))
+    state = get_state(env)
+    return prize_collecting_vsp(θ; instance=state, model_builder)
+end
+
+# function run_policy!(
+#     π::KleopatraVSP, env::DVSPEnv; check_feasibility=true, model_builder=highs_model
+# )
+#     # reset environment, and initialize variables
+#     reset!(env)
+#     total_cost = 0
+#     epoch_routes = Vector{Vector{Int}}[]
+
+#     # epoch loop
+#     while !is_terminated(env)
+#         next_epoch!(env)
+#         state_routes = π(env; model_builder)
+#         check_feasibility && @assert is_feasible(get_state(env), state_routes)
+#         env_routes = env_routes_from_state_routes(env, state_routes)
+#         push!(epoch_routes, env_routes)
+#         local_cost = apply_decision!(env, env_routes)
+#         total_cost += local_cost
+#     end
+
+#     return total_cost, epoch_routes
+# end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/lazy_policy.jl b/src/DynamicVehicleScheduling/DynamicVSP/policy/lazy_policy.jl
new file mode 100644
index 0000000..5ce71ca
--- /dev/null
+++ b/src/DynamicVehicleScheduling/DynamicVSP/policy/lazy_policy.jl
@@ -0,0 +1,34 @@
+"""
+$TYPEDEF
+
+Lazy policy for the Dynamic Vehicle Scheduling Problem.
+Dispatch customers only when necessary, i.e. must-dispatch (postponable prizes are `-1e9`).
+"""
+struct LazyVSPPolicy <: AbstractDynamicVSPPolicy end
+
+function (π::LazyVSPPolicy)(env::DVSPEnv; model_builder=highs_model)
+    state = get_state(env)
+    # the same large negative prize for every postponable request
+    θ = fill(-1e9, sum(state.is_postponable))
+    return prize_collecting_vsp(θ; instance=state, model_builder)
+end
+
+# function run_policy!(π::LazyVSPPolicy, env::DVSPEnv; check_feasibility=true, kwargs...)
+#     # reset environment, and initialize variables
+#     reset!(env)
+#     total_cost = 0
+#     epoch_routes = Vector{Vector{Int}}[]
+
+#     # epoch loop
+#     while !is_terminated(env)
+#         next_epoch!(env)
+#         state_routes = π(env; kwargs...)
+#         check_feasibility && @assert is_feasible(get_state(env), state_routes)
+#         env_routes = env_routes_from_state_routes(env, state_routes)
+#         push!(epoch_routes, env_routes)
+#         local_cost = apply_decision!(env, env_routes)
+#         total_cost += local_cost
+#     end
+
+#     return total_cost, epoch_routes
+# end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/utils.jl b/src/DynamicVehicleScheduling/DynamicVSP/utils.jl
new file mode 100644
index 0000000..1be5e4d
--- /dev/null
+++ b/src/DynamicVehicleScheduling/DynamicVSP/utils.jl
@@ -0,0 +1,11 @@
+"""
+$TYPEDEF
+
+Basic 2D point structure: coordinates `x` and `y` share the same type `T`.
+"""
+struct Point{T}
+    x::T
+    y::T
+end
+
+Base.show(io::IO, p::Point) = print(io, "($(p.x), $(p.y))")
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/vsp/instance.jl b/src/DynamicVehicleScheduling/DynamicVSP/vsp/instance.jl
new file mode 100644
index 0000000..512a0fe
--- /dev/null
+++ b/src/DynamicVehicleScheduling/DynamicVSP/vsp/instance.jl
@@ -0,0 +1,65 @@
+"""
+$TYPEDEF
+
+Instance data structure for the (deterministic and static) Vehicle Scheduling Problem.
+
+# Fields
+$TYPEDFIELDS
+"""
+@kwdef struct VSPInstance{T}
+    "coordinates of the locations. The first one is always the depot."
+    coordinate::Vector{Point{T}} = Point{Float64}[]
+    "service time at each location"
+    service_time::Vector{T} = Float64[]
+    "start time at each location"
+    start_time::Vector{T} = Float64[]
+    "duration matrix between locations, read as `duration[from, to]` by `cost`"
+    duration::Matrix{T} = zeros(Float64, 0, 0)
+end
+
+function Base.show(io::IO, instance::VSPInstance)
+    N = nb_customers(instance)  # the depot is not counted
+    return print(io, "VSPInstance with $N customers")
+end
+
+"""
+$TYPEDSIGNATURES
+
+Return the number of locations in `instance` (customers + depot), i.e. its number of coordinates.
+"""
+nb_locations(instance::VSPInstance) = length(instance.coordinate)
+
+"""
+$TYPEDSIGNATURES
+
+Return the number of customers in `instance` (excluding the depot): `nb_locations(instance) - 1`.
+"""
+nb_customers(instance::VSPInstance) = nb_locations(instance) - 1
+
+"""
+$TYPEDSIGNATURES
+
+Get the service time vector of `instance`.
+"""
+service_time(instance::VSPInstance) = instance.service_time
+
+"""
+$TYPEDSIGNATURES
+
+Get the coordinates vector of `instance`.
+"""
+coordinate(instance::VSPInstance) = instance.coordinate
+
+"""
+$TYPEDSIGNATURES
+
+Get the duration matrix of `instance`.
+"""
+duration(instance::VSPInstance) = instance.duration
+
+"""
+$TYPEDSIGNATURES
+
+Get the start time vector of `instance`.
+"""
+start_time(instance::VSPInstance) = instance.start_time
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/vsp/parsing.jl b/src/DynamicVehicleScheduling/DynamicVSP/vsp/parsing.jl
new file mode 100644
index 0000000..21589fd
--- /dev/null
+++ b/src/DynamicVehicleScheduling/DynamicVSP/vsp/parsing.jl
@@ -0,0 +1,95 @@
+"""
+$TYPEDSIGNATURES
+
+Create a `VSPInstance` from file `filepath` containing a VRPTW instance.
+It uses time window values to compute task times as the middle of the interval.
+
+If `rounded=true`, raw file values are parsed as `Int` (the file must contain integers only).
+Coordinates and times are divided by `normalization`, which yields floating-point values.
+"""
+function read_vsp_instance(filepath::String; rounded::Bool=false, normalization=3600.0)
+    type = rounded ? Int : Float64
+    mode = ""
+    local edge_weight_type
+    local edge_weight_format
+    duration_matrix = Vector{type}[]
+    nb_locations = 0
+    local demand
+    local service_time
+    local coordinates
+    local start_time
+
+    # `readlines` opens and closes the file itself: no handle leaks if parsing throws
+    for line in readlines(filepath)
+        line = strip(line, [' ', '\n', '\t'])
+        if line == ""
+            continue
+        elseif startswith(line, "DIMENSION")
+            nb_locations = parse(Int, split(line, " : ")[2])
+            demand = zeros(type, nb_locations)
+            service_time = zeros(type, nb_locations)
+            coordinates = zeros(type, (nb_locations, 2))
+            start_time = zeros(Float64, nb_locations)  # window midpoints may be fractional
+        elseif startswith(line, "EDGE_WEIGHT_TYPE")
+            edge_weight_type = split(line, " : ")[2]
+        elseif startswith(line, "EDGE_WEIGHT_FORMAT")
+            edge_weight_format = split(line, " : ")[2]
+        elseif startswith(line, "NODE_COORD_SECTION")
+            mode = "coord"
+        elseif line == "DEMAND_SECTION"
+            mode = "demand"
+        elseif line == "DEPOT_SECTION"
+            mode = "depot"
+        elseif line == "EDGE_WEIGHT_SECTION"
+            mode = "edge_weights"
+            @assert edge_weight_type == "EXPLICIT"
+            @assert edge_weight_format == "FULL_MATRIX"
+        elseif line == "TIME_WINDOW_SECTION"
+            mode = "time_windows"
+        elseif line == "SERVICE_TIME_SECTION"
+            mode = "service_t"
+        elseif line == "EOF"
+            break
+        elseif mode == "coord"
+            node, x, y = split(line)  # Split by whitespace or \t, skip duplicate whitespace
+            node = parse(Int, node)
+            x, y = (parse(type, x), parse(type, y))
+            coordinates[node, :] = [x, y]
+        elseif mode == "demand"
+            node, d = split(line)
+            node, d = parse(Int, node), parse(type, d)
+            if node == 1 # depot
+                @assert d == 0
+            end
+            demand[node] = d
+        elseif mode == "edge_weights"
+            push!(duration_matrix, [parse(type, e) for e in split(line)])
+        elseif mode == "service_t"
+            node, t = split(line)
+            node = parse(Int, node)
+            t = parse(type, t)
+            if node == 1 # depot
+                @assert t == 0
+            end
+            service_time[node] = t
+        elseif mode == "time_windows"
+            node, l, u = split(line)
+            node = parse(Int, node)
+            l, u = parse(type, l), parse(type, u)
+            start_time[node] = (u + l) / 2
+        end
+    end
+
+    # Stack the parsed rows into the duration matrix
+    duration = mapreduce(permutedims, vcat, duration_matrix)
+    # Divide out of place: in-place `./=` cannot store fractions in integer arrays
+    coordinate = [
+        Point(x / normalization, y / normalization) for
+        (x, y) in zip(coordinates[:, 1], coordinates[:, 2])
+    ]
+    service_time = service_time ./ normalization
+    start_time = start_time ./ normalization
+    duration = duration ./ normalization
+
+    return VSPInstance(; coordinate, service_time, start_time, duration)
+end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/vsp/plot.jl b/src/DynamicVehicleScheduling/DynamicVSP/vsp/plot.jl
new file mode 100644
index 0000000..a9f03de
--- /dev/null
+++ b/src/DynamicVehicleScheduling/DynamicVSP/vsp/plot.jl
@@ -0,0 +1,39 @@
+"""
+$TYPEDSIGNATURES
+
+Plot the static VSP `instance`: customers as circles, depot as a square; `kwargs` go to `plot`.
+"""
+function plot_instance(
+    instance::VSPInstance;
+    customer_markersize=4,
+    depot_markersize=7,
+    alpha_depot=0.8,
+    customer_color=:lightblue,
+    depot_color=:lightgreen,
+    kwargs...,
+)
+    xs = map(p -> p.x, instance.coordinate)
+    ys = map(p -> p.y, instance.coordinate)
+    # the first location is drawn as the depot, all the others as customers
+    fig = plot(; legend=:topleft, xlabel="x coordinate", ylabel="y coordinate", kwargs...)
+    scatter!(
+        fig,
+        xs[2:end],
+        ys[2:end];
+        label="Customers",
+        markercolor=customer_color,
+        marker=:circle,
+        markersize=customer_markersize,
+    )
+    scatter!(
+        fig,
+        xs[1:1],
+        ys[1:1];
+        label="Depot",
+        markercolor=depot_color,
+        marker=:rect,
+        markersize=depot_markersize,
+        alpha=alpha_depot,
+    )
+    return fig
+end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/vsp/solution.jl b/src/DynamicVehicleScheduling/DynamicVSP/vsp/solution.jl
new file mode 100644
index 0000000..d6fb25e
--- /dev/null
+++ b/src/DynamicVehicleScheduling/DynamicVSP/vsp/solution.jl
@@ -0,0 +1,50 @@
+"""
+$TYPEDEF
+
+Solution for the static Vehicle Scheduling Problem, stored as routes and as an edge matrix.
+
+# Fields
+$TYPEDFIELDS
+"""
+struct VSPSolution
+    "list of routes, each route being a list of request indices in corresponding instance (excluding the depot)."
+    routes::Vector{Vector{Int}}
+    "size (nb_locations, nb_locations). `edge_matrix[i, j]` is equal to 1 if a route takes edge `(i, j)`."
+    edge_matrix::BitMatrix
+end
+
+"""
+$TYPEDSIGNATURES
+
+Get the `routes` field of `solution`.
+"""
+routes(solution::VSPSolution) = solution.routes
+
+"""
+$TYPEDSIGNATURES
+
+Get the `edge_matrix` field of `solution`.
+"""
+edge_matrix(solution::VSPSolution) = solution.edge_matrix
+
+"""
+$TYPEDSIGNATURES
+
+Build a `VSPSolution` from routes. Set `max_index` to manually define the size of `edge_matrix`.
+"""
+function VSPSolution(routes::Vector{Vector{Int}}; max_index=nothing)
+    # no route and no requested size: empty solution
+    isempty(routes) && isnothing(max_index) && return VSPSolution(routes, falses(0, 0))
+    # `init=1` (the depot index) keeps the size well defined when a route is empty
+    N = isnothing(max_index) ? maximum(r -> maximum(r; init=1), routes) : max_index
+    edge_matrix = falses(N, N)
+    for route in routes
+        previous = 1  # every route leaves from the depot
+        for r in route
+            edge_matrix[previous, r] = true
+            previous = r
+        end
+        edge_matrix[previous, 1] = true  # and goes back to it
+    end
+    return VSPSolution(routes, edge_matrix)
+end
diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
index ef9cbb9..329ea44 100644
--- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
+++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
@@ -1,6 +1,73 @@
 module DynamicVehicleScheduling
 
 using ..Utils
+
+using Base: @kwdef
+using ChainRulesCore
 using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES
+using Graphs
+using HiGHS
+using InferOpt
+using IterTools: partition
+using JSON
+using JuMP
+using Plots: plot, plot!, scatter!
+using Printf: @printf
+using Random: AbstractRNG, MersenneTwister, seed!, randperm
+using Requires: @require
+using Statistics: mean, quantile
+
+include("utils.jl")
+include("dynamic_config.jl")
+include("abstract_policy.jl")
+
+# Dynamic Vehicle Scheduling
+include("DynamicVSP/utils.jl")
+
+include("DynamicVSP/vsp/instance.jl")
+include("DynamicVSP/vsp/parsing.jl")
+include("DynamicVSP/vsp/solution.jl")
+include("DynamicVSP/vsp/plot.jl")
+
+include("DynamicVSP/environment/state.jl")
+include("DynamicVSP/environment/environment.jl")
+include("DynamicVSP/environment/plot.jl")
+
+include("DynamicVSP/algorithms/prize_collecting_vsp.jl")
+include("DynamicVSP/algorithms/anticipative_solver.jl")
+
+include("DynamicVSP/learning/features.jl")
+include("DynamicVSP/learning/2d_features.jl")
+include("DynamicVSP/learning/dataset.jl")
+
+include("DynamicVSP/policy/abstract_vsp_policy.jl")
+include("DynamicVSP/policy/greedy_policy.jl")
+include("DynamicVSP/policy/lazy_policy.jl")
+include("DynamicVSP/policy/anticipative_policy.jl")
+include("DynamicVSP/policy/kleopatra_policy.jl")
+
+export highs_model, filtered_readdir
+
+export solve_hindsight_problem
+
+export AbstractDynamicPolicy, BasicDynamicPolicy
+
+export GreedyPolicy, LazyPolicy, RandomPolicy, Kleopatra
+
+export run_policy
+
+export compute_features,
+    compute_2D_features, compute_critic_features, compute_critic_2D_features, load_dataset
 
+export VSPInstance,
+    read_vsp_instance, start_time, env_routes_from_state_routes, state_route_from_env_routes
+export DVSPEnv, prize_collecting_vsp
+export anticipative_solver
+export VSPSolution
+export load_VSP_dataset
+export GreedyVSPPolicy,
+    LazyVSPPolicy, AnticipativeVSPPolicy, run_policy!, KleopatraVSPPolicy
+export plot_routes, plot_instance, plot_environment, plot_epoch
+export get_state
+export nb_epochs, get_epoch_indices
 end
diff --git a/src/DynamicVehicleScheduling/abstract_policy.jl b/src/DynamicVehicleScheduling/abstract_policy.jl
new file mode 100644
index 0000000..951efd0
--- /dev/null
+++ b/src/DynamicVehicleScheduling/abstract_policy.jl
@@ -0,0 +1,5 @@
+abstract type AbstractDynamicPolicy end
+# Fallback call operator: raises a proper exception for policies that do not define their own.
+function (π::AbstractDynamicPolicy)(env; kwargs...)
+    return error("Not implemented: $(typeof(π)) does not define a call on environments")
+end
diff --git a/src/DynamicVehicleScheduling/dynamic_config.jl b/src/DynamicVehicleScheduling/dynamic_config.jl
new file mode 100644
index 0000000..3d052bf
--- /dev/null
+++ b/src/DynamicVehicleScheduling/dynamic_config.jl
@@ -0,0 +1,24 @@
+"""
+$TYPEDEF
+
+Config data structure for dynamic vehicle routing and scheduling problems.
+
+# Fields
+$TYPEDFIELDS
+"""
+@kwdef struct DynamicConfig{I,S,T}
+    "static instance to sample arriving requests from"
+    static_instance::I
+    "max number of new requests per epoch (rejection sampling)"
+    max_requests_per_epoch::Int = 100
+    "time distance between epoch start and routes start (same type `T` as `epoch_duration`)"
+    Δ_dispatch::T = 3600
+    "duration of each epoch"
+    epoch_duration::T = 3600
+    "first epoch index (time = epoch_duration x first_epoch)"
+    first_epoch::Int
+    "last epoch index"
+    last_epoch::Int
+    "seed for customer sampling"
+    seed::S
+end
diff --git a/src/DynamicVehicleScheduling/utils.jl b/src/DynamicVehicleScheduling/utils.jl
new file mode 100644
index 0000000..1e17906
--- /dev/null
+++ b/src/DynamicVehicleScheduling/utils.jl
@@ -0,0 +1,34 @@
+"""
+$TYPEDSIGNATURES
+
+Sample `k` distinct random indices from `2:(N + 1)` using `rng` (requires `k <= N`).
+"""
+sample_indices(rng::AbstractRNG, k, N) = randperm(rng, N)[1:k] .+ 1
+
+"""
+$TYPEDSIGNATURES
+
+Return the total duration of `routes`: every route goes depot -> its stops in order -> depot.
+The depot is index 1 of `duration` and must not be listed in the routes themselves.
+"""
+function cost(routes::Vector{Vector{Int}}, duration::AbstractMatrix)
+    accumulated = zero(eltype(duration))
+    for stops in routes
+        previous = 1  # implicit departure from the depot
+        for stop in stops
+            accumulated += duration[previous, stop]
+            previous = stop
+        end
+        accumulated += duration[previous, 1]  # implicit return to the depot
+    end
+    return accumulated
+end
+
+# """
+# $TYPEDSIGNATURES
+
+# Readdir all files in `data` with extension `file_extension`.
+# """
+# function filtered_readdir(data, file_extension=".txt"; kwargs...)
+#     return filter(x -> endswith(x, file_extension), readdir(data; kwargs...))
+# end
diff --git a/src/Utils/model_builders.jl b/src/Utils/model_builders.jl
index 95df58b..4f0c838 100644
--- a/src/Utils/model_builders.jl
+++ b/src/Utils/model_builders.jl
@@ -5,7 +5,6 @@ Initialize a HiGHS model (with disabled logging).
 """
 function highs_model()
     model = Model(HiGHS.Optimizer)
-    # set_attribute(model, "log_to_console", false)
     return model
 end
 

From 2f406e0e85e8ea84f8985deadf30b1854e4d4190 Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Tue, 22 Apr 2025 10:29:56 +0200
Subject: [PATCH 03/29] wip

---
 src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
index 329ea44..8f24828 100644
--- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
+++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
@@ -3,7 +3,7 @@ module DynamicVehicleScheduling
 using ..Utils
 
 using Base: @kwdef
-using ChainRulesCore
+# using ChainRulesCore
 using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES
 using Graphs
 using HiGHS

From 5a2e85274c9fd685238ae2681e302676eeae10ae Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Thu, 3 Jul 2025 09:44:53 +0200
Subject: [PATCH 04/29] Implement generate_sample interface

---
 Project.toml                                  |  4 ++
 src/Argmax/Argmax.jl                          | 42 ++++++++++++-------
 src/DecisionFocusedLearningBenchmarks.jl      |  6 +--
 .../algorithms/prize_collecting_vsp.jl        | 16 +++----
 .../DynamicVehicleScheduling.jl               |  2 +-
 .../FixedSizeShortestPath.jl                  | 41 +++++-------------
 .../PortfolioOptimization.jl                  | 17 +++++++-
 src/Ranking/Ranking.jl                        | 20 ++++-----
 .../StochasticVehicleScheduling.jl            | 39 +++++++----------
 src/SubsetSelection/SubsetSelection.jl        | 42 ++++++++-----------
 src/Utils/Utils.jl                            |  7 ++--
 src/Utils/interface.jl                        | 29 ++++++++++++-
 test/subset_selection.jl                      |  7 ++--
 13 files changed, 144 insertions(+), 128 deletions(-)

diff --git a/Project.toml b/Project.toml
index d9500c6..ed7fde9 100644
--- a/Project.toml
+++ b/Project.toml
@@ -13,6 +13,8 @@ Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
 HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b"
 Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0"
 Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9"
+IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
+JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
 JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Metalhead = "dbeba491-748d-5e0e-a39e-b530a07fa0cc"
@@ -37,6 +39,8 @@ Graphs = "1.11"
 HiGHS = "1.9"
 Images = "0.26.1"
 Ipopt = "1.6"
+IterTools = "1.10.0"
+JSON = "0.21.4"
 JuMP = "1.22"
 LinearAlgebra = "1"
 Metalhead = "0.9.4"
diff --git a/src/Argmax/Argmax.jl b/src/Argmax/Argmax.jl
index 0864a96..a9590df 100644
--- a/src/Argmax/Argmax.jl
+++ b/src/Argmax/Argmax.jl
@@ -62,25 +62,39 @@ end
 
 """
 $TYPEDSIGNATURES
-
-Generate a dataset of labeled instances for the argmax problem.
 """
-function Utils.generate_dataset(
-    bench::ArgmaxBenchmark, dataset_size::Int=10; seed::Int=0, noise_std=0.0
+function Utils.generate_sample(
+    bench::ArgmaxBenchmark, rng::AbstractRNG; noise_std::Real=0.0f0
 )
     (; instance_dim, nb_features, encoder) = bench
-    rng = MersenneTwister(seed)
-    features = [randn(rng, Float32, nb_features, instance_dim) for _ in 1:dataset_size]
-    costs = encoder.(features)
-    noisy_solutions = [
-        one_hot_argmax(θ + noise_std * randn(rng, Float32, instance_dim)) for θ in costs
-    ]
-    return [
-        DataSample(; x, θ_true, y_true) for
-        (x, θ_true, y_true) in zip(features, costs, noisy_solutions)
-    ]
+    features = randn(rng, Float32, nb_features, instance_dim)
+    costs = encoder(features)  # the noise below only perturbs the label, not `θ_true`
+    noisy_solution = one_hot_argmax(costs + noise_std * randn(rng, Float32, instance_dim))
+    return DataSample(; x=features, θ_true=costs, y_true=noisy_solution)
 end
 
+# """
+# $TYPEDSIGNATURES
+
+# Generate a dataset of labeled instances for the argmax problem.
+# """
+# function Utils.generate_dataset(
+#     bench::ArgmaxBenchmark, dataset_size::Int; noise_std=0.0, kwargs...
+# )
+#     return Utils.generate_dataset(bench, dataset_size; noise_std=noise_std, kwargs...)
+#     # (; instance_dim, nb_features, encoder) = bench
+#     # rng = MersenneTwister(seed)
+#     # features = [randn(rng, Float32, nb_features, instance_dim) for _ in 1:dataset_size]
+#     # costs = encoder.(features)
+#     # noisy_solutions = [
+#     #     one_hot_argmax(θ + noise_std * randn(rng, Float32, instance_dim)) for θ in costs
+#     # ]
+#     # return [
+#     #     DataSample(; x, θ_true, y_true) for
+#     #     (x, θ_true, y_true) in zip(features, costs, noisy_solutions)
+#     # ]
+# end
+
 """
 $TYPEDSIGNATURES
 
diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl
index 18cd94f..74bb04a 100644
--- a/src/DecisionFocusedLearningBenchmarks.jl
+++ b/src/DecisionFocusedLearningBenchmarks.jl
@@ -54,7 +54,7 @@ include("Warcraft/Warcraft.jl")
 include("FixedSizeShortestPath/FixedSizeShortestPath.jl")
 include("PortfolioOptimization/PortfolioOptimization.jl")
 include("StochasticVehicleScheduling/StochasticVehicleScheduling.jl")
-include("DynamicVehicleScheduling/DynamicVehicleScheduling.jl")
+# include("DynamicVehicleScheduling/DynamicVehicleScheduling.jl")
 
 using .Utils
 using .Argmax
@@ -64,10 +64,10 @@ using .Warcraft
 using .FixedSizeShortestPath
 using .PortfolioOptimization
 using .StochasticVehicleScheduling
-using .DynamicVehicleScheduling
+# using .DynamicVehicleScheduling
 
 # Interface
-export AbstractBenchmark, DataSample
+export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark, DataSample
 export generate_dataset
 export generate_statistical_model
 export generate_maximizer, maximizer_kwargs
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/algorithms/prize_collecting_vsp.jl b/src/DynamicVehicleScheduling/DynamicVSP/algorithms/prize_collecting_vsp.jl
index d1fbcce..531169c 100644
--- a/src/DynamicVehicleScheduling/DynamicVSP/algorithms/prize_collecting_vsp.jl
+++ b/src/DynamicVehicleScheduling/DynamicVSP/algorithms/prize_collecting_vsp.jl
@@ -205,11 +205,11 @@ function _objective_value(θ, routes; instance)
     return -total, g
 end
 
-function ChainRulesCore.rrule(::typeof(my_objective_value), θ, routes; instance)
-    total, g = _objective_value(θ, routes; instance)
-    function pullback(dy)
-        g = g .* dy
-        return NoTangent(), g, NoTangent()
-    end
-    return total, pullback
-end
+# function ChainRulesCore.rrule(::typeof(my_objective_value), θ, routes; instance)
+#     total, g = _objective_value(θ, routes; instance)
+#     function pullback(dy)
+#         g = g .* dy
+#         return NoTangent(), g, NoTangent()
+#     end
+#     return total, pullback
+# end
diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
index 8f24828..e1fddbc 100644
--- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
+++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
@@ -7,7 +7,7 @@ using Base: @kwdef
 using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES
 using Graphs
 using HiGHS
-using InferOpt
+# using InferOpt
 using IterTools: partition
 using JSON
 using JuMP
diff --git a/src/FixedSizeShortestPath/FixedSizeShortestPath.jl b/src/FixedSizeShortestPath/FixedSizeShortestPath.jl
index fd60de2..ed799ea 100644
--- a/src/FixedSizeShortestPath/FixedSizeShortestPath.jl
+++ b/src/FixedSizeShortestPath/FixedSizeShortestPath.jl
@@ -103,45 +103,24 @@ function Utils.generate_maximizer(bench::FixedSizeShortestPathBenchmark; use_dij
     return shortest_path_maximizer
 end
 
-"""
-$TYPEDSIGNATURES
-
-Generate dataset for the shortest path problem.
-"""
-function Utils.generate_dataset(
-    bench::FixedSizeShortestPathBenchmark,
-    dataset_size::Int=10;
-    seed::Int=0,
-    type::Type=Float32,
+function Utils.generate_sample(
+    bench::FixedSizeShortestPathBenchmark, rng::AbstractRNG; type::Type=Float32
 )
-    # Set seed
-    rng = MersenneTwister(seed)
     (; graph, p, deg, ν) = bench
-
+    features = randn(rng, type, p)  # honor the `type` keyword, like ξ and costs do
     E = Graphs.ne(graph)
-
-    # Features
-    features = [randn(rng, type, p) for _ in 1:dataset_size]
-
     # True weights
     B = rand(rng, Bernoulli(0.5), E, p)
     ξ = if ν == 0.0
-        [ones(type, E) for _ in 1:dataset_size]
+        ones(type, E)
     else
-        [rand(rng, Uniform{type}(1 - ν, 1 + ν), E) for _ in 1:dataset_size]
+        rand(rng, Uniform{type}(1 - ν, 1 + ν), E)
     end
-    costs = [
-        -(1 .+ (3 .+ B * zᵢ ./ type(sqrt(p))) .^ deg) .* ξᵢ for (ξᵢ, zᵢ) in zip(ξ, features)
-    ]
-
-    shortest_path_maximizer = Utils.generate_maximizer(bench)
-
-    # Label solutions
-    solutions = shortest_path_maximizer.(costs)
-    return [
-        DataSample(; x, θ_true, y_true) for
-        (x, θ_true, y_true) in zip(features, costs, solutions)
-    ]
+    costs = -(1 .+ (3 .+ B * features ./ type(sqrt(p))) .^ deg) .* ξ
+
+    maximizer = Utils.generate_maximizer(bench)
+    solution = maximizer(costs)
+    return DataSample(; x=features, θ_true=costs, y_true=solution)
 end
 
 """
diff --git a/src/PortfolioOptimization/PortfolioOptimization.jl b/src/PortfolioOptimization/PortfolioOptimization.jl
index 308770a..32c32bc 100644
--- a/src/PortfolioOptimization/PortfolioOptimization.jl
+++ b/src/PortfolioOptimization/PortfolioOptimization.jl
@@ -7,7 +7,7 @@ using Flux: Chain, Dense
 using Ipopt: Ipopt
 using JuMP: @variable, @objective, @constraint, optimize!, value, Model, set_silent
 using LinearAlgebra: I
-using Random: MersenneTwister
+using Random: AbstractRNG, MersenneTwister
 
 """
 $TYPEDEF
@@ -82,6 +82,21 @@ function Utils.generate_maximizer(bench::PortfolioOptimizationBenchmark)
     return portfolio_maximizer
 end
 
+function Utils.generate_sample(
+    bench::PortfolioOptimizationBenchmark, rng::AbstractRNG; type::Type=Float32
+)
+    (; d, p, deg, ν, L, f) = bench
+    features = randn(rng, type, p)  # one p-vector per sample, so B * features has length d
+    B = rand(rng, Bernoulli(0.5), d, p)
+    c̄ = (0.05 / type(sqrt(p)) .* B * features .+ 0.1^(1 / deg)) .^ deg
+    costs = c̄ .+ L * f .+ 0.01 * ν * randn(rng, type, d)
+
+    maximizer = Utils.generate_maximizer(bench)
+    solution = maximizer(costs)
+
+    return DataSample(; x=features, θ_true=c̄, y_true=solution)
+end
+
+
 """
 $TYPEDSIGNATURES
 
diff --git a/src/Ranking/Ranking.jl b/src/Ranking/Ranking.jl
index 8b93b8a..c6ec398 100644
--- a/src/Ranking/Ranking.jl
+++ b/src/Ranking/Ranking.jl
@@ -61,22 +61,16 @@ end
 """
 $TYPEDSIGNATURES
 
-Generate a dataset of labeled instances for the ranking problem.
+Generate a labeled sample for the ranking problem.
 """
-function Utils.generate_dataset(
-    bench::RankingBenchmark, dataset_size::Int=10; seed::Int=0, noise_std=0.0
+function Utils.generate_sample(
+    bench::RankingBenchmark, rng::AbstractRNG; noise_std::Float32=0.0f0
 )
     (; instance_dim, nb_features, encoder) = bench
-    rng = MersenneTwister(seed)
-    features = [randn(rng, Float32, nb_features, instance_dim) for _ in 1:dataset_size]
-    costs = encoder.(features)
-    noisy_solutions = [
-        ranking(θ .+ noise_std * randn(rng, Float32, instance_dim)) for θ in costs
-    ]
-    return [
-        DataSample(; x, θ_true, y_true) for
-        (x, θ_true, y_true) in zip(features, costs, noisy_solutions)
-    ]
+    features = randn(rng, Float32, nb_features, instance_dim)
+    costs = encoder(features)
+    noisy_solution = ranking(costs .+ noise_std * randn(rng, Float32, instance_dim))
+    return DataSample(; x=features, θ_true=costs, y_true=noisy_solution)
 end
 
 """
diff --git a/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl b/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl
index 150f147..83da492 100644
--- a/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl
+++ b/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl
@@ -73,43 +73,32 @@ end
 """
 $TYPEDSIGNATURES
 
-Create a dataset of `dataset_size` instances for the given `StochasticVehicleSchedulingBenchmark`.
-If you want to not add label solutions in the dataset, set `compute_solutions=false`.
+Generate a sample for the given `StochasticVehicleSchedulingBenchmark`.
+If you want to not add label solutions in the sample, set `compute_solutions=false`.
 By default, they will be computed using column generation.
 Note that computing solutions can be time-consuming, especially for large instances.
 You can also use instead `compact_mip` or `compact_linearized_mip` as the algorithm to compute solutions.
 If you want to provide a custom algorithm to compute solutions, you can pass it as the `algorithm` keyword argument.
 If `algorithm` takes keyword arguments, you can pass them as well directly in `kwargs...`.
-If `store_city=false`, the coordinates and unnecessary information about instances will not be stored in the dataset.
+If `store_city=false`, the coordinates and unnecessary information about instances will not be stored in the sample.
 """
-function Utils.generate_dataset(
+function Utils.generate_sample(
     benchmark::StochasticVehicleSchedulingBenchmark,
-    dataset_size::Int;
+    rng::AbstractRNG;
+    store_city=true,
     compute_solutions=true,
-    seed=nothing,
-    rng=MersenneTwister(0),
     algorithm=column_generation_algorithm,
-    store_city=true,
     kwargs...,
 )
     (; nb_tasks, nb_scenarios) = benchmark
-    Random.seed!(rng, seed)
-    instances = [
-        Instance(; nb_tasks, nb_scenarios, rng, store_city) for _ in 1:dataset_size
-    ]
-    features = get_features.(instances)
-    if compute_solutions
-        solutions = [algorithm(instance; kwargs...).value for instance in instances]
-        return [
-            DataSample(; x=feature, instance, y_true=solution) for
-            (instance, feature, solution) in zip(instances, features, solutions)
-        ]
+    instance = Instance(; nb_tasks, nb_scenarios, rng, store_city)
+    x = get_features(instance)
+    y_true = if compute_solutions
+        algorithm(instance; kwargs...).value  # TODO: modify algorithms to directly return the solution
+    else
+        nothing
     end
-    # else
-    return [
-        DataSample(; x=feature, instance) for
-        (instance, feature) in zip(instances, features)
-    ]
+    return DataSample(; x, instance, y_true)
 end
 
 """
@@ -126,7 +115,7 @@ end
 $TYPEDSIGNATURES
 """
 function Utils.generate_maximizer(
-    bench::StochasticVehicleSchedulingBenchmark; model_builder=highs_model
+    ::StochasticVehicleSchedulingBenchmark; model_builder=highs_model
 )
     return StochasticVechicleSchedulingMaximizer(model_builder)
 end
diff --git a/src/SubsetSelection/SubsetSelection.jl b/src/SubsetSelection/SubsetSelection.jl
index 0e738a5..085324d 100644
--- a/src/SubsetSelection/SubsetSelection.jl
+++ b/src/SubsetSelection/SubsetSelection.jl
@@ -17,11 +17,13 @@ without knowing their values, but only observing some features.
 # Fields
 $TYPEDFIELDS
 """
-struct SubsetSelectionBenchmark <: AbstractBenchmark
+struct SubsetSelectionBenchmark{M} <: AbstractBenchmark
     "total number of items"
     n::Int
     "number of items to select"
     k::Int
+    "hidden unknown mapping from features to costs"
+    mapping::M
 end
 
 function Base.show(io::IO, bench::SubsetSelectionBenchmark)
@@ -29,9 +31,14 @@ function Base.show(io::IO, bench::SubsetSelectionBenchmark)
     return print(io, "SubsetSelectionBenchmark(n=$n, k=$k)")
 end
 
-function SubsetSelectionBenchmark(; n::Int=25, k::Int=5)
+function SubsetSelectionBenchmark(; n::Int=25, k::Int=5, identity_mapping::Bool=true)
     @assert n >= k "number of items n must be greater than k"
-    return SubsetSelectionBenchmark(n, k)
+    mapping = if identity_mapping
+        copy
+    else
+        Dense(n => n; bias=false)
+    end
+    return SubsetSelectionBenchmark(n, k, mapping)
 end
 
 function top_k(v::AbstractVector, k::Int)
@@ -54,29 +61,14 @@ end
 """
 $TYPEDSIGNATURES
 
-Generate a dataset of labeled instances for the subset selection problem.
-The mapping between features and cost is identity.
+Generate a labeled instance for the subset selection problem.
 """
-function Utils.generate_dataset(
-    bench::SubsetSelectionBenchmark,
-    dataset_size::Int=10;
-    seed::Int=0,
-    identity_mapping=true,
-)
-    (; n, k) = bench
-    rng = MersenneTwister(seed)
-    features = [randn(rng, Float32, n) for _ in 1:dataset_size]
-    costs = if identity_mapping
-        copy(features)  # we assume that the cost is the same as the feature
-    else
-        mapping = Dense(n => n; bias=false)
-        mapping.(features)
-    end
-    solutions = top_k.(costs, k)
-    return [
-        DataSample(; x, θ_true, y_true) for
-        (x, θ_true, y_true) in zip(features, costs, solutions)
-    ]
+function Utils.generate_sample(bench::SubsetSelectionBenchmark, rng::AbstractRNG)
+    (; n, k, mapping) = bench
+    features = randn(rng, Float32, n)
+    costs = mapping(features)
+    solution = top_k(costs, k)
+    return DataSample(; x=features, θ_true=costs, y_true=solution)
 end
 
 """
diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl
index 60b5b92..58bc161 100644
--- a/src/Utils/Utils.jl
+++ b/src/Utils/Utils.jl
@@ -5,6 +5,7 @@ using Flux: softplus
 using HiGHS: HiGHS
 using JuMP: Model
 using LinearAlgebra: dot
+using Random: Random, MersenneTwister
 using SCIP: SCIP
 using SimpleWeightedGraphs: SimpleWeightedDiGraph
 using StatsBase: StatsBase
@@ -18,9 +19,9 @@ include("model_builders.jl")
 
 export DataSample
 
-export AbstractBenchmark
-export generate_dataset,
-    generate_statistical_model, generate_maximizer, plot_data, compute_gap
+export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark
+export generate_dataset, generate_statistical_model, generate_maximizer, generate_sample
+export plot_data, compute_gap
 export maximizer_kwargs
 export grid_graph, get_path, path_to_matrix
 export neg_tensor, squeeze_last_dims, average_tensor
diff --git a/src/Utils/interface.jl b/src/Utils/interface.jl
index 96a2a39..5f4188c 100644
--- a/src/Utils/interface.jl
+++ b/src/Utils/interface.jl
@@ -15,13 +15,40 @@ The following methods are optional:
 """
 abstract type AbstractBenchmark end
 
+"""
+$TYPEDEF
+"""
+abstract type AbstractStochasticBenchmark <: AbstractBenchmark end
+
+"""
+$TYPEDEF
+"""
+abstract type AbstractDynamicBenchmark <: AbstractStochasticBenchmark end
+
+"""
+    generate_sample(::AbstractBenchmark, rng::AbstractRNG; kwargs...)
+
+Do not always exist, interface to make [`generate_dataset`](@ref) work.
+Either implement this or generate_dataset.
+"""
+function generate_sample end
+
 """
     generate_dataset(::AbstractBenchmark, dataset_size::Int; kwargs...) -> Vector{<:DataSample}
 
 Generate a `Vector` of [`DataSample`](@ref)  of length `dataset_size` for given benchmark.
 Content of the dataset can be visualized using [`plot_data`](@ref), when it applies.
 """
-function generate_dataset end
+function generate_dataset(
+    bench::AbstractBenchmark,
+    dataset_size::Int;
+    seed=nothing,
+    rng=MersenneTwister(0),
+    kwargs...,
+)
+    Random.seed!(rng, seed)
+    return [generate_sample(bench, rng; kwargs...) for _ in 1:dataset_size]
+end
 
 """
     generate_maximizer(::AbstractBenchmark; kwargs...)
diff --git a/test/subset_selection.jl b/test/subset_selection.jl
index 694f7f4..d59ae54 100644
--- a/test/subset_selection.jl
+++ b/test/subset_selection.jl
@@ -4,14 +4,15 @@
     n = 25
     k = 5
 
-    b = SubsetSelectionBenchmark(; n=n, k=k)
+    b_identity = SubsetSelectionBenchmark(; n=n, k=k)
+    b = SubsetSelectionBenchmark(; n=n, k=k, identity_mapping=false)
 
     io = IOBuffer()
     show(io, b)
     @test String(take!(io)) == "SubsetSelectionBenchmark(n=25, k=5)"
 
-    dataset = generate_dataset(b, 50)
-    dataset2 = generate_dataset(b, 50; identity_mapping=false)
+    dataset = generate_dataset(b_identity, 50)
+    dataset2 = generate_dataset(b, 50)
     model = generate_statistical_model(b)
     maximizer = generate_maximizer(b)
 

From fe9482fef8a4e7a1c9f16135959d1fbc464d243e Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Thu, 3 Jul 2025 10:06:13 +0200
Subject: [PATCH 05/29] update docstrings

---
 src/Argmax/Argmax.jl                          | 26 +++-------------
 .../FixedSizeShortestPath.jl                  |  5 ++++
 .../PortfolioOptimization.jl                  |  5 ++++
 .../StochasticVehicleScheduling.jl            |  2 +-
 .../solution/algorithms/column_generation.jl  |  2 +-
 .../solution/algorithms/deterministic_mip.jl  |  2 +-
 .../solution/algorithms/local_search.jl       |  2 +-
 .../solution/algorithms/mip.jl                |  4 +--
 src/Utils/interface.jl                        | 30 +++++++++++++++----
 9 files changed, 44 insertions(+), 34 deletions(-)

diff --git a/src/Argmax/Argmax.jl b/src/Argmax/Argmax.jl
index a9590df..27663c5 100644
--- a/src/Argmax/Argmax.jl
+++ b/src/Argmax/Argmax.jl
@@ -62,6 +62,10 @@ end
 
 """
 $TYPEDSIGNATURES
+
+Generate a data sample for the argmax benchmark.
+This function generates a random feature matrix, computes the costs using the encoder,
+and adds noise to the costs before computing a target solution.
 """
 function Utils.generate_sample(
     bench::ArgmaxBenchmark, rng::AbstractRNG; noise_std::Float32=0.0f0
@@ -73,28 +77,6 @@ function Utils.generate_sample(
     return DataSample(; x=features, θ_true=costs, y_true=noisy_solution)
 end
 
-# """
-# $TYPEDSIGNATURES
-
-# Generate a dataset of labeled instances for the argmax problem.
-# """
-# function Utils.generate_dataset(
-#     bench::ArgmaxBenchmark, dataset_size::Int; noise_std=0.0, kwargs...
-# )
-#     return Utils.generate_dataset(bench, dataset_size; noise_std=noise_std, kwargs...)
-#     # (; instance_dim, nb_features, encoder) = bench
-#     # rng = MersenneTwister(seed)
-#     # features = [randn(rng, Float32, nb_features, instance_dim) for _ in 1:dataset_size]
-#     # costs = encoder.(features)
-#     # noisy_solutions = [
-#     #     one_hot_argmax(θ + noise_std * randn(rng, Float32, instance_dim)) for θ in costs
-#     # ]
-#     # return [
-#     #     DataSample(; x, θ_true, y_true) for
-#     #     (x, θ_true, y_true) in zip(features, costs, noisy_solutions)
-#     # ]
-# end
-
 """
 $TYPEDSIGNATURES
 
diff --git a/src/FixedSizeShortestPath/FixedSizeShortestPath.jl b/src/FixedSizeShortestPath/FixedSizeShortestPath.jl
index ed799ea..46a22fe 100644
--- a/src/FixedSizeShortestPath/FixedSizeShortestPath.jl
+++ b/src/FixedSizeShortestPath/FixedSizeShortestPath.jl
@@ -103,6 +103,11 @@ function Utils.generate_maximizer(bench::FixedSizeShortestPathBenchmark; use_dij
     return shortest_path_maximizer
 end
 
+"""
+$TYPEDSIGNATURES
+
+Generate a labeled sample for the fixed size shortest path benchmark.
+"""
 function Utils.generate_sample(
     bench::FixedSizeShortestPathBenchmark, rng::AbstractRNG; type::Type=Float32
 )
diff --git a/src/PortfolioOptimization/PortfolioOptimization.jl b/src/PortfolioOptimization/PortfolioOptimization.jl
index 32c32bc..7d1b577 100644
--- a/src/PortfolioOptimization/PortfolioOptimization.jl
+++ b/src/PortfolioOptimization/PortfolioOptimization.jl
@@ -82,6 +82,11 @@ function Utils.generate_maximizer(bench::PortfolioOptimizationBenchmark)
     return portfolio_maximizer
 end
 
+"""
+$TYPEDSIGNATURES
+
+Generate a labeled sample for the portfolio optimization problem.
+"""
 function Utils.generate_sample(
     bench::PortfolioOptimizationBenchmark, rng::AbstractRNG; type::Type=Float32
 )
diff --git a/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl b/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl
index 83da492..e148d5e 100644
--- a/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl
+++ b/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl
@@ -94,7 +94,7 @@ function Utils.generate_sample(
     instance = Instance(; nb_tasks, nb_scenarios, rng, store_city)
     x = get_features(instance)
     y_true = if compute_solutions
-        algorithm(instance; kwargs...).value  # TODO: modify algorithms to directly return the solution
+        algorithm(instance; kwargs...)
     else
         nothing
     end
diff --git a/src/StochasticVehicleScheduling/solution/algorithms/column_generation.jl b/src/StochasticVehicleScheduling/solution/algorithms/column_generation.jl
index dbd2fd6..1bfbe1f 100644
--- a/src/StochasticVehicleScheduling/solution/algorithms/column_generation.jl
+++ b/src/StochasticVehicleScheduling/solution/algorithms/column_generation.jl
@@ -189,5 +189,5 @@ function column_generation_algorithm(
     end
 
     col_solution = solution_from_paths(sol, instance)
-    return col_solution
+    return col_solution.value
 end
diff --git a/src/StochasticVehicleScheduling/solution/algorithms/deterministic_mip.jl b/src/StochasticVehicleScheduling/solution/algorithms/deterministic_mip.jl
index 5f68190..9e14861 100644
--- a/src/StochasticVehicleScheduling/solution/algorithms/deterministic_mip.jl
+++ b/src/StochasticVehicleScheduling/solution/algorithms/deterministic_mip.jl
@@ -41,5 +41,5 @@ function deterministic_mip(instance::Instance; model_builder=highs_model, silent
     solution = value.(y)
 
     sol = solution_from_JuMP_array(solution, graph)
-    return sol
+    return sol.value
 end
diff --git a/src/StochasticVehicleScheduling/solution/algorithms/local_search.jl b/src/StochasticVehicleScheduling/solution/algorithms/local_search.jl
index b4f0f0f..49ae00c 100644
--- a/src/StochasticVehicleScheduling/solution/algorithms/local_search.jl
+++ b/src/StochasticVehicleScheduling/solution/algorithms/local_search.jl
@@ -153,5 +153,5 @@ Very simple heuristic, using [`local_search`](@ref)
 function local_search(instance::Instance; num_iterations=1000)
     _, initial_solution = solve_deterministic_VSP(instance)
     sol, _, _, _ = _local_search(initial_solution, instance; nb_it=num_iterations)
-    return sol
+    return sol.value
 end
diff --git a/src/StochasticVehicleScheduling/solution/algorithms/mip.jl b/src/StochasticVehicleScheduling/solution/algorithms/mip.jl
index e202569..10b0b40 100644
--- a/src/StochasticVehicleScheduling/solution/algorithms/mip.jl
+++ b/src/StochasticVehicleScheduling/solution/algorithms/mip.jl
@@ -78,7 +78,7 @@ function compact_linearized_mip(
     solution = value.(y)
 
     sol = solution_from_JuMP_array(solution, graph)
-    return sol
+    return sol.value
 end
 
 """
@@ -149,5 +149,5 @@ function compact_mip(
     solution = value.(y)
 
     sol = solution_from_JuMP_array(solution, graph)
-    return sol
+    return sol.value
 end
diff --git a/src/Utils/interface.jl b/src/Utils/interface.jl
index 5f4188c..e6ecb17 100644
--- a/src/Utils/interface.jl
+++ b/src/Utils/interface.jl
@@ -1,10 +1,10 @@
 """
 $TYPEDEF
 
-Abstract type interface for a benchmark problem.
+Abstract type interface for benchmark problems.
 
 The following methods are mandatory for benchmarks:
-- [`generate_dataset`](@ref)
+- [`generate_dataset`](@ref) or [`generate_sample`](@ref)
 - [`generate_statistical_model`](@ref)
 - [`generate_maximizer`](@ref)
 
@@ -17,27 +17,45 @@ abstract type AbstractBenchmark end
 
 """
 $TYPEDEF
+
+Abstract type interface for stochastic benchmark problems.
+This type should be used for benchmarks that involve single stage stochastic optimization problems.
+
+It follows the same interface as [`AbstractBenchmark`](@ref), with the addition of the following methods:
+TODO
 """
 abstract type AbstractStochasticBenchmark <: AbstractBenchmark end
 
 """
 $TYPEDEF
+
+Abstract type interface for dynamic benchmark problems.
+This type should be used for benchmarks that involve multi-stage stochastic optimization problems.
+
+It follows the same interface as [`AbstractStochasticBenchmark`](@ref), with the addition of the following methods:
+TODO
 """
 abstract type AbstractDynamicBenchmark <: AbstractStochasticBenchmark end
 
 """
-    generate_sample(::AbstractBenchmark, rng::AbstractRNG; kwargs...)
+    generate_sample(::AbstractBenchmark, rng::AbstractRNG; kwargs...) -> DataSample
 
-Do not always exist, interface to make [`generate_dataset`](@ref) work.
-Either implement this or generate_dataset.
+Generate a single [`DataSample`](@ref) for given benchmark.
+This is a low-level function that is used by [`generate_dataset`](@ref) to create
+a dataset of samples. It is not mandatory to implement this method, but it is
+recommended for benchmarks that have a well-defined way to generate individual samples.
+An alternative is to directly implement [`generate_dataset`](@ref) to create a dataset
+without generating individual samples.
 """
 function generate_sample end
 
 """
     generate_dataset(::AbstractBenchmark, dataset_size::Int; kwargs...) -> Vector{<:DataSample}
 
-Generate a `Vector` of [`DataSample`](@ref)  of length `dataset_size` for given benchmark.
+Generate a `Vector` of [`DataSample`](@ref) of length `dataset_size` for given benchmark.
 Content of the dataset can be visualized using [`plot_data`](@ref), when it applies.
+
+By default, it uses [`generate_sample`](@ref) to create each sample in the dataset, and passes any keyword arguments to it.
 """
 function generate_dataset(
     bench::AbstractBenchmark,

From 48c7a214b5dbecefe54c073752154d8a540312cf Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Thu, 3 Jul 2025 16:58:15 +0200
Subject: [PATCH 06/29] implement DVSP under the new interface

---
 Project.toml                                  |   2 +
 src/DecisionFocusedLearningBenchmarks.jl      |  11 +-
 .../algorithms/anticipative_solver.jl         |  34 +-
 .../algorithms/prize_collecting_vsp.jl        |  22 +-
 .../DynamicVSP/environment/environment.jl     | 293 ------------------
 .../DynamicVSP/environment/state.jl           |  89 ------
 .../DynamicVSP/learning/2d_features.jl        |   6 +-
 .../DynamicVSP/learning/dataset.jl            |   4 +-
 .../DynamicVSP/learning/features.jl           |  66 ++--
 .../DynamicVSP/policy/abstract_vsp_policy.jl  |  17 +-
 .../DynamicVSP/policy/anticipative_policy.jl  |   9 +-
 .../DynamicVSP/policy/greedy_policy.jl        |  26 +-
 .../DynamicVSP/policy/kleopatra_policy.jl     |  25 +-
 .../DynamicVSP/policy/lazy_policy.jl          |  25 +-
 .../DynamicVSP/utils.jl                       |  11 -
 .../DynamicVehicleScheduling.jl               |  73 +++--
 .../dynamic_config.jl                         |  24 --
 .../environment/environment.jl                |  86 +++++
 .../environment/instance.jl                   |  52 ++++
 .../{DynamicVSP => }/environment/plot.jl      |   2 +-
 .../environment/scenario.jl                   |  47 +++
 .../environment/state.jl                      | 201 ++++++++++++
 .../vsp => static_vsp}/instance.jl            |  18 +-
 .../{DynamicVSP/vsp => static_vsp}/parsing.jl |   2 +-
 .../{DynamicVSP/vsp => static_vsp}/plot.jl    |   2 +-
 .../vsp => static_vsp}/solution.jl            |   0
 src/DynamicVehicleScheduling/utils.jl         |  12 +
 .../StochasticVehicleScheduling.jl            |   6 +-
 src/Utils/interface.jl                        |  55 ++--
 src/Warcraft/Warcraft.jl                      |   2 +-
 30 files changed, 589 insertions(+), 633 deletions(-)
 delete mode 100644 src/DynamicVehicleScheduling/DynamicVSP/environment/environment.jl
 delete mode 100644 src/DynamicVehicleScheduling/DynamicVSP/environment/state.jl
 delete mode 100644 src/DynamicVehicleScheduling/DynamicVSP/utils.jl
 delete mode 100644 src/DynamicVehicleScheduling/dynamic_config.jl
 create mode 100644 src/DynamicVehicleScheduling/environment/environment.jl
 create mode 100644 src/DynamicVehicleScheduling/environment/instance.jl
 rename src/DynamicVehicleScheduling/{DynamicVSP => }/environment/plot.jl (98%)
 create mode 100644 src/DynamicVehicleScheduling/environment/scenario.jl
 create mode 100644 src/DynamicVehicleScheduling/environment/state.jl
 rename src/DynamicVehicleScheduling/{DynamicVSP/vsp => static_vsp}/instance.jl (65%)
 rename src/DynamicVehicleScheduling/{DynamicVSP/vsp => static_vsp}/parsing.jl (97%)
 rename src/DynamicVehicleScheduling/{DynamicVSP/vsp => static_vsp}/plot.jl (96%)
 rename src/DynamicVehicleScheduling/{DynamicVSP/vsp => static_vsp}/solution.jl (100%)

diff --git a/Project.toml b/Project.toml
index ed7fde9..b9695a6 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,6 +4,7 @@ authors = ["Members of JuliaDecisionFocusedLearning"]
 version = "0.2.2"
 
 [deps]
+CommonRLInterface = "d842c3ba-07a1-494f-bbec-f5741b0a3e98"
 ConstrainedShortestPaths = "b3798467-87dc-4d99-943d-35a1bd39e395"
 DataDeps = "124859b0-ceae-595e-8997-d05f6a7a8dfe"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
@@ -30,6 +31,7 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 
 [compat]
+CommonRLInterface = "0.3.3"
 ConstrainedShortestPaths = "0.6.0"
 DataDeps = "0.7"
 Distributions = "0.25"
diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl
index 74bb04a..252e5d4 100644
--- a/src/DecisionFocusedLearningBenchmarks.jl
+++ b/src/DecisionFocusedLearningBenchmarks.jl
@@ -32,8 +32,8 @@ function __init__()
 
     register(
         DataDep(
-            "euro-neurips-2022",
-            "EURO-NeurIPs challenge 2022 dataset",
+            "dvrptw",
+            "EURO-NeurIPS challenge 2022 dataset for the dynamic vehicle routing problem with time windows",
             "https://github.com/ortec/euro-neurips-vrp-2022-quickstart/archive/refs/heads/main.zip";
             post_fetch_method=_euro_neurips_unpack,
         ),
@@ -54,7 +54,7 @@ include("Warcraft/Warcraft.jl")
 include("FixedSizeShortestPath/FixedSizeShortestPath.jl")
 include("PortfolioOptimization/PortfolioOptimization.jl")
 include("StochasticVehicleScheduling/StochasticVehicleScheduling.jl")
-# include("DynamicVehicleScheduling/DynamicVehicleScheduling.jl")
+include("DynamicVehicleScheduling/DynamicVehicleScheduling.jl")
 
 using .Utils
 using .Argmax
@@ -64,11 +64,11 @@ using .Warcraft
 using .FixedSizeShortestPath
 using .PortfolioOptimization
 using .StochasticVehicleScheduling
-# using .DynamicVehicleScheduling
+using .DynamicVehicleScheduling
 
 # Interface
 export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark, DataSample
-export generate_dataset
+export generate_sample, generate_dataset, generate_scenario
 export generate_statistical_model
 export generate_maximizer, maximizer_kwargs
 export objective_value
@@ -83,5 +83,6 @@ export WarcraftBenchmark
 export FixedSizeShortestPathBenchmark
 export PortfolioOptimizationBenchmark
 export StochasticVehicleSchedulingBenchmark
+export DVSPBenchmark
 
 end # module DecisionFocusedLearningBenchmarks
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/algorithms/anticipative_solver.jl b/src/DynamicVehicleScheduling/DynamicVSP/algorithms/anticipative_solver.jl
index 43a9edb..ef897e5 100644
--- a/src/DynamicVehicleScheduling/DynamicVSP/algorithms/anticipative_solver.jl
+++ b/src/DynamicVehicleScheduling/DynamicVSP/algorithms/anticipative_solver.jl
@@ -4,13 +4,14 @@ $TYPEDSIGNATURES
 Retrieve anticipative routes solution from the given MIP solution `y`.
 Outputs a set of routes per epoch.
 """
-function retrieve_routes_anticipative(y::AbstractArray, dvspenv::DVSPEnv)
-    nb_tasks = length(dvspenv.customer_index)
-    (; first_epoch, last_epoch) = dvspenv.config
+function retrieve_routes_anticipative(y::AbstractArray, dvspenv::DVSPEnv, customer_index)
+    nb_tasks = length(customer_index)
+    first_epoch = 1
+    (; last_epoch) = dvspenv.instance
     job_indices = 2:(nb_tasks)
     epoch_indices = first_epoch:last_epoch
 
-    routes = [Vector{Int}[] for t in epoch_indices]
+    routes = [Vector{Int}[] for _ in epoch_indices]
     for t in epoch_indices
         start = [i for i in job_indices if y[1, i, t] ≈ 1]
         for task in start
@@ -39,13 +40,21 @@ $TYPEDSIGNATURES
 Solve the anticipative VSP problem for environment `env`.
 For this, it uses the current environment history, so make sure that the environment is terminated before calling this method.
 """
-function anticipative_solver(env::DVSPEnv; model_builder=highs_model, draw_epochs=true)
-    draw_epochs && draw_all_epochs!(env)
-    (; customer_index, service_time, start_time, request_epoch) = env
-    duration = env.config.static_instance.duration[customer_index, customer_index]
-    (; first_epoch, last_epoch, epoch_duration, Δ_dispatch) = env.config
+function anticipative_solver(
+    env::DVSPEnv, scenario=env.scenario; model_builder=highs_model, reset_env=false
+)
+    reset_env && reset!(env)
+    request_epoch = [0]
+    for (epoch, indices) in enumerate(scenario.indices)
+        request_epoch = vcat(request_epoch, fill(epoch, length(indices)))
+    end
+    customer_index = vcat(1, scenario.indices...)
+    service_time = vcat(0.0, scenario.service_time...)
+    start_time = vcat(0.0, scenario.start_time...)
 
-    @assert first_epoch == 1
+    duration = env.instance.static_instance.duration[customer_index, customer_index]
+    first_epoch = 1
+    (; last_epoch, epoch_duration, Δ_dispatch) = env.instance
 
     model = model_builder()
     set_silent(model)
@@ -80,7 +89,7 @@ function anticipative_solver(env::DVSPEnv; model_builder=highs_model, draw_epoch
         sum(y[j, i, t] for j in 1:nb_nodes, t in epoch_indices) == 1
     )
 
-    # a trip from i can be planned only after request appeared
+    # a trip from i can be planned only after request appeared (release times)
     for i in job_indices, t in epoch_indices, j in 1:nb_nodes
         if t < request_epoch[i]
             @constraint(model, y[i, j, t] <= 0)
@@ -107,5 +116,6 @@ function anticipative_solver(env::DVSPEnv; model_builder=highs_model, draw_epoch
 
     optimize!(model)
 
-    return retrieve_routes_anticipative(value.(y), env)
+    return JuMP.objective_value(model),
+    retrieve_routes_anticipative(value.(y), env, customer_index)
 end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/algorithms/prize_collecting_vsp.jl b/src/DynamicVehicleScheduling/DynamicVSP/algorithms/prize_collecting_vsp.jl
index 531169c..75af6a4 100644
--- a/src/DynamicVehicleScheduling/DynamicVSP/algorithms/prize_collecting_vsp.jl
+++ b/src/DynamicVehicleScheduling/DynamicVSP/algorithms/prize_collecting_vsp.jl
@@ -3,10 +3,10 @@ $TYPEDSIGNATURES
 
 Create the acyclic digraph associated with the given VSP `instance`.
 """
-function create_graph(instance::VSPInstance)
+function create_graph(instance::StaticInstance)
     (; duration, start_time, service_time) = instance
     # Initialize directed graph
-    nb_vertices = nb_locations(instance)
+    nb_vertices = location_count(instance)
     graph = SimpleDiGraph(nb_vertices)
 
     depot = 1  # depot is always index 1
@@ -42,8 +42,8 @@ $TYPEDSIGNATURES
 
 Create the acyclic digraph associated with the given VSP `state`.
 """
-function create_graph(state::VSPState)
-    return create_graph(state.instance)
+function create_graph(state::DVSPState)
+    return create_graph(state.state_instance)
 end
 
 """
@@ -82,9 +82,9 @@ $TYPEDSIGNATURES
 Solve the Prize Collecting Vehicle Scheduling Problem defined by `instance` and prize vector `θ`.
 """
 function prize_collecting_vsp(
-    θ::AbstractVector; instance::VSPState, model_builder=highs_model, kwargs...
+    θ::AbstractVector; instance::DVSPState, model_builder=highs_model, kwargs...
 )
-    (; duration) = instance.instance
+    (; duration) = instance.state_instance
     graph = create_graph(instance)
 
     model = model_builder()
@@ -95,7 +95,7 @@ function prize_collecting_vsp(
 
     @variable(model, y[i=1:nb_nodes, j=1:nb_nodes; has_edge(graph, i, j)] >= 0)
 
-    θ_ext = fill(0.0, nb_locations(instance))  # no prize for must dispatch requests, only hard constraints
+    θ_ext = fill(0.0, location_count(instance))  # no prize for must dispatch requests, only hard constraints
     θ_ext[instance.is_postponable] .= θ
 
     @objective(
@@ -131,7 +131,7 @@ end
 function prize_collecting_vsp_Q(
     θ::AbstractVector,
     vals::AbstractVector;
-    instance::VSPState,
+    instance::DVSPState,
     model_builder=highs_model,
     kwargs...,
 )
@@ -142,7 +142,7 @@ function prize_collecting_vsp_Q(
     nb_nodes = nv(graph)
     job_indices = 2:(nb_nodes)
     @variable(model, y[i=1:nb_nodes, j=1:nb_nodes; has_edge(graph, i, j)] >= 0)
-    θ_ext = fill(0.0, nb_locations(instance.instance))  # no prize for must dispatch requests, only hard constraints
+    θ_ext = fill(0.0, location_count(instance.instance))  # no prize for must dispatch requests, only hard constraints
     θ_ext[instance.is_postponable] .= θ
     # v_ext = fill(0.0, nb_locations(instance.instance))  # no prize for must dispatch requests, only hard constraints
     # v_ext[instance.is_postponable] .= vals
@@ -176,7 +176,7 @@ end
 function my_objective_value(θ, routes; instance)
     (; duration) = instance.instance
     total = 0.0
-    θ_ext = fill(0.0, nb_locations(instance))
+    θ_ext = fill(0.0, location_count(instance))
     θ_ext[instance.is_postponable] .= θ
     for route in routes
         for (u, v) in partition(vcat(1, route), 2, 1)
@@ -189,7 +189,7 @@ end
 function _objective_value(θ, routes; instance)
     (; duration) = instance.instance
     total = 0.0
-    θ_ext = fill(0.0, nb_locations(instance))
+    θ_ext = fill(0.0, location_count(instance))
     θ_ext[instance.is_postponable] .= θ
     mapping = cumsum(instance.is_postponable)
     g = falses(length(θ))
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/environment/environment.jl b/src/DynamicVehicleScheduling/DynamicVSP/environment/environment.jl
deleted file mode 100644
index 189afb4..0000000
--- a/src/DynamicVehicleScheduling/DynamicVSP/environment/environment.jl
+++ /dev/null
@@ -1,293 +0,0 @@
-"""
-$TYPEDEF
-
-Environment data structure for the Dynamic Vehicle Scheduling Problem.
-
-# Fields
-$TYPEDFIELDS
-"""
-@kwdef mutable struct DVSPEnv{C<:DynamicConfig,R<:AbstractRNG,T,S<:VSPState}
-    "instance config as a [`DynamicConfig`](@ref)"
-    config::C
-    "current epoch number"
-    current_epoch::Int
-    "random number generator"
-    rng::R
-    "index of each customer in the static instance from the config"
-    customer_index::Vector{Int}
-    "service time values of each customer"
-    service_time::Vector{T}
-    "start time values of each customer"
-    start_time::Vector{T}
-    "1 if the request was already dispatched in a previous epoch, 0 otherwise"
-    request_is_dispatched::BitVector
-    "epoch index at which each request appearred"
-    request_epoch::Vector{Int}
-    "current state of environment"
-    state::S
-end
-
-"""
-$TYPEDSIGNATURES
-
-Constructor for [`DVSPEnv`](@ref).
-"""
-function DVSPEnv(
-    static_instance::VSPInstance;
-    seed=0,
-    max_requests_per_epoch=10,
-    Δ_dispatch=1.0,
-    epoch_duration=1.0,
-)
-    first_epoch = 1
-    last_epoch = trunc(Int, maximum(static_instance.start_time) / epoch_duration) - 1
-
-    config = DynamicConfig(;
-        static_instance,
-        max_requests_per_epoch,
-        Δ_dispatch,
-        epoch_duration,
-        seed,
-        first_epoch,
-        last_epoch,
-    )
-    return DVSPEnv(;
-        config,
-        customer_index=[1],
-        service_time=[0.0],
-        start_time=[0.0],
-        request_is_dispatched=falses(1),
-        state=VSPState(),
-        rng=MersenneTwister(seed),
-        current_epoch=0,
-        request_epoch=[first_epoch - 1],
-    )
-end
-
-"""
-$TYPEDSIGNATURES
-
-Return the indices of the epochs in the environment.
-"""
-function get_epoch_indices(env::DVSPEnv)
-    return (env.config.first_epoch):(env.config.last_epoch)
-end
-
-"""
-$TYPEDSIGNATURES
-
-Return the number of epochs in the environment.
-"""
-function nb_epochs(env::DVSPEnv)
-    return length(get_epoch_indices(env))
-end
-
-"""
-$TYPEDSIGNATURES
-
-Get the current state of the environment.
-"""
-get_state(env::DVSPEnv) = env.state
-
-"""
-$TYPEDSIGNATURES
-
-Get the current time of the environment, i.e. the start time of the current_epoch.
-"""
-get_time(env::DVSPEnv) = (env.current_epoch - 1) * env.config.epoch_duration
-
-"""
-$TYPEDSIGNATURES
-
-Get the planning start time of the environment, i.e. the time at which vehicles routes dispatched in current epoch can depart.
-"""
-get_planning_start_time(env::DVSPEnv) = get_time(env) + env.config.Δ_dispatch
-
-"""
-$TYPEDSIGNATURES
-
-Check if the episode is terminated, i.e. if the current epoch is the last one.
-"""
-is_terminated(env::DVSPEnv) = env.current_epoch >= env.config.last_epoch
-
-"""
-$TYPEDSIGNATURES
-
-Return the total number of locations in the environment history.
-"""
-nb_locations(env::DVSPEnv) = length(env.customer_index)
-
-"""
-$TYPEDSIGNATURES
-
-Return a vector of env location indices that are still undispatched.
-"""
-get_undispatched_indices(env::DVSPEnv) = (1:nb_locations(env))[.!env.request_is_dispatched]
-
-"""
-$TYPEDSIGNATURES
-
-Reset the environment to its initial state.
-Also reset the seed if `reset_seed` is set to true.
-"""
-function reset!(env::DVSPEnv; reset_seed::Bool=true)
-    (; config) = env
-    env.current_epoch = config.first_epoch - 1
-    depot = 1
-    env.customer_index = [env.customer_index[depot]]
-    env.service_time = [env.service_time[depot]]
-    env.start_time = env.start_time[depot:depot]
-    env.request_is_dispatched = falses(1)
-    env.request_epoch = [env.current_epoch]
-    reset_seed && seed!(env.rng, config.seed)
-    return nothing
-end
-
-"""
-$TYPEDSIGNATURES
-
-Internal method that updates the state of the environment to correspond to env info.
-This is an internal method and should not be called directly.
-"""
-function update_state!(env::DVSPEnv)
-    (; config) = env
-    (; epoch_duration, static_instance, last_epoch) = config
-    (; duration) = static_instance
-    depot = 1
-
-    planning_start_time = get_planning_start_time(env)
-
-    # Must dispatch
-    undispatched_indices = get_undispatched_indices(env)
-    # If it's the last epoch, we must dispatch all remaining requests
-    is_must_dispatch = undispatched_indices .!= depot
-    # Else, only requests unreachable from the depot during next epoch are must dispatch
-    if env.current_epoch < last_epoch
-        is_must_dispatch =
-            planning_start_time .+ epoch_duration .+
-            @view(duration[depot, env.customer_index[undispatched_indices]]) .>
-            @view(env.start_time[undispatched_indices])
-        is_must_dispatch[1] = 0
-    end
-
-    is_postponable = falses(length(is_must_dispatch))
-    is_postponable[2:end] .= .!is_must_dispatch[2:end]
-
-    epoch_instance = VSPState(;
-        instance=VSPInstance(;
-            service_time=env.service_time[undispatched_indices],
-            start_time=env.start_time[undispatched_indices] .- planning_start_time,  # shift start times to planning start time
-            coordinate=static_instance.coordinate[env.customer_index[undispatched_indices]],
-            duration=duration[
-                env.customer_index[undispatched_indices],
-                env.customer_index[undispatched_indices],
-            ],
-        ),
-        is_must_dispatch,
-        is_postponable,
-    )
-
-    env.state = epoch_instance
-    return epoch_instance
-end
-
-"""
-$TYPEDSIGNATURES
-
-Update `env` by drawing the next epoch and returning a corresponding `EpochInstance`.
-"""
-function next_epoch!(env::DVSPEnv)
-    # Increment epoch number
-    env.current_epoch += 1
-
-    # Retrieve useful information
-    (; rng, config) = env
-    (; max_requests_per_epoch, static_instance) = config
-    (; duration, service_time, start_time) = config.static_instance
-    depot = 1
-
-    # Draw new requests uniformly from static instance
-    N = nb_customers(static_instance)
-
-    planning_start_time = get_planning_start_time(env)
-
-    coordinate_indices = sample_indices(rng, max_requests_per_epoch, N)
-    start_time_indices = sample_indices(rng, max_requests_per_epoch, N)
-    service_time_indices = sample_indices(rng, max_requests_per_epoch, N)
-
-    # Only keep requests with feasible start times (rejection sampling)
-    # i.e. that are reachable from the depot before their start time
-    is_feasible =
-        planning_start_time .+ duration[depot, coordinate_indices] .<=
-        start_time[start_time_indices]
-
-    # Update environment state
-    nb_new_requests = sum(is_feasible)
-
-    # Update environment by adding new requests in
-    env.customer_index = vcat(env.customer_index, coordinate_indices[is_feasible])
-    env.service_time = vcat(
-        env.service_time, service_time[service_time_indices[is_feasible]]
-    )
-    env.start_time = vcat(env.start_time, start_time[start_time_indices[is_feasible]])
-    env.request_is_dispatched = vcat(env.request_is_dispatched, falses(nb_new_requests))
-    env.request_epoch = vcat(env.request_epoch, fill(env.current_epoch, nb_new_requests))
-
-    # Finally, update the state of the environment with these new requests
-    return update_state!(env)
-end
-
-"""
-$TYPEDSIGNATURES
-
-Transform state routes indices into env route indices.
-"""
-function env_routes_from_state_routes(env, routes)
-    undispatched_indices = get_undispatched_indices(env)
-    return [undispatched_indices[route] for route in routes]
-end
-
-"""
-$TYPEDSIGNATURES
-
-Transform env route indices into state route indices.
-"""
-function state_route_from_env_routes(env, routes)
-    nb_requests = length(env.customer_index)
-    undispatched_indices = (1:nb_requests)[.!env.request_is_dispatched]
-    global_to_local = zeros(Int, nb_requests)
-    for (local_i, global_i) in enumerate(undispatched_indices)
-        global_to_local[global_i] = local_i
-    end
-    return [global_to_local[route] for route in routes]
-end
-
-"""
-$TYPEDSIGNATURES
-
-Apply given `routes` as an action to `env`.
-
-Routes should be given with global indexation.
-Use [`env_routes_from_state_routes`](@ref) if needed to convert the indices beforehand.
-"""
-function apply_decision!(env::DVSPEnv, routes::Vector{Vector{Int}})
-    for route in routes
-        env.request_is_dispatched[route] .= true
-    end
-    duration = @view env.config.static_instance.duration[
-        env.customer_index, env.customer_index
-    ]
-    return cost(routes, duration)
-end
-
-"""
-$TYPEDSIGNATURES
-
-Draw all epochs until the end of the environment, without any actions.
-"""
-function draw_all_epochs!(env::DVSPEnv; reset_env=true)
-    reset_env && reset!(env)
-    while !is_terminated(env)
-        next_epoch!(env)
-    end
-end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/environment/state.jl b/src/DynamicVehicleScheduling/DynamicVSP/environment/state.jl
deleted file mode 100644
index dddb076..0000000
--- a/src/DynamicVehicleScheduling/DynamicVSP/environment/state.jl
+++ /dev/null
@@ -1,89 +0,0 @@
-"""
-$TYPEDSIGNATURES
-
-State data structure for the Dynamic Vehicle Scheduling Problem.
-"""
-@kwdef struct VSPState{I}
-    "associated (static) vehicle scheduling instance"
-    instance::I = VSPInstance()
-    "for each location, 1 if the request must be dispatched, 0 otherwise. The depot is always 0."
-    is_must_dispatch::BitVector = falses(0)
-    "for each location, 1 if the request can be postponed, 0 otherwise. The depot is always 0."
-    is_postponable::BitVector = falses(0)
-end
-
-"""
-$TYPEDSIGNATURES
-
-Return the number of locations in `state` (customers + depot).
-"""
-nb_locations(state::VSPState) = nb_locations(state.instance)
-
-"""
-$TYPEDSIGNATURES
-
-Return the number of customers in `state`.
-"""
-nb_customers(state::VSPState) = nb_customers(state.instance)
-
-"""
-$TYPEDSIGNATURES
-
-Get the service time vector
-"""
-service_time(state::VSPState) = service_time(state.instance)
-
-"""
-$TYPEDSIGNATURES
-
-Get the coordinates vector.
-"""
-coordinate(state::VSPState) = coordinate(state.instance)
-
-"""
-$TYPEDSIGNATURES
-
-Get the duration matrix.
-"""
-duration(state::VSPState) = duration(state.instance)
-
-"""
-$TYPEDSIGNATURES
-
-Get the start time vector.
-"""
-start_time(state::VSPState) = start_time(state.instance)
-
-"""
-$TYPEDSIGNATURES
-
-Check if the given routes are feasible.
-Routes should be given with global indexation.
-Use [`env_routes_from_state_routes`](@ref) if needed to convert the indices beforehand.
-"""
-function is_feasible(state::VSPState, routes::Vector{Vector{Int}}; verbose::Bool=false)
-    (; is_must_dispatch, instance) = state
-    (; duration, start_time, service_time) = instance
-    is_dispatched = falses(length(is_must_dispatch))
-
-    # Check that routes follow time constraints
-    for route in routes
-        is_dispatched[route] .= true
-        current = 1  # start at the depot
-        current_time = start_time[current]
-        for next in route
-            current_time += duration[current, next]
-            if current_time > start_time[next]
-                verbose &&
-                    @warn "Route $route is infeasible: time constraint violated at location $next"
-                return false
-            end
-            current_time += service_time[next]
-            current = next
-        end
-    end
-
-    # Check that all must dispatch requests are dispatched
-    return all(is_dispatched[is_must_dispatch])
-    return true
-end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/learning/2d_features.jl b/src/DynamicVehicleScheduling/DynamicVSP/learning/2d_features.jl
index 941468a..7226e9c 100644
--- a/src/DynamicVehicleScheduling/DynamicVSP/learning/2d_features.jl
+++ b/src/DynamicVehicleScheduling/DynamicVSP/learning/2d_features.jl
@@ -1,15 +1,13 @@
 function get_features_meanTimeToRequests(env::DVSPEnv)
     quantiles = [0.5]
-    a = env.config.static_instance.duration[
-        env.customer_index[.!env.request_is_dispatched], 2:end
-    ]
+    a = env.instance.static_instance.duration[env.state.location_indices, 2:end]
     quantileTimeToRequests = mapslices(x -> quantile(x, quantiles), a; dims=2)
     return quantileTimeToRequests
 end
 
 function compute_2D_features(env::DVSPEnv)
     state = env.state
-    timeDepotRequest = state.instance.duration[:, 1][state.is_postponable]
+    timeDepotRequest = state.state_instance.duration[:, 1][state.is_postponable]
     quantileTimeToRequests = get_features_meanTimeToRequests(env)[state.is_postponable]
     return hcat(timeDepotRequest, quantileTimeToRequests)'
 end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/learning/dataset.jl b/src/DynamicVehicleScheduling/DynamicVSP/learning/dataset.jl
index 8f80a44..da37b59 100644
--- a/src/DynamicVehicleScheduling/DynamicVSP/learning/dataset.jl
+++ b/src/DynamicVehicleScheduling/DynamicVSP/learning/dataset.jl
@@ -2,7 +2,7 @@ function load_VSP_dataset(
     datadir::String; model_builder=highs_model, use_2D_features=false, kwargs...
 )
     instances_files = filtered_readdir(datadir)
-    X = Tuple{Matrix{Float32},VSPState{VSPInstance{Float64}}}[]
+    X = Tuple{Matrix{Float32},DVSPState{VSPInstance{Float64}}}[]
     Y = BitMatrix[]
 
     for (i, f) in enumerate(instances_files)
@@ -26,7 +26,7 @@ function load_VSP_dataset(
                 Y,
                 VSPSolution(
                     state_route_from_env_routes(env, routes);
-                    max_index=nb_locations(state.instance),
+                    max_index=location_count(state.instance),
                 ).edge_matrix,
             )
             # Update the environment
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/learning/features.jl b/src/DynamicVehicleScheduling/DynamicVSP/learning/features.jl
index 348b816..0cb4160 100644
--- a/src/DynamicVehicleScheduling/DynamicVSP/learning/features.jl
+++ b/src/DynamicVehicleScheduling/DynamicVSP/learning/features.jl
@@ -7,7 +7,7 @@ function get_features_quantileTimeToRequests(env::DVSPEnv)
     return quantileTimeToRequests
 end
 
-function compute_model_free_features(state::VSPState; env::DVSPEnv)
+function compute_model_free_features(state::DVSPState; env::DVSPEnv)
     (; instance, is_postponable) = state
 
     startTimes = instance.start_time
@@ -27,7 +27,7 @@ function compute_model_free_features(state::VSPState; env::DVSPEnv)
     return model_free_features
 end
 
-function compute_model_aware_features(state::VSPState; env::DVSPEnv)
+function compute_model_aware_features(state::DVSPState; env::DVSPEnv)
     quantileTimeToRequests = get_features_quantileTimeToRequests(env)
     model_aware_features = quantileTimeToRequests
     return model_aware_features[state.is_postponable, :]
@@ -40,36 +40,36 @@ function compute_features(env::DVSPEnv)
     return hcat(model_free_features, model_aware_features)'
 end
 
-# ? why is this needed
-function model_free_features_critic(state::VSPState; env::DVSPEnv)
-    (; instance) = state
-    startTimes = instance.start_time
-    endTimes = instance.service_time .+ instance.start_time
-    timeDepotRequest = instance.duration[:, 1]
-    timeRequestDepot = instance.duration[1, :]
-    slack_next_epoch = startTimes .- env.config.epoch_duration
-    model_free_features = hcat(
-        startTimes, endTimes, timeDepotRequest, timeRequestDepot, slack_next_epoch
-    )
-    return model_free_features
-end
+# # ? why is this needed
+# function model_free_features_critic(state::DVSPState; env::DVSPEnv)
+#     (; instance) = state
+#     startTimes = instance.start_time
+#     endTimes = instance.service_time .+ instance.start_time
+#     timeDepotRequest = instance.duration[:, 1]
+#     timeRequestDepot = instance.duration[1, :]
+#     slack_next_epoch = startTimes .- env.config.epoch_duration
+#     model_free_features = hcat(
+#         startTimes, endTimes, timeDepotRequest, timeRequestDepot, slack_next_epoch
+#     )
+#     return model_free_features
+# end
 
-# ?
-function compute_critic_features(env::DVSPEnv)
-    state = env.state
-    model_free_features = model_free_features_critic(state; env)
-    model_aware_features = get_features_quantileTimeToRequests(env)
-    postpon = state.is_postponable
-    return hcat(model_free_features, model_aware_features, postpon)'
-end
+# # ?
+# function compute_critic_features(env::DVSPEnv)
+#     state = env.state
+#     model_free_features = model_free_features_critic(state; env)
+#     model_aware_features = get_features_quantileTimeToRequests(env)
+#     postpon = state.is_postponable
+#     return hcat(model_free_features, model_aware_features, postpon)'
+# end
 
-# ?
-function compute_critic_2D_features(env::DVSPEnv)
-    state = env.state
-    timeDepotRequest = state.instance.duration[:, 1]
-    quantileTimeToRequests = get_features_meanTimeToRequests(env)
-    postpon = state.is_postponable
-    # time_postpon = timeDepotRequest .* postpon
-    # quant_postpon = quantileTimeToRequests .* postpon
-    return hcat(timeDepotRequest, quantileTimeToRequests, postpon)'
-end
+# # ?
+# function compute_critic_2D_features(env::DVSPEnv)
+#     state = env.state
+#     timeDepotRequest = state.instance.duration[:, 1]
+#     quantileTimeToRequests = get_features_meanTimeToRequests(env)
+#     postpon = state.is_postponable
+#     # time_postpon = timeDepotRequest .* postpon
+#     # quant_postpon = quantileTimeToRequests .* postpon
+#     return hcat(timeDepotRequest, quantileTimeToRequests, postpon)'
+# end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/abstract_vsp_policy.jl b/src/DynamicVehicleScheduling/DynamicVSP/policy/abstract_vsp_policy.jl
index 63d8030..bd640fe 100644
--- a/src/DynamicVehicleScheduling/DynamicVSP/policy/abstract_vsp_policy.jl
+++ b/src/DynamicVehicleScheduling/DynamicVSP/policy/abstract_vsp_policy.jl
@@ -11,7 +11,11 @@ $TYPEDSIGNATURES
 Apply the policy to the environment.
 """
 function run_policy!(
-    π::AbstractDynamicVSPPolicy, env::DVSPEnv; check_feasibility=true, kwargs...
+    π::AbstractDynamicVSPPolicy,
+    env::DVSPEnv,
+    scenario=env.scenario;
+    check_feasibility=true,
+    kwargs...,
 )
     # reset environment, and initialize variables
     reset!(env)
@@ -19,13 +23,12 @@ function run_policy!(
     epoch_routes = Vector{Vector{Int}}[]
 
     # epoch loop
-    while !is_terminated(env)
-        next_epoch!(env)
+    while !terminated(env)
         state_routes = π(env; kwargs...)
-        check_feasibility && @assert is_feasible(get_state(env), state_routes)
-        env_routes = env_routes_from_state_routes(env, state_routes)
-        push!(epoch_routes, env_routes)
-        local_cost = apply_decision!(env, env_routes)
+        check_feasibility && @assert is_feasible(observe(env), state_routes)
+        # env_routes = env_routes_from_state_routes(env, state_routes)
+        push!(epoch_routes, state_routes)
+        local_cost = act!(env, state_routes, scenario)
         total_cost += local_cost
     end
 
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/anticipative_policy.jl b/src/DynamicVehicleScheduling/DynamicVSP/policy/anticipative_policy.jl
index b6751c7..d7f2381 100644
--- a/src/DynamicVehicleScheduling/DynamicVSP/policy/anticipative_policy.jl
+++ b/src/DynamicVehicleScheduling/DynamicVSP/policy/anticipative_policy.jl
@@ -10,9 +10,8 @@ $TYPEDSIGNATURES
 
 Apply the anticipative policy to the environment.
 """
-function run_policy!(::AnticipativeVSPPolicy, env::DVSPEnv; model_builder=highs_model)
-    routes_anticipative = anticipative_solver(env; model_builder)
-    duration = env.config.static_instance.duration[env.customer_index, env.customer_index]
-    anticipative_costs = [cost(routes, duration) for routes in routes_anticipative]
-    return sum(anticipative_costs), routes_anticipative
+function run_policy!(
+    ::AnticipativeVSPPolicy, env::DVSPEnv, scenario=env.scenario; model_builder=highs_model
+)
+    return anticipative_solver(env, scenario; model_builder, reset_env=true)
 end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/greedy_policy.jl b/src/DynamicVehicleScheduling/DynamicVSP/policy/greedy_policy.jl
index f6c1654..a15a3b9 100644
--- a/src/DynamicVehicleScheduling/DynamicVSP/policy/greedy_policy.jl
+++ b/src/DynamicVehicleScheduling/DynamicVSP/policy/greedy_policy.jl
@@ -7,28 +7,10 @@ Dispatch customers as soon as they appear.
 struct GreedyVSPPolicy <: AbstractDynamicVSPPolicy end
 
 function (π::GreedyVSPPolicy)(env::DVSPEnv; model_builder=highs_model)
-    nb_postponable_requests = sum(get_state(env).is_postponable)
+    state = observe(env)
+    (; is_postponable) = state
+    nb_postponable_requests = sum(is_postponable)
     θ = ones(nb_postponable_requests) * 1e9
-    routes = prize_collecting_vsp(θ; instance=get_state(env), model_builder)
+    routes = prize_collecting_vsp(θ; instance=state, model_builder)
     return routes
 end
-
-# function run_policy!(π::GreedyVSPPolicy, env::DVSPEnv; check_feasibility=true, kwargs...)
-#     # reset environment, and initialize variables
-#     reset!(env)
-#     total_cost = 0
-#     epoch_routes = Vector{Vector{Int}}[]
-
-#     # epoch loop
-#     while !is_terminated(env)
-#         next_epoch!(env)
-#         state_routes = π(env; kwargs...)
-#         check_feasibility && @assert is_feasible(get_state(env), state_routes)
-#         env_routes = env_routes_from_state_routes(env, state_routes)
-#         push!(epoch_routes, env_routes)
-#         local_cost = apply_decision!(env, env_routes)
-#         total_cost += local_cost
-#     end
-
-#     return total_cost, epoch_routes
-# end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/kleopatra_policy.jl b/src/DynamicVehicleScheduling/DynamicVSP/policy/kleopatra_policy.jl
index 0a3708f..8a7e8d1 100644
--- a/src/DynamicVehicleScheduling/DynamicVSP/policy/kleopatra_policy.jl
+++ b/src/DynamicVehicleScheduling/DynamicVSP/policy/kleopatra_policy.jl
@@ -23,31 +23,10 @@ function KleopatraVSPPolicy(prize_predictor; has_2D_features=nothing)
 end
 
 function (π::KleopatraVSPPolicy)(env::DVSPEnv; model_builder=highs_model)
+    state = observe(env)
     (; prize_predictor, has_2D_features) = π
     x = has_2D_features ? compute_2D_features(env) : compute_features(env)
     θ = prize_predictor(x)
-    routes = prize_collecting_vsp(θ; instance=get_state(env), model_builder)
+    routes = prize_collecting_vsp(θ; instance=state, model_builder)
     return routes
 end
-
-# function run_policy!(
-#     π::KleopatraVSP, env::DVSPEnv; check_feasibility=true, model_builder=highs_model
-# )
-#     # reset environment, and initialize variables
-#     reset!(env)
-#     total_cost = 0
-#     epoch_routes = Vector{Vector{Int}}[]
-
-#     # epoch loop
-#     while !is_terminated(env)
-#         next_epoch!(env)
-#         state_routes = π(env; model_builder)
-#         check_feasibility && @assert is_feasible(get_state(env), state_routes)
-#         env_routes = env_routes_from_state_routes(env, state_routes)
-#         push!(epoch_routes, env_routes)
-#         local_cost = apply_decision!(env, env_routes)
-#         total_cost += local_cost
-#     end
-
-#     return total_cost, epoch_routes
-# end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/lazy_policy.jl b/src/DynamicVehicleScheduling/DynamicVSP/policy/lazy_policy.jl
index 5ce71ca..50b44d3 100644
--- a/src/DynamicVehicleScheduling/DynamicVSP/policy/lazy_policy.jl
+++ b/src/DynamicVehicleScheduling/DynamicVSP/policy/lazy_policy.jl
@@ -7,28 +7,9 @@ Dispatch customers only when necessary (i.e. must-dispatch).
 struct LazyVSPPolicy <: AbstractDynamicVSPPolicy end
 
 function (π::LazyVSPPolicy)(env::DVSPEnv; model_builder=highs_model)
-    nb_postponable_requests = sum(get_state(env).is_postponable)
+    state = observe(env)
+    nb_postponable_requests = sum(state.is_postponable)
     θ = ones(nb_postponable_requests) * -1e9
-    routes = prize_collecting_vsp(θ; instance=get_state(env), model_builder)
+    routes = prize_collecting_vsp(θ; instance=state, model_builder)
     return routes
 end
-
-# function run_policy!(π::LazyVSPPolicy, env::DVSPEnv; check_feasibility=true, kwargs...)
-#     # reset environment, and initialize variables
-#     reset!(env)
-#     total_cost = 0
-#     epoch_routes = Vector{Vector{Int}}[]
-
-#     # epoch loop
-#     while !is_terminated(env)
-#         next_epoch!(env)
-#         state_routes = π(env; kwargs...)
-#         check_feasibility && @assert is_feasible(get_state(env), state_routes)
-#         env_routes = env_routes_from_state_routes(env, state_routes)
-#         push!(epoch_routes, env_routes)
-#         local_cost = apply_decision!(env, env_routes)
-#         total_cost += local_cost
-#     end
-
-#     return total_cost, epoch_routes
-# end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/utils.jl b/src/DynamicVehicleScheduling/DynamicVSP/utils.jl
deleted file mode 100644
index 1be5e4d..0000000
--- a/src/DynamicVehicleScheduling/DynamicVSP/utils.jl
+++ /dev/null
@@ -1,11 +0,0 @@
-"""
-$TYPEDEF
-
-Basic point structure.
-"""
-struct Point{T}
-    x::T
-    y::T
-end
-
-Base.show(io::IO, p::Point) = print(io, "($(p.x), $(p.y))")
diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
index e1fddbc..d003f40 100644
--- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
+++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
@@ -3,6 +3,8 @@ module DynamicVehicleScheduling
 using ..Utils
 
 using Base: @kwdef
+using CommonRLInterface: CommonRLInterface, AbstractEnv, reset!, terminated, observe, act!
+using DataDeps: @datadep_str
 # using ChainRulesCore
 using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES
 using Graphs
@@ -13,25 +15,26 @@ using JSON
 using JuMP
 using Plots: plot, plot!, scatter!
 using Printf: @printf
-using Random: AbstractRNG, MersenneTwister, seed!, randperm
+using Random: Random, AbstractRNG, MersenneTwister, seed!, randperm
 using Requires: @require
 using Statistics: mean, quantile
 
 include("utils.jl")
-include("dynamic_config.jl")
-include("abstract_policy.jl")
 
-# Dynamic Vehicle Scheduling
-include("DynamicVSP/utils.jl")
+include("abstract_policy.jl")
 
-include("DynamicVSP/vsp/instance.jl")
-include("DynamicVSP/vsp/parsing.jl")
-include("DynamicVSP/vsp/solution.jl")
-include("DynamicVSP/vsp/plot.jl")
+# static vsp stuff
+include("static_vsp/instance.jl")
+include("static_vsp/parsing.jl")
+include("static_vsp/solution.jl")
+include("static_vsp/plot.jl")
 
-include("DynamicVSP/environment/state.jl")
-include("DynamicVSP/environment/environment.jl")
-include("DynamicVSP/environment/plot.jl")
+# dynamic environment
+include("environment/instance.jl")
+include("environment/scenario.jl")
+include("environment/state.jl")
+include("environment/environment.jl")
+include("environment/plot.jl")
 
 include("DynamicVSP/algorithms/prize_collecting_vsp.jl")
 include("DynamicVSP/algorithms/anticipative_solver.jl")
@@ -46,28 +49,38 @@ include("DynamicVSP/policy/lazy_policy.jl")
 include("DynamicVSP/policy/anticipative_policy.jl")
 include("DynamicVSP/policy/kleopatra_policy.jl")
 
-export highs_model, filtered_readdir
+struct DVSPBenchmark <: AbstractDynamicBenchmark end
+
+function Utils.generate_sample(b::DVSPBenchmark, rng::AbstractRNG)
+    return Instance(read_vsp_instance(readdir(datadep"dvrptw"; join=true)[1]))
+end
+
+export DVSPBenchmark, generate_sample, generate_scenario
+export run_policy!,
+    GreedyVSPPolicy, LazyVSPPolicy, KleopatraVSPPolicy, AnticipativeVSPPolicy
+
+# export highs_model, filtered_readdir
 
-export solve_hindsight_problem
+# export solve_hindsight_problem
 
-export AbstractDynamicPolicy, BasicDynamicPolicy
+# export AbstractDynamicPolicy, BasicDynamicPolicy
 
-export GreedyPolicy, LazyPolicy, RandomPolicy, Kleopatra
+# export GreedyPolicy, LazyPolicy, RandomPolicy, Kleopatra
 
-export run_policy
+# export run_policy
 
-export compute_features,
-    compute_2D_features, compute_critic_features, compute_critic_2D_features, load_dataset
+# export compute_features,
+#     compute_2D_features, compute_critic_features, compute_critic_2D_features, load_dataset
 
-export VSPInstance,
-    read_vsp_instance, start_time, env_routes_from_state_routes, state_route_from_env_routes
-export DVSPEnv, prize_collecting_vsp
-export anticipative_solver
-export VSPSolution
-export load_VSP_dataset
-export GreedyVSPPolicy,
-    LazyVSPPolicy, AnticipativeVSPPolicy, run_policy!, KleopatraVSPPolicy
-export plot_routes, plot_instance, plot_environment, plot_epoch
-export get_state
-export nb_epochs, get_epoch_indices
+# export VSPInstance,
+#     read_vsp_instance, start_time, env_routes_from_state_routes, state_route_from_env_routes
+# export DVSPEnv, prize_collecting_vsp
+# export anticipative_solver
+# export VSPSolution
+# export load_VSP_dataset
+# export GreedyVSPPolicy,
+#     LazyVSPPolicy, AnticipativeVSPPolicy, run_policy!, KleopatraVSPPolicy
+# export plot_routes, plot_instance, plot_environment, plot_epoch
+# export get_state
+# export nb_epochs, get_epoch_indices
 end
diff --git a/src/DynamicVehicleScheduling/dynamic_config.jl b/src/DynamicVehicleScheduling/dynamic_config.jl
deleted file mode 100644
index 3d052bf..0000000
--- a/src/DynamicVehicleScheduling/dynamic_config.jl
+++ /dev/null
@@ -1,24 +0,0 @@
-"""
-$TYPEDEF
-
-Config data structures for dynamic vehicle routing and scheduling problems.
-
-# Fields
-$TYPEDFIELDS
-"""
-@kwdef struct DynamicConfig{I,S,T}
-    "static instance to sample arriving requests from"
-    static_instance::I
-    "max number of new requests per epoch (rejection sampling)"
-    max_requests_per_epoch::Int = 100
-    "time distance between epoch start and routes start"
-    Δ_dispatch::T = 3600
-    "duration of each epoch"
-    epoch_duration::T = 3600
-    "first epoch index (time = epoch_duration x first_epoch)"
-    first_epoch::Int
-    "last epoch index"
-    last_epoch::Int
-    "seed for customer sampling"
-    seed::S
-end
diff --git a/src/DynamicVehicleScheduling/environment/environment.jl b/src/DynamicVehicleScheduling/environment/environment.jl
new file mode 100644
index 0000000..8109c0e
--- /dev/null
+++ b/src/DynamicVehicleScheduling/environment/environment.jl
@@ -0,0 +1,86 @@
+struct DVSPEnv{S<:DVSPState} <: AbstractEnv
+    "associated instance"
+    instance::Instance
+    "current state"
+    state::S
+    "scenario the environment will use when not given a specific one"
+    scenario::Scenario
+end
+
+"""
+$TYPEDSIGNATURES
+
+Constructor for [`DVSPEnv`](@ref).
+"""
+function DVSPEnv(instance::Instance; seed=nothing, rng=MersenneTwister(seed))
+    scenario = generate_scenario(instance; rng, seed)
+    initial_state = DVSPState(instance; scenario[1]...)
+    return DVSPEnv(instance, initial_state, scenario)
+end
+
+currrent_epoch(env::DVSPEnv) = current_epoch(env.state)
+epoch_duration(env::DVSPEnv) = epoch_duration(env.instance)
+last_epoch(env::DVSPEnv) = last_epoch(env.instance)
+Δ_dispatch(env::DVSPEnv) = Δ_dispatch(env.instance)
+
+"""
+$TYPEDSIGNATURES
+
+Get the current state of the environment.
+"""
+CommonRLInterface.observe(env::DVSPEnv) = env.state
+
+current_epoch(env::DVSPEnv) = current_epoch(env.state)
+
+"""
+$TYPEDSIGNATURES
+
+Get the current time of the environment, i.e. the start time of the current_epoch.
+"""
+time(env::DVSPEnv) = (current_epoch(env) - 1) * epoch_duration(env)
+
+"""
+$TYPEDSIGNATURES
+
+Get the planning start time of the environment, i.e. the time at which vehicles routes dispatched in current epoch can depart.
+"""
+planning_start_time(env::DVSPEnv) = time(env) + Δ_dispatch(env)
+"""
+$TYPEDSIGNATURES
+
+Check if the episode is terminated, i.e. if the current epoch is the last one.
+"""
+CommonRLInterface.terminated(env::DVSPEnv) = current_epoch(env) >= last_epoch(env)
+
+"""
+advance to the next epoch (NOTE: no customers are drawn here yet, `scenario` is unused)
+"""
+function draw_next_epoch!(env::DVSPEnv, scenario=env.scenario)
+    env.state.current_epoch += 1
+
+    return nothing
+end
+
+"""
+$TYPEDSIGNATURES
+
+Reset the environment to its initial state.
+The state is rebuilt from the first epoch of `scenario` (default: `env.scenario`).
+"""
+function CommonRLInterface.reset!(env::DVSPEnv, scenario=env.scenario)
+    reset_state!(env.state, env.instance; scenario[1]...)
+    return nothing
+end
+
+"""
+remove dispatched customers, advance time, add new requests; return the reward (-cost).
+"""
+function CommonRLInterface.act!(env::DVSPEnv, routes, scenario=env.scenario)
+    reward = -apply_routes!(env.state, routes)  # reward is minus the cost of `routes`
+    env.state.current_epoch += 1
+    if current_epoch(env) > last_epoch(env)
+        return reward  # past the last epoch: nothing to add, still report the reward
+    end
+    add_new_customers!(env.state, env.instance; scenario[current_epoch(env)]...)
+    return reward
+end
diff --git a/src/DynamicVehicleScheduling/environment/instance.jl b/src/DynamicVehicleScheduling/environment/instance.jl
new file mode 100644
index 0000000..b375077
--- /dev/null
+++ b/src/DynamicVehicleScheduling/environment/instance.jl
@@ -0,0 +1,52 @@
+"""
+$TYPEDEF
+
+Instance data structure for the dynamic vehicle scheduling problem.
+"""
+@kwdef struct Instance{I<:StaticInstance,T}
+    "static instance to sample arriving requests from"
+    static_instance::I
+    "max number of new requests per epoch (rejection sampling)"
+    max_requests_per_epoch::Int = 10
+    "time distance between epoch start and routes start"
+    Δ_dispatch::T = 1.0
+    "duration of each epoch"
+    epoch_duration::T = 1.0
+    "last epoch index"
+    last_epoch::Int
+    # "seed for customer sampling"
+    # seed::S
+end
+
+function Instance(
+    static_instance::StaticInstance;
+    max_requests_per_epoch::Int=10,
+    Δ_dispatch::Float64=1.0,
+    epoch_duration::Float64=1.0,
+)
+    last_epoch = trunc(
+        Int,
+        (
+            maximum(static_instance.start_time) - minimum(static_instance.duration[1, :]) -
+            Δ_dispatch
+        ) / epoch_duration,
+    )
+    return Instance(;
+        static_instance=static_instance,
+        max_requests_per_epoch=max_requests_per_epoch,
+        Δ_dispatch=Δ_dispatch,
+        epoch_duration=epoch_duration,
+        last_epoch=last_epoch,
+    )
+end
+
+Δ_dispatch(instance::Instance) = instance.Δ_dispatch
+epoch_duration(instance::Instance) = instance.epoch_duration
+last_epoch(instance::Instance) = instance.last_epoch
+max_requests_per_epoch(instance::Instance) = instance.max_requests_per_epoch
+# static_instance(instance::Instance) = instance.static_instance
+
+# duration(instance::Instance) = duration(instance.static_instance)
+# service_time(instance::Instance) = service_time(instance.static_instance)
+# coordinate(instance::Instance) = coordinate(instance.static_instance)
+# start_time(instance::Instance) = start_time(instance.static_instance)
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/environment/plot.jl b/src/DynamicVehicleScheduling/environment/plot.jl
similarity index 98%
rename from src/DynamicVehicleScheduling/DynamicVSP/environment/plot.jl
rename to src/DynamicVehicleScheduling/environment/plot.jl
index 60b6d90..409ad79 100644
--- a/src/DynamicVehicleScheduling/DynamicVSP/environment/plot.jl
+++ b/src/DynamicVehicleScheduling/environment/plot.jl
@@ -58,7 +58,7 @@ $TYPEDSIGNATURES
 
 Plot the given `routes`` for a VSP `state`.
 """
-function plot_epoch(state::VSPState, routes; kwargs...)
+function plot_epoch(state::DVSPState, routes; kwargs...)
     (; coordinate, start_time) = state.instance
     x_depot = coordinate[1].x
     y_depot = coordinate[1].y
diff --git a/src/DynamicVehicleScheduling/environment/scenario.jl b/src/DynamicVehicleScheduling/environment/scenario.jl
new file mode 100644
index 0000000..de5d858
--- /dev/null
+++ b/src/DynamicVehicleScheduling/environment/scenario.jl
@@ -0,0 +1,47 @@
+
+struct Scenario
+    "indices of the new requests in each epoch"
+    indices::Vector{Vector{Int}}
+    "service times of the new requests in each epoch"
+    service_time::Vector{Vector{Float64}}
+    "start times of the new requests in each epoch"
+    start_time::Vector{Vector{Float64}}
+end
+
+function Base.getindex(scenario::Scenario, idx::Integer)
+    return (;
+        indices=scenario.indices[idx],
+        service_time=scenario.service_time[idx],
+        start_time=scenario.start_time[idx],
+    )
+end
+
+function generate_scenario(
+    instance::Instance; seed=nothing, rng::AbstractRNG=MersenneTwister(seed)
+)
+    (; Δ_dispatch, static_instance, last_epoch, epoch_duration, max_requests_per_epoch) =
+        instance
+    (; duration, start_time, service_time) = static_instance
+    N = customer_count(static_instance)
+    depot = 1
+
+    new_indices = Vector{Int}[]
+    new_service_time = Vector{Float64}[]
+    new_start_time = Vector{Float64}[]
+
+    for epoch in 1:last_epoch
+        time = epoch_duration * (epoch - 1) + Δ_dispatch
+
+        coordinate_indices = sample_indices(rng, max_requests_per_epoch, N)
+        start_time_indices = sample_indices(rng, max_requests_per_epoch, N)
+        service_time_indices = sample_indices(rng, max_requests_per_epoch, N)
+
+        is_feasible =
+            time .+ duration[depot, coordinate_indices] .<= start_time[start_time_indices]
+
+        push!(new_indices, coordinate_indices[is_feasible])
+        push!(new_service_time, service_time[service_time_indices[is_feasible]])
+        push!(new_start_time, start_time[start_time_indices[is_feasible]])
+    end
+    return Scenario(new_indices, new_service_time, new_start_time)
+end
diff --git a/src/DynamicVehicleScheduling/environment/state.jl b/src/DynamicVehicleScheduling/environment/state.jl
new file mode 100644
index 0000000..ebac101
--- /dev/null
+++ b/src/DynamicVehicleScheduling/environment/state.jl
@@ -0,0 +1,201 @@
+"""
+$TYPEDSIGNATURES
+
+State data structure for the Dynamic Vehicle Scheduling Problem.
+"""
+@kwdef mutable struct DVSPState{I}
+    "current epoch number"
+    current_epoch::Int = 1
+    "list of location indices from the upper instance (useful for adding new customers)"
+    location_indices::Vector{Int} = Int[]
+    "associated (static) vehicle scheduling instance"
+    state_instance::I = StaticInstance()
+    "for each location, 1 if the request must be dispatched, 0 otherwise. The depot is always 0."
+    is_must_dispatch::BitVector = falses(0)
+    "for each location, 1 if the request can be postponed, 0 otherwise. The depot is always 0."
+    is_postponable::BitVector = falses(0)
+end
+
+function reset_state!(
+    state::DVSPState, instance::Instance; indices, service_time, start_time
+)
+    (; epoch_duration, Δ_dispatch, static_instance) = instance
+    indices_with_depot = vcat(1, indices)
+    service_time_with_depot = vcat(0.0, service_time)
+    start_time_with_depot = vcat(0.0, start_time)
+
+    coordinates = coordinate(static_instance)[indices_with_depot]
+    duration_matrix = duration(static_instance)[indices_with_depot, indices_with_depot]
+
+    is_must_dispatch = falses(length(indices_with_depot))
+    is_must_dispatch[2:end] .=
+        Δ_dispatch .+ epoch_duration .+ @view(duration_matrix[1, 2:end]) .> start_time
+
+    is_postponable = falses(length(is_must_dispatch))
+    is_postponable[2:end] .= .!is_must_dispatch[2:end]
+
+    state.current_epoch = 1
+    state.state_instance = StaticInstance(;
+        service_time=service_time_with_depot,
+        start_time=start_time_with_depot,
+        coordinate=coordinates,
+        duration=duration_matrix,
+    )
+    state.is_must_dispatch = is_must_dispatch
+    state.is_postponable = is_postponable
+    state.location_indices = indices_with_depot
+    return nothing
+end
+
+function DVSPState(instance::Instance; indices, service_time, start_time)
+    state = DVSPState()
+    reset_state!(state, instance; indices=indices, service_time=service_time, start_time)
+    return state
+end
+
+current_epoch(state::DVSPState) = state.current_epoch
+
+"""
+$TYPEDSIGNATURES
+
+Return the number of locations in `state` (customers + depot).
+"""
+location_count(state::DVSPState) = location_count(state.state_instance)
+
+"""
+$TYPEDSIGNATURES
+
+Return the number of customers in `state`.
+"""
+customer_count(state::DVSPState) = customer_count(state.state_instance)
+
+"""
+$TYPEDSIGNATURES
+
+Get the service time vector
+"""
+service_time(state::DVSPState) = service_time(state.state_instance)
+
+"""
+$TYPEDSIGNATURES
+
+Get the coordinates vector.
+"""
+coordinate(state::DVSPState) = coordinate(state.state_instance)
+
+"""
+$TYPEDSIGNATURES
+
+Get the duration matrix.
+"""
+duration(state::DVSPState) = duration(state.state_instance)
+
+"""
+$TYPEDSIGNATURES
+
+Get the start time vector.
+"""
+start_time(state::DVSPState) = start_time(state.state_instance)
+
+"""
+$TYPEDSIGNATURES
+
+Check if the given routes are feasible.
+Routes should be given with state (local) indexation: each entry indexes the state's own
+arrays, i.e. entry `i` refers to `state.location_indices[i]` in the full instance.
+"""
+function is_feasible(state::DVSPState, routes::Vector{Vector{Int}}; verbose::Bool=false)
+    (; is_must_dispatch, state_instance) = state
+    (; duration, start_time, service_time) = state_instance
+    is_dispatched = falses(length(is_must_dispatch))
+
+    # Check that routes follow time constraints
+    for route in routes
+        is_dispatched[route] .= true
+        current = 1  # start at the depot
+        current_time = start_time[current]
+        for next in route
+            current_time += duration[current, next]
+            if current_time > start_time[next]
+                verbose &&
+                    @warn "Route $route is infeasible: time constraint violated at location $next"
+                return false
+            end
+            current_time += service_time[next]
+            current = next
+        end
+    end
+
+    # Check that all must dispatch requests are dispatched
+    if all(is_dispatched[is_must_dispatch])
+        return true
+    else
+        verbose && @warn "Not all must-dispatch requests are dispatched"
+        return false
+    end
+end
+
+"""
+remove dispatched customers (and their flags) from `state`; return the cost of `routes`.
+"""
+function apply_routes!(
+    state::DVSPState, routes::Vector{Vector{Int}}; check_feasibility::Bool=true
+)
+    check_feasibility && @assert is_feasible(state, routes; verbose=true)
+    (; is_must_dispatch, is_postponable, state_instance, location_indices) = state
+    c = cost(state, routes)
+
+    # Remove dispatched customers
+    N = location_count(state_instance)
+    undispatched_indices = trues(N)
+    undispatched_indices[vcat(routes...)] .= false
+    state.state_instance = StaticInstance(;
+        coordinate=state_instance.coordinate[undispatched_indices],
+        service_time=state_instance.service_time[undispatched_indices],
+        start_time=state_instance.start_time[undispatched_indices],
+        duration=state_instance.duration[undispatched_indices, undispatched_indices],
+    )
+    state.is_must_dispatch = is_must_dispatch[undispatched_indices]
+    state.is_postponable = is_postponable[undispatched_indices]
+    state.location_indices = location_indices[undispatched_indices]
+    return c
+end
+
+function cost(state::DVSPState, routes::Vector{Vector{Int}})
+    return cost(routes, duration(state.state_instance))
+end
+
+function add_new_customers!(
+    state::DVSPState, instance::Instance; indices, service_time, start_time
+)
+    (; state_instance, is_must_dispatch, is_postponable, location_indices) = state
+
+    updated_indices = vcat(location_indices, indices)
+    updated_service_time = vcat(state_instance.service_time, service_time)
+    updated_start_time = vcat(state_instance.start_time, start_time)
+    updated_coordinates = instance.static_instance.coordinate[updated_indices]
+    updated_duration = instance.static_instance.duration[updated_indices, updated_indices]
+    is_must_dispatch = falses(length(updated_indices))
+    is_postponable = falses(length(updated_indices))
+
+    state.state_instance = StaticInstance(;
+        coordinate=updated_coordinates,
+        service_time=updated_service_time,
+        start_time=updated_start_time,
+        duration=updated_duration,
+    )
+
+    # Compute must-dispatch flags
+    epoch_duration = instance.epoch_duration
+    Δ_dispatch = instance.Δ_dispatch
+    planning_start_time = (state.current_epoch - 1) * epoch_duration + Δ_dispatch
+    is_must_dispatch[2:end] .=
+        planning_start_time .+ epoch_duration .+ @view(updated_duration[1, 2:end]) .>
+        updated_start_time[2:end]
+    is_postponable[2:end] .= .!is_must_dispatch[2:end]
+
+    state.is_must_dispatch = is_must_dispatch
+    state.is_postponable = is_postponable
+    state.location_indices = updated_indices
+    return nothing
+end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/vsp/instance.jl b/src/DynamicVehicleScheduling/static_vsp/instance.jl
similarity index 65%
rename from src/DynamicVehicleScheduling/DynamicVSP/vsp/instance.jl
rename to src/DynamicVehicleScheduling/static_vsp/instance.jl
index 512a0fe..97091a0 100644
--- a/src/DynamicVehicleScheduling/DynamicVSP/vsp/instance.jl
+++ b/src/DynamicVehicleScheduling/static_vsp/instance.jl
@@ -6,7 +6,7 @@ Instance data structure for the (deterministic and static) Vehicle Scheduling Pr
 # Fields
 $TYPEDFIELDS
 """
-@kwdef struct VSPInstance{T}
+@kwdef struct StaticInstance{T}
     "coordinates of the locations. The first one is always the depot."
     coordinate::Vector{Point{T}} = Point{Float64}[]
     "service time at each location"
@@ -17,8 +17,8 @@ $TYPEDFIELDS
     duration::Matrix{T} = zeros(Float64, 0, 0)
 end
 
-function Base.show(io::IO, instance::VSPInstance)
-    N = nb_customers(instance)
+function Base.show(io::IO, instance::StaticInstance)
+    N = customer_count(instance)
     return print(io, "VSPInstance with $N customers")
 end
 
@@ -27,39 +27,39 @@ $TYPEDSIGNATURES
 
 Return the number of locations in `instance` (customers + depot).
 """
-nb_locations(instance::VSPInstance) = length(instance.coordinate)
+location_count(instance::StaticInstance) = length(instance.coordinate)
 
 """
 $TYPEDSIGNATURES
 
 Return the number of customers in `instance` (excluding the depot).
 """
-nb_customers(instance::VSPInstance) = nb_locations(instance) - 1
+customer_count(instance::StaticInstance) = location_count(instance) - 1
 
 """
 $TYPEDSIGNATURES
 
 Get the service time vector.
 """
-service_time(instance::VSPInstance) = instance.service_time
+service_time(instance::StaticInstance) = instance.service_time
 
 """
 $TYPEDSIGNATURES
 
 Get the coordinates vector.
 """
-coordinate(instance::VSPInstance) = instance.coordinate
+coordinate(instance::StaticInstance) = instance.coordinate
 
 """
 $TYPEDSIGNATURES
 
 Get the duration matrix.
 """
-duration(instance::VSPInstance) = instance.duration
+duration(instance::StaticInstance) = instance.duration
 
 """
 $TYPEDSIGNATURES
 
 Get the start time vector.
 """
-start_time(instance::VSPInstance) = instance.start_time
+start_time(instance::StaticInstance) = instance.start_time
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/vsp/parsing.jl b/src/DynamicVehicleScheduling/static_vsp/parsing.jl
similarity index 97%
rename from src/DynamicVehicleScheduling/DynamicVSP/vsp/parsing.jl
rename to src/DynamicVehicleScheduling/static_vsp/parsing.jl
index 21589fd..7bd7f92 100644
--- a/src/DynamicVehicleScheduling/DynamicVSP/vsp/parsing.jl
+++ b/src/DynamicVehicleScheduling/static_vsp/parsing.jl
@@ -91,5 +91,5 @@ function read_vsp_instance(filepath::String; rounded::Bool=false, normalization=
     start_time ./= normalization
     duration ./= normalization
 
-    return VSPInstance(; coordinate, service_time, start_time, duration)
+    return StaticInstance(; coordinate, service_time, start_time, duration)
 end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/vsp/plot.jl b/src/DynamicVehicleScheduling/static_vsp/plot.jl
similarity index 96%
rename from src/DynamicVehicleScheduling/DynamicVSP/vsp/plot.jl
rename to src/DynamicVehicleScheduling/static_vsp/plot.jl
index a9f03de..515ab3d 100644
--- a/src/DynamicVehicleScheduling/DynamicVSP/vsp/plot.jl
+++ b/src/DynamicVehicleScheduling/static_vsp/plot.jl
@@ -4,7 +4,7 @@ $TYPEDSIGNATURES
 Plot the given static VSP `instance`.
 """
 function plot_instance(
-    instance::VSPInstance;
+    instance::StaticInstance;
     customer_markersize=4,
     depot_markersize=7,
     alpha_depot=0.8,
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/vsp/solution.jl b/src/DynamicVehicleScheduling/static_vsp/solution.jl
similarity index 100%
rename from src/DynamicVehicleScheduling/DynamicVSP/vsp/solution.jl
rename to src/DynamicVehicleScheduling/static_vsp/solution.jl
diff --git a/src/DynamicVehicleScheduling/utils.jl b/src/DynamicVehicleScheduling/utils.jl
index 1e17906..36eebd2 100644
--- a/src/DynamicVehicleScheduling/utils.jl
+++ b/src/DynamicVehicleScheduling/utils.jl
@@ -24,6 +24,18 @@ function cost(routes::Vector{Vector{Int}}, duration::AbstractMatrix)
     return total
 end
 
+"""
+$TYPEDEF
+
+Basic point structure.
+"""
+struct Point{T}
+    x::T
+    y::T
+end
+
+Base.show(io::IO, p::Point) = print(io, "($(p.x), $(p.y))")
+
 # """
 # $TYPEDSIGNATURES
 
diff --git a/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl b/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl
index e148d5e..41801c5 100644
--- a/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl
+++ b/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl
@@ -145,10 +145,7 @@ end
 $TYPEDSIGNATURES
 """
 function plot_instance(
-    ::StochasticVehicleSchedulingBenchmark,
-    sample::DataSample{<:Instance{City}};
-    color_scheme=:lightrainbow,
-    kwargs...,
+    ::StochasticVehicleSchedulingBenchmark, sample::DataSample{<:Instance{City}}; kwargs...
 )
     (; tasks, district_width, width) = sample.instance.city
     ticks = 0:district_width:width
@@ -197,7 +194,6 @@ function plot_instance(
             marker_z=task.end_time,
             colormap=:turbo,
             label=nothing,
-            # color=palette[max(floor(Int, task.end_time), 1)],
         )
         annotate!(fig, (points[1]..., text("$(i_task)", 10)))
     end
diff --git a/src/Utils/interface.jl b/src/Utils/interface.jl
index e6ecb17..e443a15 100644
--- a/src/Utils/interface.jl
+++ b/src/Utils/interface.jl
@@ -15,28 +15,6 @@ The following methods are optional:
 """
 abstract type AbstractBenchmark end
 
-"""
-$TYPEDEF
-
-Abstract type interface for stochastic benchmark problems.
-This type should be used for benchmarks that involve single stage stochastic optimization problems.
-
-It follows the same interface as [`AbstractBenchmark`](@ref), with the addition of the following methods:
-TODO
-"""
-abstract type AbstractStochasticBenchmark <: AbstractBenchmark end
-
-"""
-$TYPEDEF
-
-Abstract type interface for dynamic benchmark problems.
-This type should be used for benchmarks that involve multi-stage stochastic optimization problems.
-
-It follows the same interface as [`AbstractStochasticBenchmark`](@ref), with the addition of the following methods:
-TODO
-"""
-abstract type AbstractDynamicBenchmark <: AbstractStochasticBenchmark end
-
 """
     generate_sample(::AbstractBenchmark, rng::AbstractRNG; kwargs...) -> DataSample
 
@@ -199,3 +177,36 @@ function compute_gap(
         end,
     )
 end
+
+"""
+$TYPEDEF
+
+Abstract type interface for stochastic benchmark problems.
+This type should be used for benchmarks that involve single stage stochastic optimization problems.
+
+It follows the same interface as [`AbstractBenchmark`](@ref), with the addition of the following methods:
+- [`generate_anticipative_solver`](@ref)
+"""
+abstract type AbstractStochasticBenchmark <: AbstractBenchmark end
+
+# only works for exogenous noise
+"""
+    generate_scenario_generator(::AbstractStochasticBenchmark; kwargs...)
+"""
+function generate_scenario_generator end
+
+"""
+    generate_anticipative_solver(::AbstractStochasticBenchmark; kwargs...)
+"""
+function generate_anticipative_solver end
+
+"""
+$TYPEDEF
+
+Abstract type interface for dynamic benchmark problems.
+This type should be used for benchmarks that involve multi-stage stochastic optimization problems.
+
+It follows the same interface as [`AbstractStochasticBenchmark`](@ref), with the addition of the following methods:
+TODO
+"""
+abstract type AbstractDynamicBenchmark <: AbstractStochasticBenchmark end
diff --git a/src/Warcraft/Warcraft.jl b/src/Warcraft/Warcraft.jl
index 669a828..c4dcbae 100644
--- a/src/Warcraft/Warcraft.jl
+++ b/src/Warcraft/Warcraft.jl
@@ -2,7 +2,7 @@ module Warcraft
 
 using ..Utils
 
-using DataDeps
+using DataDeps: @datadep_str
 using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES
 using Flux
 using Graphs

From 149a2912d0c7545f47bf4c10e125c627c1aa24e6 Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Fri, 4 Jul 2025 16:35:53 +0200
Subject: [PATCH 07/29] update

---
 src/DecisionFocusedLearningBenchmarks.jl      |  3 +-
 .../DynamicVehicleScheduling.jl               | 48 +++++++------------
 .../environment/scenario.jl                   |  8 ++++
 .../policy/abstract_vsp_policy.jl             |  0
 .../policy/anticipative_policy.jl             |  0
 .../{DynamicVSP => }/policy/greedy_policy.jl  |  0
 .../policy/kleopatra_policy.jl                |  0
 .../{DynamicVSP => }/policy/lazy_policy.jl    |  0
 src/Utils/Utils.jl                            |  7 ++-
 src/Utils/data_sample.jl                      |  7 ++-
 test/dynamic_vsp.jl                           |  8 ++++
 11 files changed, 46 insertions(+), 35 deletions(-)
 rename src/DynamicVehicleScheduling/{DynamicVSP => }/policy/abstract_vsp_policy.jl (100%)
 rename src/DynamicVehicleScheduling/{DynamicVSP => }/policy/anticipative_policy.jl (100%)
 rename src/DynamicVehicleScheduling/{DynamicVSP => }/policy/greedy_policy.jl (100%)
 rename src/DynamicVehicleScheduling/{DynamicVSP => }/policy/kleopatra_policy.jl (100%)
 rename src/DynamicVehicleScheduling/{DynamicVSP => }/policy/lazy_policy.jl (100%)
 create mode 100644 test/dynamic_vsp.jl

diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl
index 252e5d4..97f9ea6 100644
--- a/src/DecisionFocusedLearningBenchmarks.jl
+++ b/src/DecisionFocusedLearningBenchmarks.jl
@@ -68,7 +68,8 @@ using .DynamicVehicleScheduling
 
 # Interface
 export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark, DataSample
-export generate_sample, generate_dataset, generate_scenario
+export generate_sample,
+    generate_dataset, generate_scenario_generator, generate_anticipative_solver
 export generate_statistical_model
 export generate_maximizer, maximizer_kwargs
 export objective_value
diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
index d003f40..e380d8e 100644
--- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
+++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
@@ -31,9 +31,9 @@ include("static_vsp/plot.jl")
 
 # dynamic environment
 include("environment/instance.jl")
-include("environment/scenario.jl")
 include("environment/state.jl")
 include("environment/environment.jl")
+include("environment/scenario.jl")
 include("environment/plot.jl")
 
 include("DynamicVSP/algorithms/prize_collecting_vsp.jl")
@@ -43,44 +43,30 @@ include("DynamicVSP/learning/features.jl")
 include("DynamicVSP/learning/2d_features.jl")
 include("DynamicVSP/learning/dataset.jl")
 
-include("DynamicVSP/policy/abstract_vsp_policy.jl")
-include("DynamicVSP/policy/greedy_policy.jl")
-include("DynamicVSP/policy/lazy_policy.jl")
-include("DynamicVSP/policy/anticipative_policy.jl")
-include("DynamicVSP/policy/kleopatra_policy.jl")
+include("policy/abstract_vsp_policy.jl")
+include("policy/greedy_policy.jl")
+include("policy/lazy_policy.jl")
+include("policy/anticipative_policy.jl")
+include("policy/kleopatra_policy.jl")
 
 struct DVSPBenchmark <: AbstractDynamicBenchmark end
 
 function Utils.generate_sample(b::DVSPBenchmark, rng::AbstractRNG)
-    return Instance(read_vsp_instance(readdir(datadep"dvrptw"; join=true)[1]))
+    return DataSample(;
+        instance=Instance(read_vsp_instance(readdir(datadep"dvrptw"; join=true)[1]))
+    )
+end
+
+function Utils.generate_scenario_generator(::DVSPBenchmark)
+    return generate_scenario
+end
+
+function Utils.generate_anticipative_solver(::DVSPBenchmark; kwargs...)
+    return AnticipativeVSPPolicy(; kwargs...)
 end
 
 export DVSPBenchmark, generate_sample, generate_scenario
 export run_policy!,
     GreedyVSPPolicy, LazyVSPPolicy, KleopatraVSPPolicy, AnticipativeVSPPolicy
 
-# export highs_model, filtered_readdir
-
-# export solve_hindsight_problem
-
-# export AbstractDynamicPolicy, BasicDynamicPolicy
-
-# export GreedyPolicy, LazyPolicy, RandomPolicy, Kleopatra
-
-# export run_policy
-
-# export compute_features,
-#     compute_2D_features, compute_critic_features, compute_critic_2D_features, load_dataset
-
-# export VSPInstance,
-#     read_vsp_instance, start_time, env_routes_from_state_routes, state_route_from_env_routes
-# export DVSPEnv, prize_collecting_vsp
-# export anticipative_solver
-# export VSPSolution
-# export load_VSP_dataset
-# export GreedyVSPPolicy,
-#     LazyVSPPolicy, AnticipativeVSPPolicy, run_policy!, KleopatraVSPPolicy
-# export plot_routes, plot_instance, plot_environment, plot_epoch
-# export get_state
-# export nb_epochs, get_epoch_indices
 end
diff --git a/src/DynamicVehicleScheduling/environment/scenario.jl b/src/DynamicVehicleScheduling/environment/scenario.jl
index de5d858..0e9e056 100644
--- a/src/DynamicVehicleScheduling/environment/scenario.jl
+++ b/src/DynamicVehicleScheduling/environment/scenario.jl
@@ -45,3 +45,11 @@ function generate_scenario(
     end
     return Scenario(new_indices, new_service_time, new_start_time)
 end
+
+function generate_scenario(sample::DataSample; kwargs...)
+    return generate_scenario(sample.instance; kwargs...)
+end
+
+function generate_scenario(env::DVSPEnv; kwargs...)
+    return generate_scenario(env.instance; kwargs...)
+end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/abstract_vsp_policy.jl b/src/DynamicVehicleScheduling/policy/abstract_vsp_policy.jl
similarity index 100%
rename from src/DynamicVehicleScheduling/DynamicVSP/policy/abstract_vsp_policy.jl
rename to src/DynamicVehicleScheduling/policy/abstract_vsp_policy.jl
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/anticipative_policy.jl b/src/DynamicVehicleScheduling/policy/anticipative_policy.jl
similarity index 100%
rename from src/DynamicVehicleScheduling/DynamicVSP/policy/anticipative_policy.jl
rename to src/DynamicVehicleScheduling/policy/anticipative_policy.jl
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/greedy_policy.jl b/src/DynamicVehicleScheduling/policy/greedy_policy.jl
similarity index 100%
rename from src/DynamicVehicleScheduling/DynamicVSP/policy/greedy_policy.jl
rename to src/DynamicVehicleScheduling/policy/greedy_policy.jl
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/kleopatra_policy.jl b/src/DynamicVehicleScheduling/policy/kleopatra_policy.jl
similarity index 100%
rename from src/DynamicVehicleScheduling/DynamicVSP/policy/kleopatra_policy.jl
rename to src/DynamicVehicleScheduling/policy/kleopatra_policy.jl
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/lazy_policy.jl b/src/DynamicVehicleScheduling/policy/lazy_policy.jl
similarity index 100%
rename from src/DynamicVehicleScheduling/DynamicVSP/policy/lazy_policy.jl
rename to src/DynamicVehicleScheduling/policy/lazy_policy.jl
diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl
index 58bc161..67c3d94 100644
--- a/src/Utils/Utils.jl
+++ b/src/Utils/Utils.jl
@@ -20,7 +20,12 @@ include("model_builders.jl")
 export DataSample
 
 export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark
-export generate_dataset, generate_statistical_model, generate_maximizer, generate_sample
+export generate_dataset,
+    generate_statistical_model,
+    generate_maximizer,
+    generate_sample,
+    generate_scenario_generator,
+    generate_anticipative_solver
 export plot_data, compute_gap
 export maximizer_kwargs
 export grid_graph, get_path, path_to_matrix
diff --git a/src/Utils/data_sample.jl b/src/Utils/data_sample.jl
index e9a8a3c..fde1bf3 100644
--- a/src/Utils/data_sample.jl
+++ b/src/Utils/data_sample.jl
@@ -7,10 +7,13 @@ Data sample data structure.
 $TYPEDFIELDS
 """
 @kwdef struct DataSample{
-    I,F<:AbstractArray,S<:Union{AbstractArray,Nothing},C<:Union{AbstractArray,Nothing}
+    I,
+    F<:Union{AbstractArray,Nothing},
+    S<:Union{AbstractArray,Nothing},
+    C<:Union{AbstractArray,Nothing},
 }
     "features"
-    x::F
+    x::F = nothing
     "target cost parameters (optional)"
     θ_true::C = nothing
     "target solution (optional)"
diff --git a/test/dynamic_vsp.jl b/test/dynamic_vsp.jl
new file mode 100644
index 0000000..49c9b77
--- /dev/null
+++ b/test/dynamic_vsp.jl
@@ -0,0 +1,8 @@
+# @testitem "DVSP - parsing" begin
+#     using DecisionFocusedLearningBenchmarks.DynamicVehicleScheduling:
+#         read_vsp_instance, location_count, customer_count
+#     path = joinpath(@__DIR__, "data", "vsp_instance.txt")
+#     instance = read_vsp_instance(path)
+#     @test location_count(instance) == 6
+#     @test customer_count(instance) == 5
+# end

From 7f9d322c226d8db8dd979e932693e587f6e8d7b0 Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Mon, 7 Jul 2025 11:15:22 +0200
Subject: [PATCH 08/29] bugfix

---
 src/DecisionFocusedLearningBenchmarks.jl      |  6 ++++--
 .../DynamicVehicleScheduling.jl               |  8 ++++++--
 .../environment/environment.jl                |  4 ++++
 .../environment/scenario.jl                   |  4 ----
 src/Utils/Utils.jl                            |  4 +++-
 src/Utils/interface.jl                        | 20 +++++++++++++++++++
 6 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl
index 97f9ea6..dfd0a42 100644
--- a/src/DecisionFocusedLearningBenchmarks.jl
+++ b/src/DecisionFocusedLearningBenchmarks.jl
@@ -68,10 +68,12 @@ using .DynamicVehicleScheduling
 
 # Interface
 export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark, DataSample
-export generate_sample,
-    generate_dataset, generate_scenario_generator, generate_anticipative_solver
+
+export generate_sample, generate_dataset, generate_environments
+export generate_scenario_generator, generate_anticipative_solver
 export generate_statistical_model
 export generate_maximizer, maximizer_kwargs
+
 export objective_value
 export plot_data, plot_instance, plot_solution
 export compute_gap
diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
index e380d8e..f8d2df6 100644
--- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
+++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
@@ -32,8 +32,8 @@ include("static_vsp/plot.jl")
 # dynamic environment
 include("environment/instance.jl")
 include("environment/state.jl")
-include("environment/environment.jl")
 include("environment/scenario.jl")
+include("environment/environment.jl")
 include("environment/plot.jl")
 
 include("DynamicVSP/algorithms/prize_collecting_vsp.jl")
@@ -65,7 +65,11 @@ function Utils.generate_anticipative_solver(::DVSPBenchmark; kwargs...)
     return AnticipativeVSPPolicy(; kwargs...)
 end
 
-export DVSPBenchmark, generate_sample, generate_scenario
+function Utils.generate_environment(::DVSPBenchmark, instance::Instance; kwargs...)
+    return DVSPEnv(instance; kwargs...)
+end
+
+export DVSPBenchmark, generate_environment # , generate_sample, generate_anticipative_solver
 export run_policy!,
     GreedyVSPPolicy, LazyVSPPolicy, KleopatraVSPPolicy, AnticipativeVSPPolicy
 
diff --git a/src/DynamicVehicleScheduling/environment/environment.jl b/src/DynamicVehicleScheduling/environment/environment.jl
index 8109c0e..a09db9a 100644
--- a/src/DynamicVehicleScheduling/environment/environment.jl
+++ b/src/DynamicVehicleScheduling/environment/environment.jl
@@ -84,3 +84,7 @@ function CommonRLInterface.act!(env::DVSPEnv, routes, scenario=env.scenario)
     add_new_customers!(env.state, env.instance; scenario[current_epoch(env)]...)
     return reward
 end
+
+function generate_scenario(env::DVSPEnv; kwargs...)
+    return generate_scenario(env.instance; kwargs...)
+end
diff --git a/src/DynamicVehicleScheduling/environment/scenario.jl b/src/DynamicVehicleScheduling/environment/scenario.jl
index 0e9e056..cee4fe7 100644
--- a/src/DynamicVehicleScheduling/environment/scenario.jl
+++ b/src/DynamicVehicleScheduling/environment/scenario.jl
@@ -49,7 +49,3 @@ end
 function generate_scenario(sample::DataSample; kwargs...)
     return generate_scenario(sample.instance; kwargs...)
 end
-
-function generate_scenario(env::DVSPEnv; kwargs...)
-    return generate_scenario(env.instance; kwargs...)
-end
diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl
index 67c3d94..7a1e804 100644
--- a/src/Utils/Utils.jl
+++ b/src/Utils/Utils.jl
@@ -25,7 +25,9 @@ export generate_dataset,
     generate_maximizer,
     generate_sample,
     generate_scenario_generator,
-    generate_anticipative_solver
+    generate_anticipative_solver,
+    generate_environment,
+    generate_environments
 export plot_data, compute_gap
 export maximizer_kwargs
 export grid_graph, get_path, path_to_matrix
diff --git a/src/Utils/interface.jl b/src/Utils/interface.jl
index e443a15..3b70f9e 100644
--- a/src/Utils/interface.jl
+++ b/src/Utils/interface.jl
@@ -210,3 +210,23 @@ It follows the same interface as [`AbstractStochasticBenchmark`](@ref), with the
 TODO
 """
 abstract type AbstractDynamicBenchmark <: AbstractStochasticBenchmark end
+
+"""
+    generate_environment(::AbstractDynamicBenchmark, instance; kwargs...)
+
+Initialize an environment for the given dynamic benchmark instance.
+"""
+function generate_environment end
+
+"""
+$TYPEDSIGNATURES
+
+Generate a vector of environments for the given dynamic benchmark and dataset.
+"""
+function generate_environments(
+    bench::AbstractDynamicBenchmark, dataset::AbstractVector{<:DataSample}; kwargs...
+)
+    return map(dataset) do sample
+        generate_environment(bench, sample.instance; kwargs...)
+    end
+end

From 9fe5e86f3d124f0c6b5f1fe88d99c65a35e5dfaa Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Mon, 7 Jul 2025 17:08:24 +0200
Subject: [PATCH 09/29] now anticipative solver directly creates an epoch
 dataset

---
 .../DynamicVSP/learning/2d_features.jl        |  13 --
 .../DynamicVehicleScheduling.jl               |  18 ++-
 .../algorithms/anticipative_solver.jl         |  74 ++++++++-
 .../algorithms/prize_collecting_vsp.jl        | 150 +++++++++---------
 .../environment/environment.jl                |  14 +-
 .../learning/2d_features.jl                   |  16 ++
 .../{DynamicVSP => }/learning/dataset.jl      |   0
 .../{DynamicVSP => }/learning/features.jl     |   0
 8 files changed, 176 insertions(+), 109 deletions(-)
 delete mode 100644 src/DynamicVehicleScheduling/DynamicVSP/learning/2d_features.jl
 rename src/DynamicVehicleScheduling/{DynamicVSP => }/algorithms/anticipative_solver.jl (59%)
 rename src/DynamicVehicleScheduling/{DynamicVSP => }/algorithms/prize_collecting_vsp.jl (59%)
 create mode 100644 src/DynamicVehicleScheduling/learning/2d_features.jl
 rename src/DynamicVehicleScheduling/{DynamicVSP => }/learning/dataset.jl (100%)
 rename src/DynamicVehicleScheduling/{DynamicVSP => }/learning/features.jl (100%)

diff --git a/src/DynamicVehicleScheduling/DynamicVSP/learning/2d_features.jl b/src/DynamicVehicleScheduling/DynamicVSP/learning/2d_features.jl
deleted file mode 100644
index 7226e9c..0000000
--- a/src/DynamicVehicleScheduling/DynamicVSP/learning/2d_features.jl
+++ /dev/null
@@ -1,13 +0,0 @@
-function get_features_meanTimeToRequests(env::DVSPEnv)
-    quantiles = [0.5]
-    a = env.instance.static_instance.duration[env.state.location_indices, 2:end]
-    quantileTimeToRequests = mapslices(x -> quantile(x, quantiles), a; dims=2)
-    return quantileTimeToRequests
-end
-
-function compute_2D_features(env::DVSPEnv)
-    state = env.state
-    timeDepotRequest = state.state_instance.duration[:, 1][state.is_postponable]
-    quantileTimeToRequests = get_features_meanTimeToRequests(env)[state.is_postponable]
-    return hcat(timeDepotRequest, quantileTimeToRequests)'
-end
diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
index f8d2df6..0b5649a 100644
--- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
+++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
@@ -36,12 +36,12 @@ include("environment/scenario.jl")
 include("environment/environment.jl")
 include("environment/plot.jl")
 
-include("DynamicVSP/algorithms/prize_collecting_vsp.jl")
-include("DynamicVSP/algorithms/anticipative_solver.jl")
+include("algorithms/prize_collecting_vsp.jl")
+include("algorithms/anticipative_solver.jl")
 
-include("DynamicVSP/learning/features.jl")
-include("DynamicVSP/learning/2d_features.jl")
-include("DynamicVSP/learning/dataset.jl")
+include("learning/features.jl")
+include("learning/2d_features.jl")
+include("learning/dataset.jl")
 
 include("policy/abstract_vsp_policy.jl")
 include("policy/greedy_policy.jl")
@@ -62,14 +62,18 @@ function Utils.generate_scenario_generator(::DVSPBenchmark)
 end
 
 function Utils.generate_anticipative_solver(::DVSPBenchmark; kwargs...)
-    return AnticipativeVSPPolicy(; kwargs...)
+    return anticipative_solver
 end
 
 function Utils.generate_environment(::DVSPBenchmark, instance::Instance; kwargs...)
     return DVSPEnv(instance; kwargs...)
 end
 
-export DVSPBenchmark, generate_environment # , generate_sample, generate_anticipative_solver
+function Utils.generate_maximizer(::DVSPBenchmark)
+    return prize_collecting_vsp
+end
+
+export DVSPBenchmark #, generate_environment # , generate_sample, generate_anticipative_solver
 export run_policy!,
     GreedyVSPPolicy, LazyVSPPolicy, KleopatraVSPPolicy, AnticipativeVSPPolicy
 
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/algorithms/anticipative_solver.jl b/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl
similarity index 59%
rename from src/DynamicVehicleScheduling/DynamicVSP/algorithms/anticipative_solver.jl
rename to src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl
index ef897e5..4ff78e2 100644
--- a/src/DynamicVehicleScheduling/DynamicVSP/algorithms/anticipative_solver.jl
+++ b/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl
@@ -116,6 +116,76 @@ function anticipative_solver(
 
     optimize!(model)
 
-    return JuMP.objective_value(model),
-    retrieve_routes_anticipative(value.(y), env, customer_index)
+    obj = JuMP.objective_value(model)
+    epoch_routes = retrieve_routes_anticipative(value.(y), env, customer_index)
+
+    epoch_indices = Vector{Int}[]
+    N = 1
+    indices = [1]
+    for epoch in 1:last_epoch
+        M = length(scenario.indices[epoch])
+        indices = vcat(indices, (N + 1):(N + M))
+        push!(epoch_indices, copy(indices))
+        N = N + M
+        epoch_routes[epoch]
+        dispatched = vcat(epoch_routes[epoch]...)
+        indices = setdiff(indices, dispatched)
+    end
+
+    indices = vcat(1, scenario.indices...)
+    start_time = vcat(0.0, scenario.start_time...)
+    service_time = vcat(0.0, scenario.service_time...)
+
+    dataset = map(1:last_epoch) do epoch
+        routes = epoch_routes[epoch]
+        epoch_customers = epoch_indices[epoch]
+        # y_true = [
+        #     map(idx -> findfirst(==(idx), epoch_customers), route) for route in routes
+        # ]
+
+        y_true =
+            VSPSolution(
+                Vector{Int}[
+                    map(idx -> findfirst(==(idx), epoch_customers), route) for
+                    route in routes
+                ];
+                max_index=length(epoch_customers),
+            ).edge_matrix
+
+        location_indices = indices[epoch_customers]
+        new_coordinates = env.instance.static_instance.coordinate[location_indices]
+        new_start_time = start_time[epoch_customers]
+        new_service_time = service_time[epoch_customers]
+        new_duration = env.instance.static_instance.duration[
+            location_indices, location_indices
+        ]
+        static_instance = StaticInstance(
+            new_coordinates, new_service_time, new_start_time, new_duration
+        )
+
+        is_must_dispatch = falses(length(location_indices))
+        is_postponable = falses(length(location_indices))
+
+        epoch_duration = env.instance.epoch_duration
+        Δ_dispatch = env.instance.Δ_dispatch
+        planning_start_time = (epoch - 1) * epoch_duration + Δ_dispatch
+        is_must_dispatch[2:end] .=
+            planning_start_time .+ epoch_duration .+ @view(new_duration[1, 2:end]) .>
+            new_start_time[2:end]
+        is_postponable[2:end] .= .!is_must_dispatch[2:end]
+
+        state = DVSPState(;
+            state_instance=static_instance,
+            is_must_dispatch,
+            is_postponable,
+            location_indices,
+            current_epoch=epoch,
+        )
+
+        x = compute_2D_features(state, env.instance)
+
+        return DataSample(; instance=state, y_true, x)
+    end
+
+    return obj, dataset
 end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/algorithms/prize_collecting_vsp.jl b/src/DynamicVehicleScheduling/algorithms/prize_collecting_vsp.jl
similarity index 59%
rename from src/DynamicVehicleScheduling/DynamicVSP/algorithms/prize_collecting_vsp.jl
rename to src/DynamicVehicleScheduling/algorithms/prize_collecting_vsp.jl
index 75af6a4..14c51f9 100644
--- a/src/DynamicVehicleScheduling/DynamicVSP/algorithms/prize_collecting_vsp.jl
+++ b/src/DynamicVehicleScheduling/algorithms/prize_collecting_vsp.jl
@@ -127,83 +127,83 @@ function prize_collecting_vsp(
     return retrieve_routes(value.(y), graph)
 end
 
-# ?
-function prize_collecting_vsp_Q(
-    θ::AbstractVector,
-    vals::AbstractVector;
-    instance::DVSPState,
-    model_builder=highs_model,
-    kwargs...,
-)
-    (; duration) = instance.instance
-    graph = create_graph(instance)
-    model = model_builder()
-    set_silent(model)
-    nb_nodes = nv(graph)
-    job_indices = 2:(nb_nodes)
-    @variable(model, y[i=1:nb_nodes, j=1:nb_nodes; has_edge(graph, i, j)] >= 0)
-    θ_ext = fill(0.0, location_count(instance.instance))  # no prize for must dispatch requests, only hard constraints
-    θ_ext[instance.is_postponable] .= θ
-    # v_ext = fill(0.0, nb_locations(instance.instance))  # no prize for must dispatch requests, only hard constraints
-    # v_ext[instance.is_postponable] .= vals
-    @objective(
-        model,
-        Max,
-        sum(
-            (θ_ext[dst(edge)] + vals[dst(edge)] - duration[src(edge), dst(edge)]) *
-            y[src(edge), dst(edge)] for edge in edges(graph)
-        )
-    )
-    @constraint(
-        model,
-        flow[i in 2:nb_nodes],
-        sum(y[j, i] for j in inneighbors(graph, i)) ==
-            sum(y[i, j] for j in outneighbors(graph, i))
-    )
-    @constraint(
-        model, demand[i in job_indices], sum(y[j, i] for j in inneighbors(graph, i)) <= 1
-    )
-    # must dispatch constraints
-    @constraint(
-        model,
-        demand_must_dispatch[i in job_indices; instance.is_must_dispatch[i]],
-        sum(y[j, i] for j in inneighbors(graph, i)) == 1
-    )
-    optimize!(model)
-    return retrieve_routes(value.(y), graph)
-end
+# # ?
+# function prize_collecting_vsp_Q(
+#     θ::AbstractVector,
+#     vals::AbstractVector;
+#     instance::DVSPState,
+#     model_builder=highs_model,
+#     kwargs...,
+# )
+#     (; duration) = instance.instance
+#     graph = create_graph(instance)
+#     model = model_builder()
+#     set_silent(model)
+#     nb_nodes = nv(graph)
+#     job_indices = 2:(nb_nodes)
+#     @variable(model, y[i=1:nb_nodes, j=1:nb_nodes; has_edge(graph, i, j)] >= 0)
+#     θ_ext = fill(0.0, location_count(instance.instance))  # no prize for must dispatch requests, only hard constraints
+#     θ_ext[instance.is_postponable] .= θ
+#     # v_ext = fill(0.0, nb_locations(instance.instance))  # no prize for must dispatch requests, only hard constraints
+#     # v_ext[instance.is_postponable] .= vals
+#     @objective(
+#         model,
+#         Max,
+#         sum(
+#             (θ_ext[dst(edge)] + vals[dst(edge)] - duration[src(edge), dst(edge)]) *
+#             y[src(edge), dst(edge)] for edge in edges(graph)
+#         )
+#     )
+#     @constraint(
+#         model,
+#         flow[i in 2:nb_nodes],
+#         sum(y[j, i] for j in inneighbors(graph, i)) ==
+#             sum(y[i, j] for j in outneighbors(graph, i))
+#     )
+#     @constraint(
+#         model, demand[i in job_indices], sum(y[j, i] for j in inneighbors(graph, i)) <= 1
+#     )
+#     # must dispatch constraints
+#     @constraint(
+#         model,
+#         demand_must_dispatch[i in job_indices; instance.is_must_dispatch[i]],
+#         sum(y[j, i] for j in inneighbors(graph, i)) == 1
+#     )
+#     optimize!(model)
+#     return retrieve_routes(value.(y), graph)
+# end
 
-function my_objective_value(θ, routes; instance)
-    (; duration) = instance.instance
-    total = 0.0
-    θ_ext = fill(0.0, location_count(instance))
-    θ_ext[instance.is_postponable] .= θ
-    for route in routes
-        for (u, v) in partition(vcat(1, route), 2, 1)
-            total += θ_ext[v] - duration[u, v]
-        end
-    end
-    return -total
-end
+# function my_objective_value(θ, routes; instance)
+#     (; duration) = instance.instance
+#     total = 0.0
+#     θ_ext = fill(0.0, location_count(instance))
+#     θ_ext[instance.is_postponable] .= θ
+#     for route in routes
+#         for (u, v) in partition(vcat(1, route), 2, 1)
+#             total += θ_ext[v] - duration[u, v]
+#         end
+#     end
+#     return -total
+# end
 
-function _objective_value(θ, routes; instance)
-    (; duration) = instance.instance
-    total = 0.0
-    θ_ext = fill(0.0, location_count(instance))
-    θ_ext[instance.is_postponable] .= θ
-    mapping = cumsum(instance.is_postponable)
-    g = falses(length(θ))
-    for route in routes
-        for (u, v) in partition(vcat(1, route), 2, 1)
-            total -= duration[u, v]
-            if instance.is_postponable[v]
-                total += θ_ext[v]
-                g[mapping[v]] = 1
-            end
-        end
-    end
-    return -total, g
-end
+# function _objective_value(θ, routes; instance)
+#     (; duration) = instance.instance
+#     total = 0.0
+#     θ_ext = fill(0.0, location_count(instance))
+#     θ_ext[instance.is_postponable] .= θ
+#     mapping = cumsum(instance.is_postponable)
+#     g = falses(length(θ))
+#     for route in routes
+#         for (u, v) in partition(vcat(1, route), 2, 1)
+#             total -= duration[u, v]
+#             if instance.is_postponable[v]
+#                 total += θ_ext[v]
+#                 g[mapping[v]] = 1
+#             end
+#         end
+#     end
+#     return -total, g
+# end
 
 # function ChainRulesCore.rrule(::typeof(my_objective_value), θ, routes; instance)
 #     total, g = _objective_value(θ, routes; instance)
diff --git a/src/DynamicVehicleScheduling/environment/environment.jl b/src/DynamicVehicleScheduling/environment/environment.jl
index a09db9a..ffab69b 100644
--- a/src/DynamicVehicleScheduling/environment/environment.jl
+++ b/src/DynamicVehicleScheduling/environment/environment.jl
@@ -52,15 +52,6 @@ Check if the episode is terminated, i.e. if the current epoch is the last one.
 """
 CommonRLInterface.terminated(env::DVSPEnv) = current_epoch(env) >= last_epoch(env)
 
-"""
-draw new customers in scenario
-"""
-function draw_next_epoch!(env::DVSPEnv, scenario=env.scenario)
-    env.state.current_epoch += 1
-
-    return nothing
-end
-
 """
 $TYPEDSIGNATURES
 
@@ -78,10 +69,9 @@ remove dispatched customers, advance time, and add new requests to the environme
 function CommonRLInterface.act!(env::DVSPEnv, routes, scenario=env.scenario)
     reward = -apply_routes!(env.state, routes)
     env.state.current_epoch += 1
-    if current_epoch(env) > last_epoch(env)
-        return nothing
+    if current_epoch(env) <= last_epoch(env)
+        add_new_customers!(env.state, env.instance; scenario[current_epoch(env)]...)
     end
-    add_new_customers!(env.state, env.instance; scenario[current_epoch(env)]...)
     return reward
 end
 
diff --git a/src/DynamicVehicleScheduling/learning/2d_features.jl b/src/DynamicVehicleScheduling/learning/2d_features.jl
new file mode 100644
index 0000000..6e23810
--- /dev/null
+++ b/src/DynamicVehicleScheduling/learning/2d_features.jl
@@ -0,0 +1,16 @@
+function get_features_meanTimeToRequests(state::DVSPState, instance::Instance)
+    quantiles = [0.5]
+    a = instance.static_instance.duration[state.location_indices, 2:end]
+    quantileTimeToRequests = mapslices(x -> quantile(x, quantiles), a; dims=2)
+    return quantileTimeToRequests
+end
+
+function compute_2D_features(state::DVSPState, instance::Instance)
+    timeDepotRequest = state.state_instance.duration[:, 1][state.is_postponable]
+    quantileTimeToRequests = get_features_meanTimeToRequests(state, instance)[state.is_postponable]
+    return hcat(timeDepotRequest, quantileTimeToRequests)'
+end
+
+function compute_2D_features(env::DVSPEnv)
+    return compute_2D_features(env.state, env.instance)
+end
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/learning/dataset.jl b/src/DynamicVehicleScheduling/learning/dataset.jl
similarity index 100%
rename from src/DynamicVehicleScheduling/DynamicVSP/learning/dataset.jl
rename to src/DynamicVehicleScheduling/learning/dataset.jl
diff --git a/src/DynamicVehicleScheduling/DynamicVSP/learning/features.jl b/src/DynamicVehicleScheduling/learning/features.jl
similarity index 100%
rename from src/DynamicVehicleScheduling/DynamicVSP/learning/features.jl
rename to src/DynamicVehicleScheduling/learning/features.jl

From 67a0fa9ecc70243d64fab6bbf0bdd08e4006b44e Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Mon, 7 Jul 2025 18:00:39 +0200
Subject: [PATCH 10/29] fix tests and cleanup

---
 Project.toml                                  |   2 +
 docs/src/warcraft.md                          | 155 +++++++++++
 .../DynamicVehicleScheduling.jl               |  39 ++-
 .../abstract_policy.jl                        |   5 -
 .../environment/environment.jl                |   5 +-
 .../environment/instance.jl                   |   8 -
 .../environment/plot.jl                       | 242 +++++++++---------
 .../learning/dataset.jl                       |  37 ---
 src/DynamicVehicleScheduling/maximizer.jl     |  25 ++
 .../policy/abstract_vsp_policy.jl             |   6 +
 src/DynamicVehicleScheduling/utils.jl         |   9 -
 11 files changed, 340 insertions(+), 193 deletions(-)
 create mode 100644 docs/src/warcraft.md
 delete mode 100644 src/DynamicVehicleScheduling/abstract_policy.jl
 delete mode 100644 src/DynamicVehicleScheduling/learning/dataset.jl
 create mode 100644 src/DynamicVehicleScheduling/maximizer.jl

diff --git a/Project.toml b/Project.toml
index b9695a6..03e2d27 100644
--- a/Project.toml
+++ b/Project.toml
@@ -13,6 +13,7 @@ Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
 HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b"
 Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0"
+InferOpt = "4846b161-c94e-4150-8dac-c7ae193c601f"
 Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9"
 IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
 JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
@@ -40,6 +41,7 @@ Flux = "0.14, 0.15, 0.16"
 Graphs = "1.11"
 HiGHS = "1.9"
 Images = "0.26.1"
+InferOpt = "0.7.0"
 Ipopt = "1.6"
 IterTools = "1.10.0"
 JSON = "0.21.4"
diff --git a/docs/src/warcraft.md b/docs/src/warcraft.md
new file mode 100644
index 0000000..c3400e7
--- /dev/null
+++ b/docs/src/warcraft.md
@@ -0,0 +1,155 @@
+```@meta
+EditURL = "tutorials/warcraft.jl"
+```
+
+# Path-finding on image maps
+
+In this tutorial, we showcase DecisionFocusedLearningBenchmarks.jl capabilities on one of its main benchmarks: the Warcraft benchmark.
+This benchmark problem is a simple path-finding problem where the goal is to find the shortest path between the top left and bottom right corners of a given image map.
+The map is represented as a 2D image representing a 12x12 grid, each cell having an unknown travel cost depending on the terrain type.
+
+First, let's load the package and create a benchmark object as follows:
+
+````@example warcraft
+using DecisionFocusedLearningBenchmarks
+b = WarcraftBenchmark()
+````
+
+## Dataset generation
+
+These benchmark objects behave as generators that can generate various needed elements in order to build an algorithm to tackle the problem.
+First of all, all benchmarks are capable of generating datasets as needed, using the [`generate_dataset`](@ref) method.
+This method takes as input the benchmark object for which the dataset is to be generated, and a second argument specifying the number of samples to generate:
+
+````@example warcraft
+dataset = generate_dataset(b, 50);
+nothing #hide
+````
+
+We obtain a vector of [`DataSample`](@ref) objects, containing all needed data for the problem.
+Subdatasets can be created through regular slicing:
+
+````@example warcraft
+train_dataset, test_dataset = dataset[1:45], dataset[46:50]
+````
+
+And getting an individual sample will return a [`DataSample`](@ref) with four fields: `x`, `instance`, `θ_true`, and `y_true`:
+
+````@example warcraft
+sample = test_dataset[1]
+````
+
+`x` corresponds to the input features, i.e. the input image (3D array) in the Warcraft benchmark case:
+
+````@example warcraft
+x = sample.x
+````
+
+`θ_true` corresponds to the true unknown terrain weights. We use the opposite of the true weights in order to formulate the optimization problem as a maximization problem:
+
+````@example warcraft
+θ_true = sample.θ_true
+````
+
+`y_true` corresponds to the optimal shortest path, encoded as a binary matrix:
+
+````@example warcraft
+y_true = sample.y_true
+````
+
+`instance` is not used in this benchmark, therefore set to nothing:
+
+````@example warcraft
+isnothing(sample.instance)
+````
+
+For some benchmarks, we provide the following plotting method [`plot_data`](@ref) to visualize the data:
+
+````@example warcraft
+plot_data(b, sample)
+````
+
+We can see here the terrain image, the true terrain weights, and the true shortest path avoiding the high cost cells.
+
+## Building a pipeline
+
+DecisionFocusedLearningBenchmarks also provides methods to build a hybrid machine learning and combinatorial optimization pipeline for the benchmark.
+First, the [`generate_statistical_model`](@ref) method generates a machine learning predictor to predict cell weights from the input image:
+
+````@example warcraft
+model = generate_statistical_model(b)
+````
+
+In the case of the Warcraft benchmark, the model is a convolutional neural network built using the Flux.jl package.
+
+````@example warcraft
+θ = model(x)
+````
+
+Note that the model is not trained yet, and its parameters are randomly initialized.
+
+Finally, the [`generate_maximizer`](@ref) method can be used to generate a combinatorial optimization algorithm that takes the predicted cell weights as input and returns the corresponding shortest path:
+
+````@example warcraft
+maximizer = generate_maximizer(b; dijkstra=true)
+````
+
+In the case of the Warcraft benchmark, the method has an additional keyword argument to choose the algorithm to use: Dijkstra's algorithm or Bellman-Ford algorithm.
+
+````@example warcraft
+y = maximizer(θ)
+````
+
+As we can see, currently the pipeline predicts random noise as cell weights, and therefore the maximizer returns a straight line path.
+
+````@example warcraft
+plot_data(b, DataSample(; x, θ_true=θ, y_true=y))
+````
+
+We can evaluate the current pipeline performance using the optimality gap metric:
+
+````@example warcraft
+starting_gap = compute_gap(b, test_dataset, model, maximizer)
+````
+
+## Using a learning algorithm
+
+We can now train the model using the InferOpt.jl package:
+
+````@example warcraft
+using InferOpt
+using Flux
+using Plots
+
+perturbed_maximizer = PerturbedMultiplicative(maximizer; ε=0.2, nb_samples=100)
+loss = FenchelYoungLoss(perturbed_maximizer)
+
+starting_gap = compute_gap(b, test_dataset, model, maximizer)
+
+opt_state = Flux.setup(Adam(1e-3), model)
+loss_history = Float64[]
+for epoch in 1:50
+    val, grads = Flux.withgradient(model) do m
+        sum(loss(m(x), y_true) for (; x, y_true) in train_dataset) / length(train_dataset)
+    end
+    Flux.update!(opt_state, model, grads[1])
+    push!(loss_history, val)
+end
+
+plot(loss_history; xlabel="Epoch", ylabel="Loss", title="Training loss")
+````
+
+````@example warcraft
+final_gap = compute_gap(b, test_dataset, model, maximizer)
+````
+
+````@example warcraft
+θ = model(x)
+y = maximizer(θ)
+plot_data(b, DataSample(; x, θ_true=θ, y_true=y))
+````
+
+---
+
+*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).*
+
diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
index 0b5649a..34476b7 100644
--- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
+++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
@@ -5,11 +5,10 @@ using ..Utils
 using Base: @kwdef
 using CommonRLInterface: CommonRLInterface, AbstractEnv, reset!, terminated, observe, act!
 using DataDeps: @datadep_str
-# using ChainRulesCore
 using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES
 using Graphs
 using HiGHS
-# using InferOpt
+using InferOpt: LinearMaximizer
 using IterTools: partition
 using JSON
 using JuMP
@@ -21,8 +20,6 @@ using Statistics: mean, quantile
 
 include("utils.jl")
 
-include("abstract_policy.jl")
-
 # static vsp stuff
 include("static_vsp/instance.jl")
 include("static_vsp/parsing.jl")
@@ -41,7 +38,6 @@ include("algorithms/anticipative_solver.jl")
 
 include("learning/features.jl")
 include("learning/2d_features.jl")
-include("learning/dataset.jl")
 
 include("policy/abstract_vsp_policy.jl")
 include("policy/greedy_policy.jl")
@@ -49,12 +45,33 @@ include("policy/lazy_policy.jl")
 include("policy/anticipative_policy.jl")
 include("policy/kleopatra_policy.jl")
 
-struct DVSPBenchmark <: AbstractDynamicBenchmark end
+include("maximizer.jl")
+
+"""
+$TYPEDEF
+
+Concrete benchmark type for the dynamic vehicle scheduling problem.
+"""
+@kwdef struct DVSPBenchmark <: AbstractDynamicBenchmark
+    max_requests_per_epoch::Int = 10
+    Δ_dispatch::Float64 = 1.0
+    epoch_duration::Float64 = 1.0
+end
 
-function Utils.generate_sample(b::DVSPBenchmark, rng::AbstractRNG)
-    return DataSample(;
-        instance=Instance(read_vsp_instance(readdir(datadep"dvrptw"; join=true)[1]))
-    )
+function Utils.generate_dataset(b::DVSPBenchmark, dataset_size::Int=1)
+    (; max_requests_per_epoch, Δ_dispatch, epoch_duration) = b
+    files = readdir(datadep"dvrptw"; join=true)
+    dataset_size = min(dataset_size, length(files))
+    return [
+        DataSample(;
+            instance=Instance(
+                read_vsp_instance(files[i]);
+                max_requests_per_epoch,
+                Δ_dispatch,
+                epoch_duration,
+            ),
+        ) for i in 1:dataset_size
+    ]
 end
 
 function Utils.generate_scenario_generator(::DVSPBenchmark)
@@ -70,7 +87,7 @@ function Utils.generate_environment(::DVSPBenchmark, instance::Instance; kwargs.
 end
 
 function Utils.generate_maximizer(::DVSPBenchmark)
-    return prize_collecting_vsp
+    return LinearMaximizer(oracle; g, h)
 end
 
 export DVSPBenchmark #, generate_environment # , generate_sample, generate_anticipative_solver
diff --git a/src/DynamicVehicleScheduling/abstract_policy.jl b/src/DynamicVehicleScheduling/abstract_policy.jl
deleted file mode 100644
index 951efd0..0000000
--- a/src/DynamicVehicleScheduling/abstract_policy.jl
+++ /dev/null
@@ -1,5 +0,0 @@
-abstract type AbstractDynamicPolicy end
-
-function (π::AbstractDynamicPolicy)(env; kwargs...)
-    throw("Not implemented")
-end
diff --git a/src/DynamicVehicleScheduling/environment/environment.jl b/src/DynamicVehicleScheduling/environment/environment.jl
index ffab69b..0bbc4ae 100644
--- a/src/DynamicVehicleScheduling/environment/environment.jl
+++ b/src/DynamicVehicleScheduling/environment/environment.jl
@@ -45,12 +45,13 @@ $TYPEDSIGNATURES
 Get the planning start time of the environment, i.e. the time at which vehicles routes dispatched in current epoch can depart.
 """
 planning_start_time(env::DVSPEnv) = time(env) + Δ_dispatch(env)
+
 """
 $TYPEDSIGNATURES
 
 Check if the episode is terminated, i.e. if the current epoch is the last one.
 """
-CommonRLInterface.terminated(env::DVSPEnv) = current_epoch(env) >= last_epoch(env)
+CommonRLInterface.terminated(env::DVSPEnv) = current_epoch(env) > last_epoch(env)
 
 """
 $TYPEDSIGNATURES
@@ -69,7 +70,7 @@ remove dispatched customers, advance time, and add new requests to the environme
 function CommonRLInterface.act!(env::DVSPEnv, routes, scenario=env.scenario)
     reward = -apply_routes!(env.state, routes)
     env.state.current_epoch += 1
-    if current_epoch(env) <= last_epoch(env)
+    if !CommonRLInterface.terminated(env)
         add_new_customers!(env.state, env.instance; scenario[current_epoch(env)]...)
     end
     return reward
diff --git a/src/DynamicVehicleScheduling/environment/instance.jl b/src/DynamicVehicleScheduling/environment/instance.jl
index b375077..17d5e9d 100644
--- a/src/DynamicVehicleScheduling/environment/instance.jl
+++ b/src/DynamicVehicleScheduling/environment/instance.jl
@@ -14,8 +14,6 @@ Instance data structure for the dynamic vehicle scheduling problem.
     epoch_duration::T = 1.0
     "last epoch index"
     last_epoch::Int
-    # "seed for customer sampling"
-    # seed::S
 end
 
 function Instance(
@@ -44,9 +42,3 @@ end
 epoch_duration(instance::Instance) = instance.epoch_duration
 last_epoch(instance::Instance) = instance.last_epoch
 max_requests_per_epoch(instance::Instance) = instance.max_requests_per_epoch
-# static_instance(instance::Instance) = instance.static_instance
-
-# duration(instance::Instance) = duration(instance.static_instance)
-# service_time(instance::Instance) = service_time(instance.static_instance)
-# coordinate(instance::Instance) = coordinate(instance.static_instance)
-# start_time(instance::Instance) = start_time(instance.static_instance)
diff --git a/src/DynamicVehicleScheduling/environment/plot.jl b/src/DynamicVehicleScheduling/environment/plot.jl
index 409ad79..53831e8 100644
--- a/src/DynamicVehicleScheduling/environment/plot.jl
+++ b/src/DynamicVehicleScheduling/environment/plot.jl
@@ -1,134 +1,134 @@
-"""
-$TYPEDSIGNATURES
+# """
+# $TYPEDSIGNATURES
 
-Plot the environment of a DVSPEnv, restricted to the given `epoch_indices` (all epoch if not given).
-"""
-function plot_environment(
-    env::DVSPEnv;
-    customer_markersize=4,
-    depot_markersize=7,
-    alpha_depot=0.8,
-    depot_color=:lightgreen,
-    epoch_indices=nothing,
-    kwargs...,
-)
-    draw_all_epochs!(env)
+# Plot the environment of a DVSPEnv, restricted to the given `epoch_indices` (all epochs if not given).
+# """
+# function plot_environment(
+#     env::DVSPEnv;
+#     customer_markersize=4,
+#     depot_markersize=7,
+#     alpha_depot=0.8,
+#     depot_color=:lightgreen,
+#     epoch_indices=nothing,
+#     kwargs...,
+# )
+#     draw_all_epochs!(env)
 
-    epoch_appearance = env.request_epoch
-    coordinates = coordinate(get_state(env))
+#     epoch_appearance = env.request_epoch
+#     coordinates = coordinate(get_state(env))
 
-    epoch_indices = isnothing(epoch_indices) ? get_epoch_indices(env) : epoch_indices
+#     epoch_indices = isnothing(epoch_indices) ? get_epoch_indices(env) : epoch_indices
 
-    xlims = (minimum(c.x for c in coordinates), maximum(c.x for c in coordinates))
-    ylims = (minimum(c.y for c in coordinates), maximum(c.y for c in coordinates))
+#     xlims = (minimum(c.x for c in coordinates), maximum(c.x for c in coordinates))
+#     ylims = (minimum(c.y for c in coordinates), maximum(c.y for c in coordinates))
 
-    fig = plot(;
-        legend=:topleft,
-        xlabel="x coordinate",
-        ylabel="y coordinate",
-        xlims,
-        ylims,
-        kwargs...,
-    )
+#     fig = plot(;
+#         legend=:topleft,
+#         xlabel="x coordinate",
+#         ylabel="y coordinate",
+#         xlims,
+#         ylims,
+#         kwargs...,
+#     )
 
-    for epoch in epoch_indices
-        requests = findall(epoch_appearance .== epoch)
-        x = [coordinates[request].x for request in requests]
-        y = [coordinates[request].y for request in requests]
-        scatter!(
-            fig, x, y; label="Epoch $epoch", marker=:circle, markersize=customer_markersize
-        )
-    end
-    scatter!(
-        fig,
-        [coordinates[1].x],
-        [coordinates[1].y];
-        label="Depot",
-        markercolor=depot_color,
-        marker=:rect,
-        markersize=depot_markersize,
-        alpha=alpha_depot,
-    )
+#     for epoch in epoch_indices
+#         requests = findall(epoch_appearance .== epoch)
+#         x = [coordinates[request].x for request in requests]
+#         y = [coordinates[request].y for request in requests]
+#         scatter!(
+#             fig, x, y; label="Epoch $epoch", marker=:circle, markersize=customer_markersize
+#         )
+#     end
+#     scatter!(
+#         fig,
+#         [coordinates[1].x],
+#         [coordinates[1].y];
+#         label="Depot",
+#         markercolor=depot_color,
+#         marker=:rect,
+#         markersize=depot_markersize,
+#         alpha=alpha_depot,
+#     )
 
-    return fig
-end
+#     return fig
+# end
 
-"""
-$TYPEDSIGNATURES
+# """
+# $TYPEDSIGNATURES
 
-Plot the given `routes`` for a VSP `state`.
-"""
-function plot_epoch(state::DVSPState, routes; kwargs...)
-    (; coordinate, start_time) = state.instance
-    x_depot = coordinate[1].x
-    y_depot = coordinate[1].y
-    X = [p.x for p in coordinate]
-    Y = [p.y for p in coordinate]
-    markersize = 5
-    fig = plot(;
-        legend=:topleft, xlabel="x", ylabel="y", clim=(0.0, maximum(start_time)), kwargs...
-    )
-    for route in routes
-        x_points = vcat(x_depot, X[route], x_depot)
-        y_points = vcat(y_depot, Y[route], y_depot)
-        plot!(fig, x_points, y_points; label=nothing)
-    end
-    scatter!(
-        fig,
-        [x_depot],
-        [y_depot];
-        label="depot",
-        markercolor=:lightgreen,
-        markersize,
-        marker=:rect,
-    )
-    if sum(state.is_postponable) > 0
-        scatter!(
-            fig,
-            X[state.is_postponable],
-            Y[state.is_postponable];
-            label="Postponable customers",
-            marker_z=start_time[state.is_postponable],
-            markersize,
-            colormap=:turbo,
-            marker=:utriangle,
-        )
-    end
-    if sum(state.is_must_dispatch) > 0
-        scatter!(
-            fig,
-            X[state.is_must_dispatch],
-            Y[state.is_must_dispatch];
-            label="Must-dispatch customers",
-            marker_z=start_time[state.is_must_dispatch],
-            markersize,
-            colormap=:turbo,
-            marker=:star5,
-        )
-    end
-    return fig
-end
+# Plot the given `routes` for a VSP `state`.
+# """
+# function plot_epoch(state::DVSPState, routes; kwargs...)
+#     (; coordinate, start_time) = state.instance
+#     x_depot = coordinate[1].x
+#     y_depot = coordinate[1].y
+#     X = [p.x for p in coordinate]
+#     Y = [p.y for p in coordinate]
+#     markersize = 5
+#     fig = plot(;
+#         legend=:topleft, xlabel="x", ylabel="y", clim=(0.0, maximum(start_time)), kwargs...
+#     )
+#     for route in routes
+#         x_points = vcat(x_depot, X[route], x_depot)
+#         y_points = vcat(y_depot, Y[route], y_depot)
+#         plot!(fig, x_points, y_points; label=nothing)
+#     end
+#     scatter!(
+#         fig,
+#         [x_depot],
+#         [y_depot];
+#         label="depot",
+#         markercolor=:lightgreen,
+#         markersize,
+#         marker=:rect,
+#     )
+#     if sum(state.is_postponable) > 0
+#         scatter!(
+#             fig,
+#             X[state.is_postponable],
+#             Y[state.is_postponable];
+#             label="Postponable customers",
+#             marker_z=start_time[state.is_postponable],
+#             markersize,
+#             colormap=:turbo,
+#             marker=:utriangle,
+#         )
+#     end
+#     if sum(state.is_must_dispatch) > 0
+#         scatter!(
+#             fig,
+#             X[state.is_must_dispatch],
+#             Y[state.is_must_dispatch];
+#             label="Must-dispatch customers",
+#             marker_z=start_time[state.is_must_dispatch],
+#             markersize,
+#             colormap=:turbo,
+#             marker=:star5,
+#         )
+#     end
+#     return fig
+# end
 
-"""
-$TYPEDSIGNATURES
+# """
+# $TYPEDSIGNATURES
 
-Create a plot of routes for each epoch.
-"""
-function plot_routes(env::DVSPEnv, routes; epoch_indices=nothing, kwargs...)
-    reset!(env)
-    epoch_indices = isnothing(epoch_indices) ? get_epoch_indices(env) : epoch_indices
+# Create a plot of routes for each epoch.
+# """
+# function plot_routes(env::DVSPEnv, routes; epoch_indices=nothing, kwargs...)
+#     reset!(env)
+#     epoch_indices = isnothing(epoch_indices) ? get_epoch_indices(env) : epoch_indices
 
-    coordinates = env.config.static_instance.coordinate
-    xlims = (minimum(c.x for c in coordinates), maximum(c.x for c in coordinates))
-    ylims = (minimum(c.y for c in coordinates), maximum(c.y for c in coordinates))
+#     coordinates = env.config.static_instance.coordinate
+#     xlims = (minimum(c.x for c in coordinates), maximum(c.x for c in coordinates))
+#     ylims = (minimum(c.y for c in coordinates), maximum(c.y for c in coordinates))
 
-    figs = map(epoch_indices) do epoch
-        s = next_epoch!(env)
-        fig = plot_epoch(
-            s, state_route_from_env_routes(env, routes[epoch]); xlims, ylims, kwargs...
-        )
-        apply_decision!(env, routes[epoch])
-        return fig
-    end
-    return figs
-end
+#     figs = map(epoch_indices) do epoch
+#         s = next_epoch!(env)
+#         fig = plot_epoch(
+#             s, state_route_from_env_routes(env, routes[epoch]); xlims, ylims, kwargs...
+#         )
+#         apply_decision!(env, routes[epoch])
+#         return fig
+#     end
+#     return figs
+# end
diff --git a/src/DynamicVehicleScheduling/learning/dataset.jl b/src/DynamicVehicleScheduling/learning/dataset.jl
deleted file mode 100644
index da37b59..0000000
--- a/src/DynamicVehicleScheduling/learning/dataset.jl
+++ /dev/null
@@ -1,37 +0,0 @@
-function load_VSP_dataset(
-    datadir::String; model_builder=highs_model, use_2D_features=false, kwargs...
-)
-    instances_files = filtered_readdir(datadir)
-    X = Tuple{Matrix{Float32},DVSPState{VSPInstance{Float64}}}[]
-    Y = BitMatrix[]
-
-    for (i, f) in enumerate(instances_files)
-        static_instance = read_vsp_instance((joinpath(datadir, f)))
-        env = DVSPEnv(static_instance; seed=i, kwargs...)
-
-        # Compute the anticipative policy
-        routes_anticipative = anticipative_solver(env; model_builder)
-        reset!(env)
-        for e in eachindex(routes_anticipative)
-            next_epoch!(env)
-            # Store the state
-            state = env.state
-            features = Matrix(
-                use_2D_features ? compute_2D_features(env) : compute_features(env)
-            )
-            push!(X, (features, state))
-            routes = routes_anticipative[e]
-            # Store the solution
-            push!(
-                Y,
-                VSPSolution(
-                    state_route_from_env_routes(env, routes);
-                    max_index=location_count(state.instance),
-                ).edge_matrix,
-            )
-            # Update the environment
-            apply_decision!(env, routes)
-        end
-    end
-    return X, Y
-end
diff --git a/src/DynamicVehicleScheduling/maximizer.jl b/src/DynamicVehicleScheduling/maximizer.jl
new file mode 100644
index 0000000..eecbf57
--- /dev/null
+++ b/src/DynamicVehicleScheduling/maximizer.jl
@@ -0,0 +1,25 @@
+"""
+    oracle(θ; instance::DVSPState, kwargs...)
+
+Call `prize_collecting_vsp` with `θ` on the state `instance` and return the `edge_matrix`
+of the `VSPSolution` built from the resulting routes.
+
+Extra keyword arguments are forwarded to `prize_collecting_vsp`.
+"""
+function oracle(θ; instance::DVSPState, kwargs...)
+    routes = prize_collecting_vsp(θ; instance, kwargs...)
+    n_locations = location_count(instance.state_instance)
+    solution = VSPSolution(routes; max_index=n_locations)
+    return solution.edge_matrix
+end
+
+"""
+    g(y; instance, kwargs...)
+
+Sum each column of `y` selected by `instance.is_postponable` and return the column sums
+as a vector. Extra keyword arguments are ignored.
+"""
+function g(y; instance, kwargs...)
+    postponable_columns = y[:, instance.is_postponable]
+    column_totals = sum(postponable_columns; dims=1)
+    return vec(column_totals)
+end
+
+"""
+    h(y, duration)
+
+Return the negated sum of `y[i, j] * duration[i, j]` over all `i, j` in `1:N`, where
+`N = size(duration, 1)`. The accumulation starts from `0.0`.
+"""
+function h(y, duration)
+    N = size(duration, 1)
+    # Row-major traversal (i outer, j inner), same order as a nested `for i, for j` loop.
+    arc_costs = (y[i, j] * duration[i, j] for i in 1:N for j in 1:N)
+    # Left fold with subtraction: ((0.0 - c₁) - c₂) - …
+    return foldl(-, arc_costs; init=0.0)
+end
+
+"""
+    h(y; instance, kwargs...)
+
+Evaluate `h(y, duration)` with the duration matrix `instance.state_instance.duration`.
+Extra keyword arguments are ignored.
+"""
+function h(y; instance, kwargs...)
+    durations = instance.state_instance.duration
+    return h(y, durations)
+end
diff --git a/src/DynamicVehicleScheduling/policy/abstract_vsp_policy.jl b/src/DynamicVehicleScheduling/policy/abstract_vsp_policy.jl
index bd640fe..0a1f755 100644
--- a/src/DynamicVehicleScheduling/policy/abstract_vsp_policy.jl
+++ b/src/DynamicVehicleScheduling/policy/abstract_vsp_policy.jl
@@ -1,3 +1,9 @@
+"""
+    AbstractDynamicPolicy
+
+Abstract supertype of dynamic policies. A policy is called on an environment as
+`π(env; kwargs...)`; concrete subtypes are expected to provide that method.
+"""
+abstract type AbstractDynamicPolicy end
+
+# Fallback for subtypes that do not define their own call method.
+# Raises an `ErrorException` (rather than throwing a bare `String`) naming the policy type.
+function (π::AbstractDynamicPolicy)(env; kwargs...)
+    return error("Not implemented: $(typeof(π)) does not define `π(env; kwargs...)`")
+end
+
 """
 $TYPEDEF
 
diff --git a/src/DynamicVehicleScheduling/utils.jl b/src/DynamicVehicleScheduling/utils.jl
index 36eebd2..bd1dfe8 100644
--- a/src/DynamicVehicleScheduling/utils.jl
+++ b/src/DynamicVehicleScheduling/utils.jl
@@ -35,12 +35,3 @@ struct Point{T}
 end
 
 Base.show(io::IO, p::Point) = print(io, "($(p.x), $(p.y))")
-
-# """
-# $TYPEDSIGNATURES
-
-# Readdir all files in `data` with extension `file_extension`.
-# """
-# function filtered_readdir(data, file_extension=".txt"; kwargs...)
-#     return filter(x -> endswith(x, file_extension), readdir(data; kwargs...))
-# end

From 20e8a4ffaedb5439d57a4ef3c4336375bda7876b Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Mon, 7 Jul 2025 18:27:19 +0200
Subject: [PATCH 11/29] fix features

---
 .../algorithms/anticipative_solver.jl         |  6 +-
 .../learning/features.jl                      | 67 +++++--------------
 2 files changed, 19 insertions(+), 54 deletions(-)

diff --git a/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl b/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl
index 4ff78e2..d0404b2 100644
--- a/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl
+++ b/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl
@@ -139,9 +139,6 @@ function anticipative_solver(
     dataset = map(1:last_epoch) do epoch
         routes = epoch_routes[epoch]
         epoch_customers = epoch_indices[epoch]
-        # y_true = [
-        #     map(idx -> findfirst(==(idx), epoch_customers), route) for route in routes
-        # ]
 
         y_true =
             VSPSolution(
@@ -182,7 +179,8 @@ function anticipative_solver(
             current_epoch=epoch,
         )
 
-        x = compute_2D_features(state, env.instance)
+        # x = compute_2D_features(state, env.instance)
+        x = compute_features(state, env.instance)
 
         return DataSample(; instance=state, y_true, x)
     end
diff --git a/src/DynamicVehicleScheduling/learning/features.jl b/src/DynamicVehicleScheduling/learning/features.jl
index 0cb4160..c9470c3 100644
--- a/src/DynamicVehicleScheduling/learning/features.jl
+++ b/src/DynamicVehicleScheduling/learning/features.jl
@@ -1,21 +1,19 @@
-function get_features_quantileTimeToRequests(env::DVSPEnv)
+function get_features_quantileTimeToRequests(state::DVSPState, instance::Instance)
     quantiles = [i * 0.1 for i in 1:9]
-    a = env.config.static_instance.duration[
-        env.customer_index[.!env.request_is_dispatched], 2:end
-    ]
+    a = instance.static_instance.duration[state.location_indices, 2:end]
     quantileTimeToRequests = mapslices(x -> quantile(x, quantiles), a; dims=2)
     return quantileTimeToRequests
 end
 
-function compute_model_free_features(state::DVSPState; env::DVSPEnv)
-    (; instance, is_postponable) = state
+function compute_model_free_features(state::DVSPState, instance::Instance)
+    (; state_instance, is_postponable) = state
 
-    startTimes = instance.start_time
-    endTimes = startTimes .+ instance.service_time
-    timeDepotRequest = instance.duration[:, 1]
-    timeRequestDepot = instance.duration[1, :]
+    startTimes = state_instance.start_time
+    endTimes = startTimes .+ state_instance.service_time
+    timeDepotRequest = state_instance.duration[:, 1]
+    timeRequestDepot = state_instance.duration[1, :]
 
-    slack_next_epoch = startTimes .- env.config.epoch_duration
+    slack_next_epoch = startTimes .- instance.epoch_duration
 
     model_free_features = hcat(
         startTimes[is_postponable], # 1
@@ -27,49 +25,18 @@ function compute_model_free_features(state::DVSPState; env::DVSPEnv)
     return model_free_features
 end
 
-function compute_model_aware_features(state::DVSPState; env::DVSPEnv)
-    quantileTimeToRequests = get_features_quantileTimeToRequests(env)
+function compute_model_aware_features(state::DVSPState, instance::Instance)
+    quantileTimeToRequests = get_features_quantileTimeToRequests(state, instance)
     model_aware_features = quantileTimeToRequests
     return model_aware_features[state.is_postponable, :]
 end
 
-function compute_features(env::DVSPEnv)
-    state = env.state
-    model_free_features = compute_model_free_features(state; env)
-    model_aware_features = compute_model_aware_features(state; env)
+function compute_features(state::DVSPState, instance::Instance)
+    model_free_features = compute_model_free_features(state, instance)
+    model_aware_features = compute_model_aware_features(state, instance)
     return hcat(model_free_features, model_aware_features)'
 end
 
-# # ? why is this needed
-# function model_free_features_critic(state::DVSPState; env::DVSPEnv)
-#     (; instance) = state
-#     startTimes = instance.start_time
-#     endTimes = instance.service_time .+ instance.start_time
-#     timeDepotRequest = instance.duration[:, 1]
-#     timeRequestDepot = instance.duration[1, :]
-#     slack_next_epoch = startTimes .- env.config.epoch_duration
-#     model_free_features = hcat(
-#         startTimes, endTimes, timeDepotRequest, timeRequestDepot, slack_next_epoch
-#     )
-#     return model_free_features
-# end
-
-# # ?
-# function compute_critic_features(env::DVSPEnv)
-#     state = env.state
-#     model_free_features = model_free_features_critic(state; env)
-#     model_aware_features = get_features_quantileTimeToRequests(env)
-#     postpon = state.is_postponable
-#     return hcat(model_free_features, model_aware_features, postpon)'
-# end
-
-# # ?
-# function compute_critic_2D_features(env::DVSPEnv)
-#     state = env.state
-#     timeDepotRequest = state.instance.duration[:, 1]
-#     quantileTimeToRequests = get_features_meanTimeToRequests(env)
-#     postpon = state.is_postponable
-#     # time_postpon = timeDepotRequest .* postpon
-#     # quant_postpon = quantileTimeToRequests .* postpon
-#     return hcat(timeDepotRequest, quantileTimeToRequests, postpon)'
-# end
+"""
+$TYPEDSIGNATURES
+
+Compute the features of the current state of `env`, i.e. of `env.state` together with
+`env.instance`.
+"""
+compute_features(env::DVSPEnv) = compute_features(env.state, env.instance)

From b30fe3316c356967dbfe92c855d3156712930c53 Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Thu, 10 Jul 2025 18:07:27 +0200
Subject: [PATCH 12/29] update

---
 src/DynamicAssortment/DynamicAssortment.jl    |  0
 .../DynamicVehicleScheduling.jl               | 24 ++++++++++++-------
 .../algorithms/anticipative_solver.jl         | 24 +++++++++++++++++--
 .../environment/plot.jl                       |  4 ++++
 src/Utils/interface.jl                        |  4 +++-
 5 files changed, 45 insertions(+), 11 deletions(-)
 create mode 100644 src/DynamicAssortment/DynamicAssortment.jl

diff --git a/src/DynamicAssortment/DynamicAssortment.jl b/src/DynamicAssortment/DynamicAssortment.jl
new file mode 100644
index 0000000..e69de29
diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
index 34476b7..d57644b 100644
--- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
+++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
@@ -51,11 +51,19 @@ include("maximizer.jl")
 $TYPEDEF
 
 Abstract type for dynamic vehicle scheduling benchmarks.
+
+# Fields
+$TYPEDFIELDS
 """
 @kwdef struct DVSPBenchmark <: AbstractDynamicBenchmark
+    "maximum number of new requests per epoch (forwarded to `Instance`)"
     max_requests_per_epoch::Int = 10
+    "dispatch delay added to the current time to obtain the planning start time"
     Δ_dispatch::Float64 = 1.0
+    "duration of an epoch (forwarded to `Instance`)"
     epoch_duration::Float64 = 1.0
+    "flag passed to `AnticipativeSolver` by `generate_anticipative_solver`"
+    two_dimensional_features::Bool = false
 end
 
 function Utils.generate_dataset(b::DVSPBenchmark, dataset_size::Int=1)
@@ -74,14 +82,6 @@ function Utils.generate_dataset(b::DVSPBenchmark, dataset_size::Int=1)
     ]
 end
 
-function Utils.generate_scenario_generator(::DVSPBenchmark)
-    return generate_scenario
-end
-
-function Utils.generate_anticipative_solver(::DVSPBenchmark; kwargs...)
-    return anticipative_solver
-end
-
 function Utils.generate_environment(::DVSPBenchmark, instance::Instance; kwargs...)
     return DVSPEnv(instance; kwargs...)
 end
@@ -90,6 +90,14 @@ function Utils.generate_maximizer(::DVSPBenchmark)
     return LinearMaximizer(oracle; g, h)
 end
 
+"""
+$TYPEDSIGNATURES
+
+Return the scenario generator of the benchmark, i.e. the `generate_scenario` function.
+"""
+function Utils.generate_scenario_generator(::DVSPBenchmark)
+    return generate_scenario
+end
+
+"""
+$TYPEDSIGNATURES
+
+Return an `AnticipativeSolver` configured with `b.two_dimensional_features`.
+
+Keyword arguments are accepted but not used.
+"""
+function Utils.generate_anticipative_solver(b::DVSPBenchmark; kwargs...)
+    return AnticipativeSolver(b.two_dimensional_features)
+end
+
 export DVSPBenchmark #, generate_environment # , generate_sample, generate_anticipative_solver
 export run_policy!,
     GreedyVSPPolicy, LazyVSPPolicy, KleopatraVSPPolicy, AnticipativeVSPPolicy
diff --git a/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl b/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl
index d0404b2..dd52d19 100644
--- a/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl
+++ b/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl
@@ -41,7 +41,11 @@ Solve the anticipative VSP problem for environment `env`.
 For this, it uses the current environment history, so make sure that the environment is terminated before calling this method.
 """
 function anticipative_solver(
-    env::DVSPEnv, scenario=env.scenario; model_builder=highs_model, reset_env=false
+    env::DVSPEnv,
+    scenario=env.scenario;
+    model_builder=highs_model,
+    reset_env=false,
+    two_dimensional_features=false,
 )
     reset_env && reset!(env)
     request_epoch = [0]
@@ -180,10 +184,26 @@ function anticipative_solver(
         )
 
         # x = compute_2D_features(state, env.instance)
-        x = compute_features(state, env.instance)
+        x = if two_dimensional_features
+            compute_2D_features(state, env.instance)
+        else
+            compute_features(state, env.instance)
+        end
 
         return DataSample(; instance=state, y_true, x)
     end
 
     return obj, dataset
 end
+
+"""
+$TYPEDEF
+
+Callable wrapper around `anticipative_solver`: an instance is called on an environment
+(and optionally a scenario) and delegates to `anticipative_solver`.
+"""
+@kwdef struct AnticipativeSolver
+    # When `true`, calling the solver selects `highs_model_2d` as model builder instead of
+    # `highs_model`.
+    is_2D::Bool = false
+end
+
+"""
+$TYPEDSIGNATURES
+
+Run `anticipative_solver` on `env` for the given `scenario` and return its result.
+
+`solver.is_2D` is forwarded as the `two_dimensional_features` keyword, so that the
+features stored in the returned samples follow the solver configuration.
+"""
+function (solver::AnticipativeSolver)(env::DVSPEnv, scenario=env.scenario; reset_env=false)
+    # NOTE(review): `highs_model_2d` is not defined in the visible sources — confirm it
+    # exists, otherwise `highs_model` should be used in both cases.
+    model_builder = solver.is_2D ? highs_model_2d : highs_model
+    return anticipative_solver(
+        env, scenario; model_builder, reset_env, two_dimensional_features=solver.is_2D
+    )
+end
diff --git a/src/DynamicVehicleScheduling/environment/plot.jl b/src/DynamicVehicleScheduling/environment/plot.jl
index 53831e8..adb0fa6 100644
--- a/src/DynamicVehicleScheduling/environment/plot.jl
+++ b/src/DynamicVehicleScheduling/environment/plot.jl
@@ -1,3 +1,7 @@
+"""
+$TYPEDSIGNATURES
+
+Plot the static instance of `env` by calling `plot_instance` on
+`env.instance.static_instance`. Keyword arguments are forwarded to that call.
+"""
+function plot_instance(env::DVSPEnv; kwargs...)
+    return plot_instance(env.instance.static_instance; kwargs...)
+end
+
 # """
 # $TYPEDSIGNATURES
 
diff --git a/src/Utils/interface.jl b/src/Utils/interface.jl
index 3b70f9e..ce9b6ed 100644
--- a/src/Utils/interface.jl
+++ b/src/Utils/interface.jl
@@ -189,9 +189,11 @@ It follows the same interface as [`AbstractBenchmark`](@ref), with the addition
 """
 abstract type AbstractStochasticBenchmark <: AbstractBenchmark end
 
+function generate_scenario end
+
 # only works for exogenous noise
 """
-    generate_scenario(::AbstractStochasticBenchmark; kwargs...)
+    generate_scenario_generator(::AbstractStochasticBenchmark; kwargs...)
 """
 function generate_scenario_generator end
 

From 0c7e20add763cfd0f259729dc091d3be3558aed7 Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Fri, 11 Jul 2025 17:30:38 +0200
Subject: [PATCH 13/29] first version of DynamicAssortmentBenchmark

---
 Project.toml                               |   2 +
 src/DecisionFocusedLearningBenchmarks.jl   |   3 +
 src/DynamicAssortment/DynamicAssortment.jl |  36 +++
 src/DynamicAssortment/environment.jl       | 271 +++++++++++++++++++++
 4 files changed, 312 insertions(+)
 create mode 100644 src/DynamicAssortment/environment.jl

diff --git a/Project.toml b/Project.toml
index 03e2d27..68a66e4 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,6 +4,7 @@ authors = ["Members of JuliaDecisionFocusedLearning"]
 version = "0.2.2"
 
 [deps]
+Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
 CommonRLInterface = "d842c3ba-07a1-494f-bbec-f5741b0a3e98"
 ConstrainedShortestPaths = "b3798467-87dc-4d99-943d-35a1bd39e395"
 DataDeps = "124859b0-ceae-595e-8997-d05f6a7a8dfe"
@@ -32,6 +33,7 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 
 [compat]
+Combinatorics = "1.0.3"
 CommonRLInterface = "0.3.3"
 ConstrainedShortestPaths = "0.6.0"
 DataDeps = "0.7"
diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl
index dfd0a42..4199b49 100644
--- a/src/DecisionFocusedLearningBenchmarks.jl
+++ b/src/DecisionFocusedLearningBenchmarks.jl
@@ -55,6 +55,7 @@ include("FixedSizeShortestPath/FixedSizeShortestPath.jl")
 include("PortfolioOptimization/PortfolioOptimization.jl")
 include("StochasticVehicleScheduling/StochasticVehicleScheduling.jl")
 include("DynamicVehicleScheduling/DynamicVehicleScheduling.jl")
+include("DynamicAssortment/DynamicAssortment.jl")
 
 using .Utils
 using .Argmax
@@ -65,6 +66,7 @@ using .FixedSizeShortestPath
 using .PortfolioOptimization
 using .StochasticVehicleScheduling
 using .DynamicVehicleScheduling
+using .DynamicAssortment
 
 # Interface
 export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark, DataSample
@@ -87,5 +89,6 @@ export FixedSizeShortestPathBenchmark
 export PortfolioOptimizationBenchmark
 export StochasticVehicleSchedulingBenchmark
 export DVSPBenchmark
+export DynamicAssortmentBenchmark
 
 end # module DecisionFocusedLearningBenchmarks
diff --git a/src/DynamicAssortment/DynamicAssortment.jl b/src/DynamicAssortment/DynamicAssortment.jl
index e69de29..fd6c80e 100644
--- a/src/DynamicAssortment/DynamicAssortment.jl
+++ b/src/DynamicAssortment/DynamicAssortment.jl
@@ -0,0 +1,36 @@
+"""
+    DynamicAssortment
+
+Dynamic assortment benchmark module.
+
+Includes `environment.jl` (which defines the `Instance` and `Environment` types) and
+exports `DynamicAssortmentBenchmark`.
+"""
+module DynamicAssortment
+
+using ..Utils
+
+using CommonRLInterface: CommonRLInterface, AbstractEnv
+using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES
+using Distributions: Uniform, Categorical
+using LinearAlgebra: dot
+using Random: Random, AbstractRNG, MersenneTwister
+using Statistics: mean
+
+using Flux: Chain, Dense
+# using Flux.Optimise
+# using InferOpt
+# using Random
+# using JLD2
+# using Plots
+# using Distributions
+# using LinearAlgebra
+using Combinatorics: combinations
+
+include("environment.jl")
+
+"""
+$TYPEDEF
+
+Benchmark for the dynamic assortment problem.
+"""
+struct DynamicAssortmentBenchmark <: AbstractDynamicBenchmark end
+
+"""
+$TYPEDSIGNATURES
+
+Return a `DataSample` whose `instance` is an `Instance` built with its default settings.
+"""
+function Utils.generate_sample(::DynamicAssortmentBenchmark)
+    return DataSample(; instance=Instance())
+end
+
+"""
+$TYPEDSIGNATURES
+
+Return the maximizer of the benchmark, i.e. the `DAP_optimization` function.
+"""
+# NOTE(review): `DAP_optimization` is expected to come from `environment.jl`; its
+# definition is not visible from this module file — confirm.
+function Utils.generate_maximizer(::DynamicAssortmentBenchmark)
+    return DAP_optimization
+end
+
+export DynamicAssortmentBenchmark
+
+end
diff --git a/src/DynamicAssortment/environment.jl b/src/DynamicAssortment/environment.jl
new file mode 100644
index 0000000..9272a8f
--- /dev/null
+++ b/src/DynamicAssortment/environment.jl
@@ -0,0 +1,271 @@
+"""
+$TYPEDEF
+
+Feature 1:d Random static feature
+Feature 3: Hype
+Feature 4: Satisfaction
+Feature 5: Price
+
+# Fields
+$TYPEDFIELDS
+"""
+@kwdef struct Instance{M}
+    "customer choice model"
+    customer_choice_model::M = Chain(Dense([0.3 0.5 0.6 -0.4 -0.8 0.0]), vec)
+    "number of items"
+    N::Int = 20
+    "dimension of feature vectors (in addition to hype, satisfaction, and price)"
+    d::Int = 2
+    "assortment size constraint"
+    K::Int = 4
+    "number of steps per episode"
+    max_steps::Int = 80
+    "flags if the environment is endogenous"
+    endogenous::Bool = true
+end
+
+@kwdef mutable struct Environment{R<:AbstractRNG} <: AbstractEnv
+    "associated instance"
+    instance::Instance
+    "current step"
+    step::Int
+    "purchase history"
+    purchase_hist::Vector{Int}
+    "rng"
+    rng::R
+    "seed for RNG"
+    seed::Int
+    "customer utility for each item"
+    utility::Vector{Float64}
+    "prices for each item"
+    prices::Vector{Float64}
+    "current full features"
+    features::Matrix{Float64}
+    "starting satisfaction + hype features"
+    start_features::Matrix{Float64}
+    "satisfaction + hype feature change from the last step"
+    d_features::Matrix{Float64}
+end
+
+function Environment(
+    instance::Instance; seed::Int=0, rng::AbstractRNG=MersenneTwister(seed)
+)
+    return Environment(;
+        instance=instance,
+        step=1,
+        purchase_hist=Int[],
+        rng=rng,
+        seed=seed,
+        utility=zeros(instance.N),
+        prices=zeros(instance.N + 1),
+        features=zeros(instance.d + 4, instance.N),
+        start_features=zeros(2, instance.N),
+        d_features=zeros(2, instance.N),
+    )
+end
+
+## Basic operations of environment
+
+# Reset the environment
+function CommonRLInterface.reset!(env::Environment; reset_seed=false, seed=env.seed)
+    env.seed = seed
+    if reset_seed
+        Random.seed!(env.rng, env.seed)
+    end
+    (; d, N, customer_choice_model) = env.instance
+    features = rand(env.rng, Uniform(1.0, 10.0), (d + 3, N))
+    env.prices = vcat(features[end, :], 0.0)
+    features = vcat(features, ones(1, N))
+    env.d_features .= 0.0
+    env.step = 1
+    env.utility .= customer_choice_model(features)
+    env.features .= features
+    env.start_features .= features[(d + 1):(d + 2), :]
+    env.purchase_hist = Int[]
+    return nothing
+end
+
+# Update the hype vector
+function hype_update!(env::Environment)
+    hype_vector = ones(env.instance.N)
+    env.purchase_hist[end] != 0 ? hype_vector[env.purchase_hist[end]] += 0.02 : nothing
+    if length(env.purchase_hist) >= 2
+        if env.purchase_hist[end - 1] != 0
+            hype_vector[env.purchase_hist[end - 1]] -= 0.005
+        else
+            nothing
+        end
+        if length(env.purchase_hist) >= 3
+            if env.purchase_hist[end - 2] != 0
+                hype_vector[env.purchase_hist[end - 2]] -= 0.005
+            else
+                nothing
+            end
+            if length(env.purchase_hist) >= 4
+                if env.purchase_hist[end - 3] != 0
+                    hype_vector[env.purchase_hist[end - 3]] -= 0.005
+                else
+                    nothing
+                end
+                if length(env.purchase_hist) >= 5
+                    if env.purchase_hist[end - 4] != 0
+                        hype_vector[env.purchase_hist[end - 4]] -= 0.005
+                    else
+                        nothing
+                    end
+                end
+            end
+        end
+    end
+    return hype_vector
+end
+
+# Step function
+function step!(env::Environment, item)
+    old_features = copy(env.features)
+    push!(env.purchase_hist, item)
+    if env.instance.endogenous
+        hype_vector = hype_update!(env)
+        env.features[3, :] .*= hype_vector
+        item != 0 ? env.features[4, item] *= 1.01 : nothing
+        env.features[6, :] .+= 9 / env.instance.max_steps # ??
+    end
+    env.d_features = env.features[3:4, :] - old_features[3:4, :] # ! hardcoded everywhere :(
+    env.step += 1
+    return nothing
+end
+
+# Choice probabilities
+function choice_probabilities(env::Environment, S)
+    θ = env.utility
+    exp_values = [exp(θ[i]) * S[i] for i in 1:(env.instance.N)]
+    denominator = 1 + sum(exp_values)
+    probs = [exp_values[i] / denominator for i in 1:(env.instance.N)]
+    push!(probs, 1 / denominator) # Probability of no purchase
+    return probs
+end
+
+# Purchase decision
+function purchase!(env::Environment, S)
+    r = env.prices
+    probs = choice_probabilities(env, S)
+    item = rand(env.rng, Categorical(probs))
+    item == env.instance.N + 1 ? item = 0 : item  # TODO: cleanup this, not really needed and confusing
+    item != 0 ? revenue = r[item] : revenue = 0.0
+    return item, revenue
+end
+
+# enumerate all possible assortments of size K and return the best one
+# ? can't we do better than that, probably
+function expert_solution(env::Environment)
+    r = env.prices
+    local best_S
+    best_revenue = 0.0
+    for S in combinations(1:(env.instance.N), env.instance.K)
+        S_vec = zeros(env.instance.N)
+        S_vec[S] .= 1.0
+        probs = choice_probabilities(env, S_vec)
+        expected_revenue = dot(probs, r)
+        if expected_revenue > best_revenue
+            best_S, best_revenue = S_vec, expected_revenue
+        end
+    end
+    return best_S
+end
+
+# DAP CO-layer
+function DAP_optimization(θ; instance::Instance)
+    solution = partialsortperm(θ, 1:(instance.K); rev=true) # It never makes sense not to show k items
+    S = zeros(instance.N)
+    S[solution] .= 1
+    return S
+end
+
+## Solution functions
+
+# Anticipative (fixed)
+function expert_policy(env::Environment, episodes; first_seed=1, use_oracle=false)
+    dataset = []
+    rev_global = Float64[]
+    for i in 1:episodes
+        rev_episode = 0.0
+        CommonRLInterface.reset!(env; seed=first_seed - 1 + i, reset_seed=true)
+        done = false
+        training_instances = []
+        while !done
+            S = expert_solution(env)
+
+            delta_features = env.features[3:4, :] .- env.start_features  # ! hardcoded
+            feature_vector = vcat(env.features, env.d_features, delta_features)
+            push!(training_instances, (features=feature_vector, S_t=S))
+
+            item, revenue = purchase!(env, S)
+            rev_episode += revenue
+            step!(env, item)
+
+            env.step > env.instance.max_steps ? done = true : done = false
+        end
+        push!(rev_global, rev_episode)
+        push!(dataset, training_instances)
+    end
+    return mean(rev_global), rev_global, dataset
+end
+
+# Greedy heuristic
+function model_greedy(features)
+    model = Chain(Dense([0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0]), vec)
+    return model(features)
+end
+
+# Random heuristic
+function model_random(features)
+    rand_seed = Int(round(sum(features)))
+    return rand(MersenneTwister(rand_seed), Uniform(0.0, 1.0), size(features)[2])
+end
+
+# Episode generation
+function generate_episode(env::Environment, model, customer_model, sigma, random_seed)
+    buffer = []
+    start_features, d_features = reset!(env; seed=random_seed)
+    features = copy(start_features)
+    done = false
+    while !done
+        delta_features = features[3:4, :] .- start_features[3:4, :]
+        r = features[5, :]
+        feature_vector = vcat(features, d_features, delta_features)
+        θ = model(feature_vector)
+        η = rand(MersenneTwister(random_seed * env.step), p(θ, sigma), 1)[:, 1]
+        S = DAP_optimization(η; instance=env.instance)
+        θ_0 = customer_model(features)
+        item, revenue = purchase!(env, S)
+        features, d_features = step!(env, features, item)
+        feat_next = vcat(features, d_features, features[3:4, :] .- start_features[3:4, :])
+        push!(
+            buffer,
+            (
+                t=env.step - 1,
+                feat_t=feature_vector,
+                theta=θ,
+                eta=η,
+                S_t=S,
+                a_T=item,
+                rev_t=revenue,
+                ret_t=0.0,
+                feat_next=feat_next,
+            ),
+        )
+        count(!iszero, inventory) < env.instance.K ? break : nothing
+        env.step > env.instance.max_steps ? done = true : done = false
+    end
+    for i in (length(buffer) - 1):-1:1
+        if i == length(buffer) - 1
+            ret = buffer[i].rev_t
+        else
+            ret = buffer[i].rev_t + 0.99 * buffer[i + 1].ret_t
+        end
+        traj = buffer[i]
+        traj_updated = (; traj..., ret_t=ret)
+        buffer[i] = traj_updated
+    end
+    return buffer
+end

From e8057cf35af6cf027de04ade458897e80ed633f6 Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Tue, 15 Jul 2025 08:02:04 +0200
Subject: [PATCH 14/29] wip

---
 src/DynamicAssortment/environment.jl | 111 ++++++++++++++-------------
 1 file changed, 56 insertions(+), 55 deletions(-)

diff --git a/src/DynamicAssortment/environment.jl b/src/DynamicAssortment/environment.jl
index 9272a8f..26e900c 100644
--- a/src/DynamicAssortment/environment.jl
+++ b/src/DynamicAssortment/environment.jl
@@ -11,7 +11,7 @@ $TYPEDFIELDS
 """
 @kwdef struct Instance{M}
     "customer choice model"
-    customer_choice_model::M = Chain(Dense([0.3 0.5 0.6 -0.4 -0.8 0.0]), vec)
+    customer_choice_model::M = Chain(Dense([0.3 0.5 0.6 -0.4 -0.8]), vec)
     "number of items"
     N::Int = 20
     "dimension of feature vectors (in addition to hype, satisfaction, and price)"
@@ -22,6 +22,7 @@ $TYPEDFIELDS
     max_steps::Int = 80
     "flags if the environment is endogenous"
     endogenous::Bool = true
+    # start_features?
 end
 
 @kwdef mutable struct Environment{R<:AbstractRNG} <: AbstractEnv
@@ -58,7 +59,7 @@ function Environment(
         seed=seed,
         utility=zeros(instance.N),
         prices=zeros(instance.N + 1),
-        features=zeros(instance.d + 4, instance.N),
+        features=zeros(instance.d + 3, instance.N),
         start_features=zeros(2, instance.N),
         d_features=zeros(2, instance.N),
     )
@@ -75,7 +76,7 @@ function CommonRLInterface.reset!(env::Environment; reset_seed=false, seed=env.s
     (; d, N, customer_choice_model) = env.instance
     features = rand(env.rng, Uniform(1.0, 10.0), (d + 3, N))
     env.prices = vcat(features[end, :], 0.0)
-    features = vcat(features, ones(1, N))
+    # features = vcat(features, ones(1, N)) # TODO
     env.d_features .= 0.0
     env.step = 1
     env.utility .= customer_choice_model(features)
@@ -128,7 +129,7 @@ function step!(env::Environment, item)
         hype_vector = hype_update!(env)
         env.features[3, :] .*= hype_vector
         item != 0 ? env.features[4, item] *= 1.01 : nothing
-        env.features[6, :] .+= 9 / env.instance.max_steps # ??
+        # env.features[6, :] .+= 9 / env.instance.max_steps # ??
     end
     env.d_features = env.features[3:4, :] - old_features[3:4, :] # ! hardcoded everywhere :(
     env.step += 1
@@ -146,13 +147,14 @@ function choice_probabilities(env::Environment, S)
 end
 
 # Purchase decision
-function purchase!(env::Environment, S)
+function CommonRLInterface.act!(env::Environment, S)
     r = env.prices
     probs = choice_probabilities(env, S)
     item = rand(env.rng, Categorical(probs))
+    reward = r[item]
     item == env.instance.N + 1 ? item = 0 : item  # TODO: cleanup this, not really needed and confusing
-    item != 0 ? revenue = r[item] : revenue = 0.0
-    return item, revenue
+    step!(env, item)
+    return reward
 end
 
 # enumerate all possible assortments of size K and return the best one
@@ -199,9 +201,8 @@ function expert_policy(env::Environment, episodes; first_seed=1, use_oracle=fals
             feature_vector = vcat(env.features, env.d_features, delta_features)
             push!(training_instances, (features=feature_vector, S_t=S))
 
-            item, revenue = purchase!(env, S)
-            rev_episode += revenue
-            step!(env, item)
+            reward = CommonRLInterface.act!(env, S)
+            rev_episode += reward
 
             env.step > env.instance.max_steps ? done = true : done = false
         end
@@ -224,48 +225,48 @@ function model_random(features)
 end
 
 # Episode generation
-function generate_episode(env::Environment, model, customer_model, sigma, random_seed)
-    buffer = []
-    start_features, d_features = reset!(env; seed=random_seed)
-    features = copy(start_features)
-    done = false
-    while !done
-        delta_features = features[3:4, :] .- start_features[3:4, :]
-        r = features[5, :]
-        feature_vector = vcat(features, d_features, delta_features)
-        θ = model(feature_vector)
-        η = rand(MersenneTwister(random_seed * env.step), p(θ, sigma), 1)[:, 1]
-        S = DAP_optimization(η; instance=env.instance)
-        θ_0 = customer_model(features)
-        item, revenue = purchase!(env, S)
-        features, d_features = step!(env, features, item)
-        feat_next = vcat(features, d_features, features[3:4, :] .- start_features[3:4, :])
-        push!(
-            buffer,
-            (
-                t=env.step - 1,
-                feat_t=feature_vector,
-                theta=θ,
-                eta=η,
-                S_t=S,
-                a_T=item,
-                rev_t=revenue,
-                ret_t=0.0,
-                feat_next=feat_next,
-            ),
-        )
-        count(!iszero, inventory) < env.instance.K ? break : nothing
-        env.step > env.instance.max_steps ? done = true : done = false
-    end
-    for i in (length(buffer) - 1):-1:1
-        if i == length(buffer) - 1
-            ret = buffer[i].rev_t
-        else
-            ret = buffer[i].rev_t + 0.99 * buffer[i + 1].ret_t
-        end
-        traj = buffer[i]
-        traj_updated = (; traj..., ret_t=ret)
-        buffer[i] = traj_updated
-    end
-    return buffer
-end
+# function generate_episode(env::Environment, model, customer_model, sigma, random_seed)
+#     buffer = []
+#     start_features, d_features = reset!(env; seed=random_seed)
+#     features = copy(start_features)
+#     done = false
+#     while !done
+#         delta_features = features[3:4, :] .- start_features[3:4, :]
+#         r = features[5, :]
+#         feature_vector = vcat(features, d_features, delta_features)
+#         θ = model(feature_vector)
+#         η = rand(MersenneTwister(random_seed * env.step), p(θ, sigma), 1)[:, 1]
+#         S = DAP_optimization(η; instance=env.instance)
+#         θ_0 = customer_model(features)
+#         item, revenue = purchase!(env, S)
+#         features, d_features = step!(env, item)
+#         feat_next = vcat(features, d_features, features[3:4, :] .- start_features[3:4, :])
+#         push!(
+#             buffer,
+#             (
+#                 t=env.step - 1,
+#                 feat_t=feature_vector,
+#                 theta=θ,
+#                 eta=η,
+#                 S_t=S,
+#                 a_T=item,
+#                 rev_t=revenue,
+#                 ret_t=0.0,
+#                 feat_next=feat_next,
+#             ),
+#         )
+#         count(!iszero, inventory) < env.instance.K ? break : nothing
+#         env.step > env.instance.max_steps ? done = true : done = false
+#     end
+#     for i in (length(buffer) - 1):-1:1
+#         if i == length(buffer) - 1
+#             ret = buffer[i].rev_t
+#         else
+#             ret = buffer[i].rev_t + 0.99 * buffer[i + 1].ret_t
+#         end
+#         traj = buffer[i]
+#         traj_updated = (; traj..., ret_t=ret)
+#         buffer[i] = traj_updated
+#     end
+#     return buffer
+# end

From 5f29047b187534fc0aa35912647e798545be1f19 Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Wed, 6 Aug 2025 15:41:53 +0200
Subject: [PATCH 15/29] Advance dynamic assortment

---
 src/DecisionFocusedLearningBenchmarks.jl      |   3 +-
 src/DynamicAssortment/DynamicAssortment.jl    |  68 +++-
 src/DynamicAssortment/environment.jl          | 306 +++++++-----------
 src/DynamicAssortment/instance.jl             |  33 ++
 .../DynamicVehicleScheduling.jl               |   2 +-
 src/Utils/Utils.jl                            |   4 +
 src/Utils/interface.jl                        |  17 +-
 src/Utils/maximizers.jl                       |  22 ++
 8 files changed, 250 insertions(+), 205 deletions(-)
 create mode 100644 src/DynamicAssortment/instance.jl
 create mode 100644 src/Utils/maximizers.jl

diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl
index 4199b49..14fd2af 100644
--- a/src/DecisionFocusedLearningBenchmarks.jl
+++ b/src/DecisionFocusedLearningBenchmarks.jl
@@ -71,10 +71,11 @@ using .DynamicAssortment
 # Interface
 export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark, DataSample
 
-export generate_sample, generate_dataset, generate_environments
+export generate_sample, generate_dataset, generate_environments, generate_environment
 export generate_scenario_generator, generate_anticipative_solver
 export generate_statistical_model
 export generate_maximizer, maximizer_kwargs
+export is_exogenous, is_endogenous
 
 export objective_value
 export plot_data, plot_instance, plot_solution
diff --git a/src/DynamicAssortment/DynamicAssortment.jl b/src/DynamicAssortment/DynamicAssortment.jl
index fd6c80e..f5abbbe 100644
--- a/src/DynamicAssortment/DynamicAssortment.jl
+++ b/src/DynamicAssortment/DynamicAssortment.jl
@@ -10,25 +10,69 @@ using Random: Random, AbstractRNG, MersenneTwister
 using Statistics: mean
 
 using Flux: Chain, Dense
-# using Flux.Optimise
-# using InferOpt
-# using Random
-# using JLD2
-# using Plots
-# using Distributions
-# using LinearAlgebra
 using Combinatorics: combinations
 
+"""
+$TYPEDEF
+
+Benchmark for the dynamic assortment problem.
+
+# Fields
+$TYPEDFIELDS
+"""
+struct DynamicAssortmentBenchmark{exogenous,M} <: AbstractDynamicBenchmark{exogenous}
+    "customer choice model (price, hype, saturation, and features)"
+    customer_choice_model::M
+    "number of items"
+    N::Int
+    "dimension of feature vectors (in addition to hype, saturation, and price)"
+    d::Int
+    "assortment size constraint"
+    K::Int
+    "number of steps per episode"
+    max_steps::Int
+end
+
+function DynamicAssortmentBenchmark(;
+    N=20,
+    d=2,
+    K=4,
+    max_steps=80,
+    customer_choice_model=Chain(Dense([-0.8 0.6 -0.4 0.3 0.5]), vec),
+    exogenous=false,
+)
+    return DynamicAssortmentBenchmark{exogenous,typeof(customer_choice_model)}(
+        customer_choice_model, N, d, K, max_steps
+    )
+end
+
+include("instance.jl")
 include("environment.jl")
 
-struct DynamicAssortmentBenchmark <: AbstractDynamicBenchmark end
+customer_choice_model(b::DynamicAssortmentBenchmark) = b.customer_choice_model
+item_count(b::DynamicAssortmentBenchmark) = b.N
+feature_count(b::DynamicAssortmentBenchmark) = b.d
+assortment_size(b::DynamicAssortmentBenchmark) = b.K
+max_steps(b::DynamicAssortmentBenchmark) = b.max_steps
+
+function Utils.generate_sample(
+    b::DynamicAssortmentBenchmark, rng::AbstractRNG=MersenneTwister(0); seed=nothing
+)
+    isnothing(seed) || Random.seed!(rng, seed) # reseed only when a seed is given
+    return DataSample(; instance=Instance(b, rng))
+end
 
-function Utils.generate_sample(::DynamicAssortmentBenchmark)
-    return DataSample(; instance=Instance())
+function Utils.generate_maximizer(b::DynamicAssortmentBenchmark)
+    return TopKMaximizer(assortment_size(b))
 end
 
-function Utils.generate_maximizer(::DynamicAssortmentBenchmark)
-    return DAP_optimization
+function Utils.generate_environment(
+    ::DynamicAssortmentBenchmark,
+    instance::Instance;
+    seed=nothing,
+    rng::AbstractRNG=MersenneTwister(seed),
+)
+    return Environment(instance; seed=seed, rng=rng)
 end
 
 export DynamicAssortmentBenchmark
diff --git a/src/DynamicAssortment/environment.jl b/src/DynamicAssortment/environment.jl
index 26e900c..f8617c0 100644
--- a/src/DynamicAssortment/environment.jl
+++ b/src/DynamicAssortment/environment.jl
@@ -1,272 +1,210 @@
 """
 $TYPEDEF
 
-Feature 1:d Random static feature
-Feature 3: Hype
-Feature 4: Satisfaction
-Feature 5: Price
+Environment for the dynamic assortment problem.
 
 # Fields
 $TYPEDFIELDS
 """
-@kwdef struct Instance{M}
-    "customer choice model"
-    customer_choice_model::M = Chain(Dense([0.3 0.5 0.6 -0.4 -0.8]), vec)
-    "number of items"
-    N::Int = 20
-    "dimension of feature vectors (in addition to hype, satisfaction, and price)"
-    d::Int = 2
-    "assortment size constraint"
-    K::Int = 4
-    "number of steps per episode"
-    max_steps::Int = 80
-    "flags if the environment is endogenous"
-    endogenous::Bool = true
-    # start_features?
-end
-
-@kwdef mutable struct Environment{R<:AbstractRNG} <: AbstractEnv
+@kwdef mutable struct Environment{I<:Instance,R<:AbstractRNG,S<:Union{Nothing,Int}} <:
+                      AbstractEnv
     "associated instance"
-    instance::Instance
+    instance::I
     "current step"
     step::Int
-    "purchase history"
+    "purchase history (used to update hype feature)"
     purchase_hist::Vector{Int}
     "rng"
     rng::R
     "seed for RNG"
-    seed::Int
+    seed::S
     "customer utility for each item"
     utility::Vector{Float64}
-    "prices for each item"
-    prices::Vector{Float64}
     "current full features"
     features::Matrix{Float64}
-    "starting satisfaction + hype features"
-    start_features::Matrix{Float64}
     "satisfaction + hype feature change from the last step"
     d_features::Matrix{Float64}
 end
 
-function Environment(
-    instance::Instance; seed::Int=0, rng::AbstractRNG=MersenneTwister(seed)
-)
-    return Environment(;
-        instance=instance,
+function Environment(instance::Instance; seed=0, rng::AbstractRNG=MersenneTwister(seed))
+    N = item_count(instance)
+    (; prices, features, starting_hype_and_saturation) = instance
+    full_features = vcat(
+        reshape(prices[1:(end - 1)], 1, :), starting_hype_and_saturation, features
+    )
+    model = customer_choice_model(instance)
+    env = Environment(;
+        instance,
         step=1,
         purchase_hist=Int[],
         rng=rng,
         seed=seed,
-        utility=zeros(instance.N),
-        prices=zeros(instance.N + 1),
-        features=zeros(instance.d + 3, instance.N),
-        start_features=zeros(2, instance.N),
-        d_features=zeros(2, instance.N),
+        utility=model(full_features),
+        features=full_features,
+        d_features=zeros(2, N),
     )
+    CommonRLInterface.reset!(env; reset_seed=true)
+    return env
 end
 
+customer_choice_model(b::Environment) = customer_choice_model(b.instance)
+item_count(b::Environment) = item_count(b.instance)
+feature_count(b::Environment) = feature_count(b.instance)
+assortment_size(b::Environment) = assortment_size(b.instance)
+max_steps(b::Environment) = max_steps(b.instance)
+prices(b::Environment) = b.instance.prices
+# features(b::Environment) = b.instance.features
+# starting_hype_and_saturation(b::Environment) = b.instance.starting_hype_and_saturation
+
 ## Basic operations of environment
 
 # Reset the environment
 function CommonRLInterface.reset!(env::Environment; reset_seed=false, seed=env.seed)
-    env.seed = seed
-    if reset_seed
-        Random.seed!(env.rng, env.seed)
-    end
-    (; d, N, customer_choice_model) = env.instance
-    features = rand(env.rng, Uniform(1.0, 10.0), (d + 3, N))
-    env.prices = vcat(features[end, :], 0.0)
-    # features = vcat(features, ones(1, N)) # TODO
-    env.d_features .= 0.0
+    reset_seed && Random.seed!(env.rng, seed)
+
     env.step = 1
-    env.utility .= customer_choice_model(features)
+
+    (; prices, starting_hype_and_saturation, features) = env.instance
+    features = vcat(
+        reshape(prices[1:(end - 1)], 1, :), starting_hype_and_saturation, features
+    )
     env.features .= features
-    env.start_features .= features[(d + 1):(d + 2), :]
-    env.purchase_hist = Int[]
+
+    env.d_features .= 0.0
+
+    model = customer_choice_model(env)
+    env.utility .= model(features)
+
+    empty!(env.purchase_hist)
     return nothing
 end
 
-# Update the hype vector
-function hype_update!(env::Environment)
-    hype_vector = ones(env.instance.N)
-    env.purchase_hist[end] != 0 ? hype_vector[env.purchase_hist[end]] += 0.02 : nothing
-    if length(env.purchase_hist) >= 2
-        if env.purchase_hist[end - 1] != 0
-            hype_vector[env.purchase_hist[end - 1]] -= 0.005
-        else
-            nothing
-        end
-        if length(env.purchase_hist) >= 3
-            if env.purchase_hist[end - 2] != 0
-                hype_vector[env.purchase_hist[end - 2]] -= 0.005
-            else
-                nothing
-            end
-            if length(env.purchase_hist) >= 4
-                if env.purchase_hist[end - 3] != 0
-                    hype_vector[env.purchase_hist[end - 3]] -= 0.005
-                else
-                    nothing
-                end
-                if length(env.purchase_hist) >= 5
-                    if env.purchase_hist[end - 4] != 0
-                        hype_vector[env.purchase_hist[end - 4]] -= 0.005
-                    else
-                        nothing
-                    end
-                end
+function CommonRLInterface.terminated(env::Environment)
+    return env.step > max_steps(env)
+end
+
+function CommonRLInterface.observe(env::Environment)
+    delta_features = env.features[2:3, :] .- env.instance.starting_hype_and_saturation
+    return vcat(
+        env.features,
+        env.d_features,
+        delta_features,
+        ones(1, item_count(env)) .* (env.step / max_steps(env) * 10),
+    ) #./ 10
+end
+
+# Compute the hype vector
+function hype_update(env::Environment)
+    N = item_count(env)
+    hype_vector = ones(N)
+    hist = env.purchase_hist
+
+    # Define decay factors for each time step
+    factors = [0.02, -0.005, -0.005, -0.005, -0.005]
+
+    # Apply updates for the last 5 purchases
+    for (i, factor) in enumerate(factors)
+        if length(hist) >= i
+            item = hist[end - i + 1]
+            if item <= N
+                hype_vector[item] += factor
             end
         end
     end
+
     return hype_vector
 end
 
 # Step function
-function step!(env::Environment, item)
-    old_features = copy(env.features)
+function buy_item!(env::Environment, item::Int)
     push!(env.purchase_hist, item)
-    if env.instance.endogenous
-        hype_vector = hype_update!(env)
-        env.features[3, :] .*= hype_vector
-        item != 0 ? env.features[4, item] *= 1.01 : nothing
-        # env.features[6, :] .+= 9 / env.instance.max_steps # ??
-    end
-    env.d_features = env.features[3:4, :] - old_features[3:4, :] # ! hardcoded everywhere :(
     env.step += 1
+
+    if is_endogenous(env.instance.config)
+        old_features = copy(env.features[2:3, :])
+        # update hype feature
+        hype_vector = hype_update(env)
+        env.features[2, :] .*= hype_vector
+
+        # update saturation feature
+        if item <= item_count(env)
+            env.features[3, item] *= 1.01
+        end
+
+        env.utility .= customer_choice_model(env)(env.features)
+        env.d_features = env.features[2:3, :] - old_features
+    end
     return nothing
 end
 
 # Choice probabilities
 function choice_probabilities(env::Environment, S)
+    N = item_count(env)
     θ = env.utility
-    exp_values = [exp(θ[i]) * S[i] for i in 1:(env.instance.N)]
-    denominator = 1 + sum(exp_values)
-    probs = [exp_values[i] / denominator for i in 1:(env.instance.N)]
-    push!(probs, 1 / denominator) # Probability of no purchase
+    exp_values = [exp(θ[i]) * S[i] for i in 1:N]
+    push!(exp_values, 1.0) # No purchase action
+    denominator = sum(exp_values)
+    probs = exp_values ./ denominator
     return probs
 end
 
 # Purchase decision
 function CommonRLInterface.act!(env::Environment, S)
-    r = env.prices
+    r = prices(env)
     probs = choice_probabilities(env, S)
     item = rand(env.rng, Categorical(probs))
     reward = r[item]
-    item == env.instance.N + 1 ? item = 0 : item  # TODO: cleanup this, not really needed and confusing
-    step!(env, item)
+    buy_item!(env, item)
     return reward
 end
 
+## Solution functions
 # enumerate all possible assortments of size K and return the best one
-# ? can't we do better than that, probably
+function compute_expected_revenue(env::Environment, S)
+    r = prices(env)
+    probs = choice_probabilities(env, S)
+    expected_revenue = dot(probs, r)
+    return expected_revenue
+end
+
 function expert_solution(env::Environment)
-    r = env.prices
-    local best_S
-    best_revenue = 0.0
-    for S in combinations(1:(env.instance.N), env.instance.K)
-        S_vec = zeros(env.instance.N)
-        S_vec[S] .= 1.0
-        probs = choice_probabilities(env, S_vec)
-        expected_revenue = dot(probs, r)
+    N = item_count(env)
+    K = assortment_size(env)
+    best_S = falses(N)
+    best_revenue = -1.0
+    S_vec = falses(N)
+    for S in combinations(1:N, K)
+        S_vec .= false
+        S_vec[S] .= true
+        expected_revenue = compute_expected_revenue(env, S_vec)
         if expected_revenue > best_revenue
-            best_S, best_revenue = S_vec, expected_revenue
+            best_S, best_revenue = copy(S_vec), expected_revenue
         end
     end
     return best_S
 end
 
-# DAP CO-layer
-function DAP_optimization(θ; instance::Instance)
-    solution = partialsortperm(θ, 1:(instance.K); rev=true) # It never makes sense not to show k items
-    S = zeros(instance.N)
-    S[solution] .= 1
-    return S
+function greedy_solution(env::Environment)
+    maximizer = generate_maximizer(env.instance.config)
+    return maximizer(prices(env)[1:item_count(env)]) # items only: skip no-purchase price
 end
 
-## Solution functions
-
-# Anticipative (fixed)
-function expert_policy(env::Environment, episodes; first_seed=1, use_oracle=false)
+function run_policy(env::Environment, episodes::Int; first_seed=1, policy=expert_solution)
     dataset = []
     rev_global = Float64[]
     for i in 1:episodes
         rev_episode = 0.0
         CommonRLInterface.reset!(env; seed=first_seed - 1 + i, reset_seed=true)
-        done = false
         training_instances = []
-        while !done
-            S = expert_solution(env)
-
-            delta_features = env.features[3:4, :] .- env.start_features  # ! hardcoded
-            feature_vector = vcat(env.features, env.d_features, delta_features)
-            push!(training_instances, (features=feature_vector, S_t=S))
-
+        while !CommonRLInterface.terminated(env)
+            S = policy(env)
+            features = CommonRLInterface.observe(env)
+            push!(training_instances, DataSample(; x=features, y_true=S))
             reward = CommonRLInterface.act!(env, S)
             rev_episode += reward
-
-            env.step > env.instance.max_steps ? done = true : done = false
         end
         push!(rev_global, rev_episode)
         push!(dataset, training_instances)
     end
     return mean(rev_global), rev_global, dataset
 end
-
-# Greedy heuristic
-function model_greedy(features)
-    model = Chain(Dense([0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0]), vec)
-    return model(features)
-end
-
-# Random heuristic
-function model_random(features)
-    rand_seed = Int(round(sum(features)))
-    return rand(MersenneTwister(rand_seed), Uniform(0.0, 1.0), size(features)[2])
-end
-
-# Episode generation
-# function generate_episode(env::Environment, model, customer_model, sigma, random_seed)
-#     buffer = []
-#     start_features, d_features = reset!(env; seed=random_seed)
-#     features = copy(start_features)
-#     done = false
-#     while !done
-#         delta_features = features[3:4, :] .- start_features[3:4, :]
-#         r = features[5, :]
-#         feature_vector = vcat(features, d_features, delta_features)
-#         θ = model(feature_vector)
-#         η = rand(MersenneTwister(random_seed * env.step), p(θ, sigma), 1)[:, 1]
-#         S = DAP_optimization(η; instance=env.instance)
-#         θ_0 = customer_model(features)
-#         item, revenue = purchase!(env, S)
-#         features, d_features = step!(env, item)
-#         feat_next = vcat(features, d_features, features[3:4, :] .- start_features[3:4, :])
-#         push!(
-#             buffer,
-#             (
-#                 t=env.step - 1,
-#                 feat_t=feature_vector,
-#                 theta=θ,
-#                 eta=η,
-#                 S_t=S,
-#                 a_T=item,
-#                 rev_t=revenue,
-#                 ret_t=0.0,
-#                 feat_next=feat_next,
-#             ),
-#         )
-#         count(!iszero, inventory) < env.instance.K ? break : nothing
-#         env.step > env.instance.max_steps ? done = true : done = false
-#     end
-#     for i in (length(buffer) - 1):-1:1
-#         if i == length(buffer) - 1
-#             ret = buffer[i].rev_t
-#         else
-#             ret = buffer[i].rev_t + 0.99 * buffer[i + 1].ret_t
-#         end
-#         traj = buffer[i]
-#         traj_updated = (; traj..., ret_t=ret)
-#         buffer[i] = traj_updated
-#     end
-#     return buffer
-# end
diff --git a/src/DynamicAssortment/instance.jl b/src/DynamicAssortment/instance.jl
new file mode 100644
index 0000000..3250cdd
--- /dev/null
+++ b/src/DynamicAssortment/instance.jl
@@ -0,0 +1,33 @@
+"""
+$TYPEDEF
+
+Instance of the dynamic assortment problem.
+
+# Fields
+$TYPEDFIELDS
+"""
+@kwdef struct Instance{B<:DynamicAssortmentBenchmark}
+    "associated benchmark"
+    config::B
+    "item prices (including no purchase action)"
+    prices::Vector{Float64}
+    "static features, size (d, N)"
+    features::Matrix{Float64}
+    "starting hype and saturation features, size (2, N)"
+    starting_hype_and_saturation::Matrix{Float64}
+end
+
+function Instance(b::DynamicAssortmentBenchmark, rng::AbstractRNG)
+    N = item_count(b)
+    d = feature_count(b)
+    prices = vcat(rand(rng, Uniform(1.0, 10.0), N), 0.0) # last price is for no purchase action
+    features = rand(rng, Uniform(1.0, 10.0), (d, N))
+    starting_hype_and_saturation = rand(rng, Uniform(1.0, 10.0), (2, N))
+    return Instance(; config=b, prices, features, starting_hype_and_saturation)
+end
+
+customer_choice_model(b::Instance) = customer_choice_model(b.config)
+item_count(b::Instance) = item_count(b.config)
+feature_count(b::Instance) = feature_count(b.config)
+assortment_size(b::Instance) = assortment_size(b.config)
+max_steps(b::Instance) = max_steps(b.config)
diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
index d57644b..ada7212 100644
--- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
+++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
@@ -55,7 +55,7 @@ Abstract type for dynamic vehicle scheduling benchmarks.
 # Fields
 $TYPEDFIELDS
 """
-@kwdef struct DVSPBenchmark <: AbstractDynamicBenchmark
+@kwdef struct DVSPBenchmark <: AbstractDynamicBenchmark{true}
     "todo"
     max_requests_per_epoch::Int = 10
     "todo"
diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl
index 7a1e804..6c37b26 100644
--- a/src/Utils/Utils.jl
+++ b/src/Utils/Utils.jl
@@ -16,6 +16,7 @@ include("interface.jl")
 include("grid_graph.jl")
 include("misc.jl")
 include("model_builders.jl")
+include("maximizers.jl")
 
 export DataSample
 
@@ -34,5 +35,8 @@ export grid_graph, get_path, path_to_matrix
 export neg_tensor, squeeze_last_dims, average_tensor
 export scip_model, highs_model
 export objective_value
+export is_exogenous, is_endogenous
+
+export TopKMaximizer
 
 end
diff --git a/src/Utils/interface.jl b/src/Utils/interface.jl
index ce9b6ed..1a5b826 100644
--- a/src/Utils/interface.jl
+++ b/src/Utils/interface.jl
@@ -39,7 +39,7 @@ function generate_dataset(
     bench::AbstractBenchmark,
     dataset_size::Int;
     seed=nothing,
-    rng=MersenneTwister(0),
+    rng=MersenneTwister(seed),
     kwargs...,
 )
     Random.seed!(rng, seed)
@@ -187,18 +187,21 @@ This type should be used for benchmarks that involve single stage stochastic opt
 It follows the same interface as [`AbstractBenchmark`](@ref), with the addition of the following methods:
 - [`generate_anticipative_solver`](@ref)
 """
-abstract type AbstractStochasticBenchmark <: AbstractBenchmark end
+abstract type AbstractStochasticBenchmark{exogenous} <: AbstractBenchmark end
 
-function generate_scenario end
+is_exogenous(::AbstractStochasticBenchmark{exogenous}) where {exogenous} = exogenous
+is_endogenous(::AbstractStochasticBenchmark{exogenous}) where {exogenous} = !exogenous
 
 # only works for exogenous noise
+function generate_scenario end
+
 """
-    generate_scenario_generator(::AbstractStochasticBenchmark; kwargs...)
+    generate_scenario_generator(::AbstractStochasticBenchmark{true}; kwargs...)
 """
 function generate_scenario_generator end
 
 """
-    generate_anticipative_solver(::AbstractStochasticBenchmark; kwargs...)
+    generate_anticipative_solver(::AbstractStochasticBenchmark{true}; kwargs...)
 """
 function generate_anticipative_solver end
 
@@ -211,7 +214,7 @@ This type should be used for benchmarks that involve multi-stage stochastic opti
 It follows the same interface as [`AbstractStochasticBenchmark`](@ref), with the addition of the following methods:
 TODO
 """
-abstract type AbstractDynamicBenchmark <: AbstractStochasticBenchmark end
+abstract type AbstractDynamicBenchmark{exogenous} <: AbstractStochasticBenchmark{exogenous} end
 
 """
     generate_environment(::AbstractDynamicBenchmark, instance; kwargs...)
@@ -226,7 +229,7 @@ $TYPEDSIGNATURES
 Generate a vector of environments for the given dynamic benchmark and dataset.
 """
 function generate_environments(
-    bench::AbstractDynamicBenchmark, dataset::Vector{<:DataSample}, kwargs...
+    bench::AbstractDynamicBenchmark, dataset::Vector{<:DataSample}; kwargs...
 )
     return map(dataset) do sample
         generate_environment(bench, sample.instance; kwargs...)
diff --git a/src/Utils/maximizers.jl b/src/Utils/maximizers.jl
new file mode 100644
index 0000000..ee5ceea
--- /dev/null
+++ b/src/Utils/maximizers.jl
@@ -0,0 +1,22 @@
+"""
+$TYPEDEF
+
+Top k maximizer.
+"""
+struct TopKMaximizer
+    k::Int
+end
+
+"""
+$TYPEDSIGNATURES
+
+Return a `BitVector` mask of `length(θ)` marking the `m.k` largest entries of `θ`.
+"""
+function (m::TopKMaximizer)(θ; kwargs...)
+    N = length(θ)
+    @assert N >= m.k "The length of θ must be at least k"
+    solution = partialsortperm(θ, 1:(m.k); rev=true)
+    res = falses(N)
+    res[solution] .= 1
+    return res
+end

From d51fe95e03b06216beca25cbfc0e5f68ed104689 Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Wed, 6 Aug 2025 17:18:12 +0200
Subject: [PATCH 16/29] Fix tests

---
 src/Argmax2D/Argmax2D.jl                      | 22 ++++------
 src/DynamicAssortment/DynamicAssortment.jl    |  4 +-
 src/DynamicAssortment/environment.jl          | 42 -------------------
 src/DynamicAssortment/policies.jl             | 41 ++++++++++++++++++
 .../algorithms/anticipative_solver.jl         | 12 +++---
 5 files changed, 59 insertions(+), 62 deletions(-)
 create mode 100644 src/DynamicAssortment/policies.jl

diff --git a/src/Argmax2D/Argmax2D.jl b/src/Argmax2D/Argmax2D.jl
index a1c76c4..169c403 100644
--- a/src/Argmax2D/Argmax2D.jl
+++ b/src/Argmax2D/Argmax2D.jl
@@ -7,7 +7,7 @@ using Flux: Chain, Dense
 using LaTeXStrings: @L_str
 using LinearAlgebra: dot, norm
 using Plots: Plots
-using Random: Random, MersenneTwister
+using Random: Random, MersenneTwister, AbstractRNG
 
 include("polytope.jl")
 
@@ -53,20 +53,16 @@ maximizer(θ; instance, kwargs...) = instance[argmax(dot(θ, v) for v in instanc
 """
 $TYPEDSIGNATURES
 
-Generate a dataset for the [`Argmax2DBenchmark`](@ref).
+Generate a sample for the [`Argmax2DBenchmark`](@ref).
 """
-function Utils.generate_dataset(
-    bench::Argmax2DBenchmark, dataset_size=10; seed=nothing, rng=MersenneTwister(seed)
-)
+function Utils.generate_sample(bench::Argmax2DBenchmark, rng::AbstractRNG)
     (; nb_features, encoder, polytope_vertex_range) = bench
-    return map(1:dataset_size) do _
-        x = randn(rng, Float32, nb_features)
-        θ_true = encoder(x)
-        θ_true ./= 2 * norm(θ_true)
-        instance = build_polytope(rand(rng, polytope_vertex_range); shift=rand(rng))
-        y_true = maximizer(θ_true; instance)
-        return DataSample(; x=x, θ_true=θ_true, y_true=y_true, instance=instance)
-    end
+    x = randn(rng, Float32, nb_features)
+    θ_true = encoder(x)
+    θ_true ./= 2 * norm(θ_true)
+    instance = build_polytope(rand(rng, polytope_vertex_range); shift=rand(rng))
+    y_true = maximizer(θ_true; instance)
+    return DataSample(; x=x, θ_true=θ_true, y_true=y_true, instance=instance)
 end
 
 """
diff --git a/src/DynamicAssortment/DynamicAssortment.jl b/src/DynamicAssortment/DynamicAssortment.jl
index f5abbbe..4ab32e4 100644
--- a/src/DynamicAssortment/DynamicAssortment.jl
+++ b/src/DynamicAssortment/DynamicAssortment.jl
@@ -48,6 +48,7 @@ end
 
 include("instance.jl")
 include("environment.jl")
+include("policies.jl")
 
 customer_choice_model(b::DynamicAssortmentBenchmark) = b.customer_choice_model
 item_count(b::DynamicAssortmentBenchmark) = b.N
@@ -56,9 +57,8 @@ assortment_size(b::DynamicAssortmentBenchmark) = b.K
 max_steps(b::DynamicAssortmentBenchmark) = b.max_steps
 
 function Utils.generate_sample(
-    b::DynamicAssortmentBenchmark, rng::AbstractRNG=MersenneTwister(0); seed=nothing
+    b::DynamicAssortmentBenchmark, rng::AbstractRNG=MersenneTwister(0)
 )
-    Random.seed!(rng, seed)
     return DataSample(; instance=Instance(b, rng))
 end
 
diff --git a/src/DynamicAssortment/environment.jl b/src/DynamicAssortment/environment.jl
index f8617c0..df9b01b 100644
--- a/src/DynamicAssortment/environment.jl
+++ b/src/DynamicAssortment/environment.jl
@@ -166,45 +166,3 @@ function compute_expected_revenue(env::Environment, S)
     expected_revenue = dot(probs, r)
     return expected_revenue
 end
-
-function expert_solution(env::Environment)
-    N = item_count(env)
-    K = assortment_size(env)
-    best_S = falses(N)
-    best_revenue = -1.0
-    S_vec = falses(N)
-    for S in combinations(1:N, K)
-        S_vec .= false
-        S_vec[S] .= true
-        expected_revenue = compute_expected_revenue(env, S_vec)
-        if expected_revenue > best_revenue
-            best_S, best_revenue = copy(S_vec), expected_revenue
-        end
-    end
-    return best_S
-end
-
-function greedy_solution(env::Environment)
-    maximizer = generate_maximizer(env.instance.config)
-    return maximizer(prices(env))
-end
-
-function run_policy(env::Environment, episodes::Int; first_seed=1, policy=expert_solution)
-    dataset = []
-    rev_global = Float64[]
-    for i in 1:episodes
-        rev_episode = 0.0
-        CommonRLInterface.reset!(env; seed=first_seed - 1 + i, reset_seed=true)
-        training_instances = []
-        while !CommonRLInterface.terminated(env)
-            S = policy(env)
-            features = CommonRLInterface.observe(env)
-            push!(training_instances, DataSample(; x=features, y_true=S))
-            reward = CommonRLInterface.act!(env, S)
-            rev_episode += reward
-        end
-        push!(rev_global, rev_episode)
-        push!(dataset, training_instances)
-    end
-    return mean(rev_global), rev_global, dataset
-end
diff --git a/src/DynamicAssortment/policies.jl b/src/DynamicAssortment/policies.jl
new file mode 100644
index 0000000..56aba6d
--- /dev/null
+++ b/src/DynamicAssortment/policies.jl
@@ -0,0 +1,41 @@
+function expert_policy(env::Environment)
+    N = item_count(env)
+    K = assortment_size(env)
+    best_S = falses(N)
+    best_revenue = -1.0
+    S_vec = falses(N)
+    for S in combinations(1:N, K)
+        S_vec .= false
+        S_vec[S] .= true
+        expected_revenue = compute_expected_revenue(env, S_vec)
+        if expected_revenue > best_revenue
+            best_S, best_revenue = copy(S_vec), expected_revenue
+        end
+    end
+    return best_S
+end
+
+function greedy_policy(env::Environment)
+    maximizer = generate_maximizer(env.instance.config)
+    return maximizer(prices(env))
+end
+
+function run_policy(env::Environment, episodes::Int; first_seed=1, policy=expert_policy)
+    dataset = []
+    rev_global = Float64[]
+    for i in 1:episodes
+        rev_episode = 0.0
+        CommonRLInterface.reset!(env; seed=first_seed - 1 + i, reset_seed=true)
+        training_instances = []
+        while !CommonRLInterface.terminated(env)
+            S = policy(env)
+            features = CommonRLInterface.observe(env)
+            push!(training_instances, DataSample(; x=features, y_true=S))
+            reward = CommonRLInterface.act!(env, S)
+            rev_episode += reward
+        end
+        push!(rev_global, rev_episode)
+        push!(dataset, training_instances)
+    end
+    return mean(rev_global), rev_global, dataset
+end
diff --git a/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl b/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl
index dd52d19..aeaa6ad 100644
--- a/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl
+++ b/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl
@@ -201,9 +201,11 @@ end
 end
 
 function (solver::AnticipativeSolver)(env::DVSPEnv, scenario=env.scenario; reset_env=false)
-    if solver.is_2D
-        return anticipative_solver(env, scenario; model_builder=highs_model_2d, reset_env)
-    else
-        return anticipative_solver(env, scenario; model_builder=highs_model, reset_env)
-    end
+    return anticipative_solver(
+        env,
+        scenario;
+        model_builder=highs_model,
+        reset_env,
+        two_dimensional_features=solver.is_2D,
+    )
 end

From a90a3d5f080c99cde9db76f7670d7bfceee1ec84 Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Wed, 6 Aug 2025 18:08:10 +0200
Subject: [PATCH 17/29] simplify interface

---
 src/DecisionFocusedLearningBenchmarks.jl      |  4 ++-
 .../DynamicVehicleScheduling.jl               | 24 ++++++++++-------
 .../algorithms/anticipative_solver.jl         | 26 +++++++++----------
 .../environment/environment.jl                |  4 +--
 .../environment/scenario.jl                   |  6 ++---
 .../policy/anticipative_policy.jl             |  2 +-
 src/Utils/Utils.jl                            |  2 ++
 src/Utils/interface.jl                        | 21 ++++++++++-----
 8 files changed, 53 insertions(+), 36 deletions(-)

diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl
index 67bd006..68e0b1d 100644
--- a/src/DecisionFocusedLearningBenchmarks.jl
+++ b/src/DecisionFocusedLearningBenchmarks.jl
@@ -74,9 +74,11 @@ using .DynamicAssortment
 export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark, DataSample
 
 export generate_sample, generate_dataset, generate_environments, generate_environment
+export generate_scenario
 export generate_scenario_generator, generate_anticipative_solver
 export generate_statistical_model
 export generate_maximizer, maximizer_kwargs
+export generate_anticipative_solution
 export is_exogenous, is_endogenous
 
 export objective_value
@@ -92,7 +94,7 @@ export WarcraftBenchmark
 export FixedSizeShortestPathBenchmark
 export PortfolioOptimizationBenchmark
 export StochasticVehicleSchedulingBenchmark
-export DVSPBenchmark
+export DynamicVehicleSchedulingBenchmark
 export DynamicAssortmentBenchmark
 
 end # module DecisionFocusedLearningBenchmarks
diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
index ada7212..d044c5e 100644
--- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
+++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
@@ -55,7 +55,7 @@ Abstract type for dynamic vehicle scheduling benchmarks.
 # Fields
 $TYPEDFIELDS
 """
-@kwdef struct DVSPBenchmark <: AbstractDynamicBenchmark{true}
+@kwdef struct DynamicVehicleSchedulingBenchmark <: AbstractDynamicBenchmark{true}
     "todo"
     max_requests_per_epoch::Int = 10
     "todo"
@@ -66,7 +66,7 @@ $TYPEDFIELDS
     two_dimensional_features::Bool = false
 end
 
-function Utils.generate_dataset(b::DVSPBenchmark, dataset_size::Int=1)
+function Utils.generate_dataset(b::DynamicVehicleSchedulingBenchmark, dataset_size::Int=1)
     (; max_requests_per_epoch, Δ_dispatch, epoch_duration) = b
     files = readdir(datadep"dvrptw"; join=true)
     dataset_size = min(dataset_size, length(files))
@@ -82,23 +82,29 @@ function Utils.generate_dataset(b::DVSPBenchmark, dataset_size::Int=1)
     ]
 end
 
-function Utils.generate_environment(::DVSPBenchmark, instance::Instance; kwargs...)
+function Utils.generate_environment(
+    ::DynamicVehicleSchedulingBenchmark, instance::Instance; kwargs...
+)
     return DVSPEnv(instance; kwargs...)
 end
 
-function Utils.generate_maximizer(::DVSPBenchmark)
+function Utils.generate_maximizer(::DynamicVehicleSchedulingBenchmark)
     return LinearMaximizer(oracle; g, h)
 end
 
-function Utils.generate_scenario_generator(::DVSPBenchmark)
-    return generate_scenario
+function Utils.generate_scenario(b::DynamicVehicleSchedulingBenchmark, args...; kwargs...)
+    return Utils.generate_scenario(args...; kwargs...)
 end
 
-function Utils.generate_anticipative_solver(b::DVSPBenchmark; kwargs...)
-    return AnticipativeSolver(b.two_dimensional_features)
+function Utils.generate_anticipative_solution(
+    b::DynamicVehicleSchedulingBenchmark, args...; kwargs...
+)
+    return anticipative_solver(
+        args...; kwargs..., two_dimensional_features=b.two_dimensional_features
+    )
 end
 
-export DVSPBenchmark #, generate_environment # , generate_sample, generate_anticipative_solver
+export DynamicVehicleSchedulingBenchmark
 export run_policy!,
     GreedyVSPPolicy, LazyVSPPolicy, KleopatraVSPPolicy, AnticipativeVSPPolicy
 
diff --git a/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl b/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl
index aeaa6ad..f274e02 100644
--- a/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl
+++ b/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl
@@ -196,16 +196,16 @@ function anticipative_solver(
     return obj, dataset
 end
 
-@kwdef struct AnticipativeSolver
-    is_2D::Bool = false
-end
-
-function (solver::AnticipativeSolver)(env::DVSPEnv, scenario=env.scenario; reset_env=false)
-    return anticipative_solver(
-        env,
-        scenario;
-        model_builder=highs_model,
-        reset_env,
-        two_dimensional_features=solver.is_2D,
-    )
-end
+# @kwdef struct AnticipativeSolver
+#     is_2D::Bool = false
+# end
+
+# function (solver::AnticipativeSolver)(env::DVSPEnv, scenario=env.scenario; reset_env=false)
+#     return generate_anticipative_decision(
+#         env,
+#         scenario;
+#         model_builder=highs_model,
+#         reset_env,
+#         two_dimensional_features=solver.is_2D,
+#     )
+# end
diff --git a/src/DynamicVehicleScheduling/environment/environment.jl b/src/DynamicVehicleScheduling/environment/environment.jl
index 0bbc4ae..29d9d35 100644
--- a/src/DynamicVehicleScheduling/environment/environment.jl
+++ b/src/DynamicVehicleScheduling/environment/environment.jl
@@ -13,7 +13,7 @@ $TYPEDSIGNATURES
 Constructor for [`DVSPEnv`](@ref).
 """
 function DVSPEnv(instance::Instance; seed=nothing, rng=MersenneTwister(seed))
-    scenario = generate_scenario(instance; rng, seed)
+    scenario = Utils.generate_scenario(instance; rng, seed)
     initial_state = DVSPState(instance; scenario[1]...)
     return DVSPEnv(instance, initial_state, scenario)
 end
@@ -76,6 +76,6 @@ function CommonRLInterface.act!(env::DVSPEnv, routes, scenario=env.scenario)
     return reward
 end
 
-function generate_scenario(env::DVSPEnv; kwargs...)
+function Utils.generate_scenario(env::DVSPEnv; kwargs...)
     return generate_scenario(env.instance; kwargs...)
 end
diff --git a/src/DynamicVehicleScheduling/environment/scenario.jl b/src/DynamicVehicleScheduling/environment/scenario.jl
index cee4fe7..9059477 100644
--- a/src/DynamicVehicleScheduling/environment/scenario.jl
+++ b/src/DynamicVehicleScheduling/environment/scenario.jl
@@ -16,7 +16,7 @@ function Base.getindex(scenario::Scenario, idx::Integer)
     )
 end
 
-function generate_scenario(
+function Utils.generate_scenario(
     instance::Instance; seed=nothing, rng::AbstractRNG=MersenneTwister(seed)
 )
     (; Δ_dispatch, static_instance, last_epoch, epoch_duration, max_requests_per_epoch) =
@@ -46,6 +46,6 @@ function generate_scenario(
     return Scenario(new_indices, new_service_time, new_start_time)
 end
 
-function generate_scenario(sample::DataSample; kwargs...)
-    return generate_scenario(sample.instance; kwargs...)
+function Utils.generate_scenario(sample::DataSample; kwargs...)
+    return Utils.generate_scenario(sample.instance; kwargs...)
 end
diff --git a/src/DynamicVehicleScheduling/policy/anticipative_policy.jl b/src/DynamicVehicleScheduling/policy/anticipative_policy.jl
index d7f2381..f63b620 100644
--- a/src/DynamicVehicleScheduling/policy/anticipative_policy.jl
+++ b/src/DynamicVehicleScheduling/policy/anticipative_policy.jl
@@ -13,5 +13,5 @@ Apply the anticipative policy to the environment.
 function run_policy!(
     ::AnticipativeVSPPolicy, env::DVSPEnv, scenario=env.scenario; model_builder=highs_model
 )
-    return anticipative_solver(env, scenario; model_builder, reset_env=true)
+    return anticipative_solver(env, scenario; model_builder, reset_env=true)
 end
diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl
index 6c37b26..c9a7153 100644
--- a/src/Utils/Utils.jl
+++ b/src/Utils/Utils.jl
@@ -25,10 +25,12 @@ export generate_dataset,
     generate_statistical_model,
     generate_maximizer,
     generate_sample,
+    generate_scenario,
     generate_scenario_generator,
     generate_anticipative_solver,
     generate_environment,
     generate_environments
+export generate_anticipative_solution
 export plot_data, compute_gap
 export maximizer_kwargs
 export grid_graph, get_path, path_to_matrix
diff --git a/src/Utils/interface.jl b/src/Utils/interface.jl
index 1a5b826..2994e4e 100644
--- a/src/Utils/interface.jl
+++ b/src/Utils/interface.jl
@@ -192,18 +192,25 @@ abstract type AbstractStochasticBenchmark{exogenous} <: AbstractBenchmark end
 is_exogenous(::AbstractStochasticBenchmark{exogenous}) where {exogenous} = exogenous
 is_endogenous(::AbstractStochasticBenchmark{exogenous}) where {exogenous} = !exogenous
 
-# only works for exogenous noise
-function generate_scenario end
-
 """
-    generate_scenario_generator(::AbstractStochasticBenchmark{true}; kwargs...)
+    generate_scenario(::AbstractStochasticBenchmark{true}, instance; kwargs...)
 """
-function generate_scenario_generator end
+function generate_scenario end
 
 """
-    generate_anticipative_solver(::AbstractStochasticBenchmark{true}; kwargs...)
+    generate_anticipative_solution(::AbstractStochasticBenchmark{true}, instance, scenario; kwargs...)
 """
-function generate_anticipative_solver end
+function generate_anticipative_solution end
+
+# """
+#     generate_scenario_generator(::AbstractStochasticBenchmark{true}; kwargs...)
+# """
+# function generate_scenario_generator end
+
+# """
+#     generate_anticipative_solver(::AbstractStochasticBenchmark{true}; kwargs...)
+# """
+# function generate_anticipative_solver end
 
 """
 $TYPEDEF

From e8be49680c9ca55cdf0bf155a1c641da948ea5a8 Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Thu, 7 Aug 2025 16:04:45 +0200
Subject: [PATCH 18/29] Dynamic assortment is in a good state; fix docs;
 working on DVSP

---
 Project.toml                                  |  6 +-
 docs/src/api/dynamic_assorment.md             | 15 ++++
 docs/src/benchmarks/dynamic_assorment.md      |  3 +
 src/DecisionFocusedLearningBenchmarks.jl      | 38 ++++----
 src/DynamicAssortment/DynamicAssortment.jl    | 29 +++++--
 src/DynamicAssortment/environment.jl          | 21 ++---
 .../DynamicVehicleScheduling.jl               | 26 +++---
 .../algorithms/prize_collecting_vsp.jl        | 87 -------------------
 .../environment/environment.jl                | 12 +--
 src/Utils/Utils.jl                            | 27 +++---
 src/Utils/environment.jl                      | 52 +++++++++++
 src/Utils/interface.jl                        | 26 +++---
 src/Utils/policy.jl                           | 83 ++++++++++++++++++
 13 files changed, 255 insertions(+), 170 deletions(-)
 create mode 100644 docs/src/api/dynamic_assorment.md
 create mode 100644 docs/src/benchmarks/dynamic_assorment.md
 create mode 100644 src/Utils/environment.jl
 create mode 100644 src/Utils/policy.jl

diff --git a/Project.toml b/Project.toml
index 87fe839..09e60e4 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,9 +4,8 @@ authors = ["Members of JuliaDecisionFocusedLearning"]
 version = "0.2.4"
 
 [deps]
-Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
-CommonRLInterface = "d842c3ba-07a1-494f-bbec-f5741b0a3e98"
 Colors = "5ae59095-9a9b-59fe-a467-6f913c188581"
+Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
 ConstrainedShortestPaths = "b3798467-87dc-4d99-943d-35a1bd39e395"
 DataDeps = "124859b0-ceae-595e-8997-d05f6a7a8dfe"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
@@ -35,9 +34,8 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 
 [compat]
-Combinatorics = "1.0.3"
-CommonRLInterface = "0.3.3"
 Colors = "0.13.1"
+Combinatorics = "1.0.3"
 ConstrainedShortestPaths = "0.6.0"
 DataDeps = "0.7"
 Distributions = "0.25"
diff --git a/docs/src/api/dynamic_assorment.md b/docs/src/api/dynamic_assorment.md
new file mode 100644
index 0000000..d738692
--- /dev/null
+++ b/docs/src/api/dynamic_assorment.md
@@ -0,0 +1,15 @@
+# Dynamic Assortment
+
+## Public
+
+```@autodocs
+Modules = [DecisionFocusedLearningBenchmarks.DynamicAssortment]
+Private = false
+```
+
+## Private
+
+```@autodocs
+Modules = [DecisionFocusedLearningBenchmarks.DynamicAssortment]
+Public = false
+```
diff --git a/docs/src/benchmarks/dynamic_assorment.md b/docs/src/benchmarks/dynamic_assorment.md
new file mode 100644
index 0000000..dcf3243
--- /dev/null
+++ b/docs/src/benchmarks/dynamic_assorment.md
@@ -0,0 +1,3 @@
+# Dynamic Assortment
+
+[`DynamicAssortmentBenchmark`](@ref).
diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl
index 68e0b1d..b49362a 100644
--- a/src/DecisionFocusedLearningBenchmarks.jl
+++ b/src/DecisionFocusedLearningBenchmarks.jl
@@ -59,23 +59,16 @@ include("DynamicVehicleScheduling/DynamicVehicleScheduling.jl")
 include("DynamicAssortment/DynamicAssortment.jl")
 
 using .Utils
-using .Argmax
-using .Argmax2D
-using .Ranking
-using .SubsetSelection
-using .Warcraft
-using .FixedSizeShortestPath
-using .PortfolioOptimization
-using .StochasticVehicleScheduling
-using .DynamicVehicleScheduling
-using .DynamicAssortment
 
 # Interface
 export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark, DataSample
+export AbstractEnv, get_seed, is_terminated, observe, reset!, step!
+
+export Policy, run_policy!
 
 export generate_sample, generate_dataset, generate_environments, generate_environment
 export generate_scenario
-export generate_scenario_generator, generate_anticipative_solver
+export generate_policies
 export generate_statistical_model
 export generate_maximizer, maximizer_kwargs
 export generate_anticipative_solution
@@ -86,15 +79,26 @@ export plot_data, plot_instance, plot_solution
 export compute_gap
 
 # Export all benchmarks
-export ArgmaxBenchmark
+using .Argmax
+using .Argmax2D
+using .Ranking
+using .SubsetSelection
+using .Warcraft
+using .FixedSizeShortestPath
+using .PortfolioOptimization
+using .StochasticVehicleScheduling
+using .DynamicVehicleScheduling
+using .DynamicAssortment
+
 export Argmax2DBenchmark
-export RankingBenchmark
-export SubsetSelectionBenchmark
-export WarcraftBenchmark
+export ArgmaxBenchmark
+export DynamicAssortmentBenchmark
+export DynamicVehicleSchedulingBenchmark
 export FixedSizeShortestPathBenchmark
 export PortfolioOptimizationBenchmark
+export RankingBenchmark
 export StochasticVehicleSchedulingBenchmark
-export DynamicVehicleSchedulingBenchmark
-export DynamicAssortmentBenchmark
+export SubsetSelectionBenchmark
+export WarcraftBenchmark
 
 end # module DecisionFocusedLearningBenchmarks
diff --git a/src/DynamicAssortment/DynamicAssortment.jl b/src/DynamicAssortment/DynamicAssortment.jl
index 4ab32e4..f04d3e2 100644
--- a/src/DynamicAssortment/DynamicAssortment.jl
+++ b/src/DynamicAssortment/DynamicAssortment.jl
@@ -2,7 +2,6 @@ module DynamicAssortment
 
 using ..Utils
 
-using CommonRLInterface: CommonRLInterface, AbstractEnv
 using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES
 using Distributions: Uniform, Categorical
 using LinearAlgebra: dot
@@ -62,17 +61,35 @@ function Utils.generate_sample(
     return DataSample(; instance=Instance(b, rng))
 end
 
+function Utils.generate_statistical_model(b::DynamicAssortmentBenchmark; seed=nothing)
+    Random.seed!(seed)
+    d = feature_count(b)
+    return Chain(Dense(d + 8 => 5), Dense(5 => 1), vec)
+end
+
 function Utils.generate_maximizer(b::DynamicAssortmentBenchmark)
     return TopKMaximizer(assortment_size(b))
 end
 
 function Utils.generate_environment(
-    ::DynamicAssortmentBenchmark,
-    instance::Instance;
-    seed=nothing,
-    rng::AbstractRNG=MersenneTwister(seed),
+    ::DynamicAssortmentBenchmark, instance::Instance, rng::AbstractRNG
 )
-    return Environment(instance; seed=seed, rng=rng)
+    seed = rand(rng, 1:typemax(Int))
+    return Environment(instance; seed)
+end
+
+function Utils.generate_policies(b::DynamicAssortmentBenchmark)
+    greedy = Policy(
+        "Greedy",
+        "policy that selects the assortment with items with the highest prices",
+        greedy_policy,
+    )
+    expert = Policy(
+        "Expert",
+        "policy that selects the assortment with the highest expected revenue",
+        expert_policy,
+    )
+    return (expert, greedy)
 end
 
 export DynamicAssortmentBenchmark
diff --git a/src/DynamicAssortment/environment.jl b/src/DynamicAssortment/environment.jl
index df9b01b..5f62acc 100644
--- a/src/DynamicAssortment/environment.jl
+++ b/src/DynamicAssortment/environment.jl
@@ -7,7 +7,7 @@ Environment for the dynamic assortment problem.
 $TYPEDFIELDS
 """
 @kwdef mutable struct Environment{I<:Instance,R<:AbstractRNG,S<:Union{Nothing,Int}} <:
-                      AbstractEnv
+                      Utils.AbstractEnvironment
     "associated instance"
     instance::I
     "current step"
@@ -43,23 +43,22 @@ function Environment(instance::Instance; seed=0, rng::AbstractRNG=MersenneTwiste
         features=full_features,
         d_features=zeros(2, N),
     )
-    CommonRLInterface.reset!(env; reset_seed=true)
+    Utils.reset!(env; reset_seed=true)
     return env
 end
 
+Utils.get_seed(env::Environment) = env.seed
 customer_choice_model(b::Environment) = customer_choice_model(b.instance)
 item_count(b::Environment) = item_count(b.instance)
 feature_count(b::Environment) = feature_count(b.instance)
 assortment_size(b::Environment) = assortment_size(b.instance)
 max_steps(b::Environment) = max_steps(b.instance)
 prices(b::Environment) = b.instance.prices
-# features(b::Environment) = b.instance.features
-# starting_hype_and_saturation(b::Environment) = b.instance.starting_hype_and_saturation
 
 ## Basic operations of environment
 
 # Reset the environment
-function CommonRLInterface.reset!(env::Environment; reset_seed=false, seed=env.seed)
+function Utils.reset!(env::Environment; reset_seed=false, seed=env.seed)
     reset_seed && Random.seed!(env.rng, seed)
 
     env.step = 1
@@ -79,18 +78,19 @@ function CommonRLInterface.reset!(env::Environment; reset_seed=false, seed=env.s
     return nothing
 end
 
-function CommonRLInterface.terminated(env::Environment)
+function Utils.is_terminated(env::Environment)
     return env.step > max_steps(env)
 end
 
-function CommonRLInterface.observe(env::Environment)
+function Utils.observe(env::Environment)
     delta_features = env.features[2:3, :] .- env.instance.starting_hype_and_saturation
     return vcat(
         env.features,
         env.d_features,
         delta_features,
         ones(1, item_count(env)) .* (env.step / max_steps(env) * 10),
-    ) #./ 10
+    ) ./ 10,
+    nothing
 end
 
 # Compute the hype vector
@@ -149,9 +149,10 @@ function choice_probabilities(env::Environment, S)
 end
 
 # Purchase decision
-function CommonRLInterface.act!(env::Environment, S)
+function Utils.step!(env::Environment, assortment)
+    @assert !Utils.is_terminated(env) "Environment is terminated, cannot act!"
     r = prices(env)
-    probs = choice_probabilities(env, S)
+    probs = choice_probabilities(env, assortment)
     item = rand(env.rng, Categorical(probs))
     reward = r[item]
     buy_item!(env, item)
diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
index d044c5e..1cc5262 100644
--- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
+++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
@@ -3,7 +3,6 @@ module DynamicVehicleScheduling
 using ..Utils
 
 using Base: @kwdef
-using CommonRLInterface: CommonRLInterface, AbstractEnv, reset!, terminated, observe, act!
 using DataDeps: @datadep_str
 using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES
 using Graphs
@@ -39,11 +38,11 @@ include("algorithms/anticipative_solver.jl")
 include("learning/features.jl")
 include("learning/2d_features.jl")
 
-include("policy/abstract_vsp_policy.jl")
-include("policy/greedy_policy.jl")
-include("policy/lazy_policy.jl")
-include("policy/anticipative_policy.jl")
-include("policy/kleopatra_policy.jl")
+# include("policy/abstract_vsp_policy.jl")
+# include("policy/greedy_policy.jl")
+# include("policy/lazy_policy.jl")
+# include("policy/anticipative_policy.jl")
+# include("policy/kleopatra_policy.jl")
 
 include("maximizer.jl")
 
@@ -56,13 +55,13 @@ Abstract type for dynamic vehicle scheduling benchmarks.
 $TYPEDFIELDS
 """
 @kwdef struct DynamicVehicleSchedulingBenchmark <: AbstractDynamicBenchmark{true}
-    "todo"
+    "maximum number of customers entering the system per epoch"
     max_requests_per_epoch::Int = 10
-    "todo"
+    "time between decision and dispatch of a vehicle"
     Δ_dispatch::Float64 = 1.0
-    "todo"
+    "duration of an epoch"
     epoch_duration::Float64 = 1.0
-    "todo"
+    "whether to use two-dimensional features"
     two_dimensional_features::Bool = false
 end
 
@@ -83,9 +82,10 @@ function Utils.generate_dataset(b::DynamicVehicleSchedulingBenchmark, dataset_si
 end
 
 function Utils.generate_environment(
-    ::DynamicVehicleSchedulingBenchmark, instance::Instance; kwargs...
+    ::DynamicVehicleSchedulingBenchmark, instance::Instance, rng::AbstractRNG
 )
-    return DVSPEnv(instance; kwargs...)
+    seed = rand(rng, 1:typemax(Int))
+    return DVSPEnv(instance; seed)
 end
 
 function Utils.generate_maximizer(::DynamicVehicleSchedulingBenchmark)
@@ -105,7 +105,5 @@ function Utils.generate_anticipative_solution(
 end
 
 export DynamicVehicleSchedulingBenchmark
-export run_policy!,
-    GreedyVSPPolicy, LazyVSPPolicy, KleopatraVSPPolicy, AnticipativeVSPPolicy
 
 end
diff --git a/src/DynamicVehicleScheduling/algorithms/prize_collecting_vsp.jl b/src/DynamicVehicleScheduling/algorithms/prize_collecting_vsp.jl
index 14c51f9..d98f4d0 100644
--- a/src/DynamicVehicleScheduling/algorithms/prize_collecting_vsp.jl
+++ b/src/DynamicVehicleScheduling/algorithms/prize_collecting_vsp.jl
@@ -126,90 +126,3 @@ function prize_collecting_vsp(
 
     return retrieve_routes(value.(y), graph)
 end
-
-# # ?
-# function prize_collecting_vsp_Q(
-#     θ::AbstractVector,
-#     vals::AbstractVector;
-#     instance::DVSPState,
-#     model_builder=highs_model,
-#     kwargs...,
-# )
-#     (; duration) = instance.instance
-#     graph = create_graph(instance)
-#     model = model_builder()
-#     set_silent(model)
-#     nb_nodes = nv(graph)
-#     job_indices = 2:(nb_nodes)
-#     @variable(model, y[i=1:nb_nodes, j=1:nb_nodes; has_edge(graph, i, j)] >= 0)
-#     θ_ext = fill(0.0, location_count(instance.instance))  # no prize for must dispatch requests, only hard constraints
-#     θ_ext[instance.is_postponable] .= θ
-#     # v_ext = fill(0.0, nb_locations(instance.instance))  # no prize for must dispatch requests, only hard constraints
-#     # v_ext[instance.is_postponable] .= vals
-#     @objective(
-#         model,
-#         Max,
-#         sum(
-#             (θ_ext[dst(edge)] + vals[dst(edge)] - duration[src(edge), dst(edge)]) *
-#             y[src(edge), dst(edge)] for edge in edges(graph)
-#         )
-#     )
-#     @constraint(
-#         model,
-#         flow[i in 2:nb_nodes],
-#         sum(y[j, i] for j in inneighbors(graph, i)) ==
-#             sum(y[i, j] for j in outneighbors(graph, i))
-#     )
-#     @constraint(
-#         model, demand[i in job_indices], sum(y[j, i] for j in inneighbors(graph, i)) <= 1
-#     )
-#     # must dispatch constraints
-#     @constraint(
-#         model,
-#         demand_must_dispatch[i in job_indices; instance.is_must_dispatch[i]],
-#         sum(y[j, i] for j in inneighbors(graph, i)) == 1
-#     )
-#     optimize!(model)
-#     return retrieve_routes(value.(y), graph)
-# end
-
-# function my_objective_value(θ, routes; instance)
-#     (; duration) = instance.instance
-#     total = 0.0
-#     θ_ext = fill(0.0, location_count(instance))
-#     θ_ext[instance.is_postponable] .= θ
-#     for route in routes
-#         for (u, v) in partition(vcat(1, route), 2, 1)
-#             total += θ_ext[v] - duration[u, v]
-#         end
-#     end
-#     return -total
-# end
-
-# function _objective_value(θ, routes; instance)
-#     (; duration) = instance.instance
-#     total = 0.0
-#     θ_ext = fill(0.0, location_count(instance))
-#     θ_ext[instance.is_postponable] .= θ
-#     mapping = cumsum(instance.is_postponable)
-#     g = falses(length(θ))
-#     for route in routes
-#         for (u, v) in partition(vcat(1, route), 2, 1)
-#             total -= duration[u, v]
-#             if instance.is_postponable[v]
-#                 total += θ_ext[v]
-#                 g[mapping[v]] = 1
-#             end
-#         end
-#     end
-#     return -total, g
-# end
-
-# function ChainRulesCore.rrule(::typeof(my_objective_value), θ, routes; instance)
-#     total, g = _objective_value(θ, routes; instance)
-#     function pullback(dy)
-#         g = g .* dy
-#         return NoTangent(), g, NoTangent()
-#     end
-#     return total, pullback
-# end
diff --git a/src/DynamicVehicleScheduling/environment/environment.jl b/src/DynamicVehicleScheduling/environment/environment.jl
index 29d9d35..ce146e6 100644
--- a/src/DynamicVehicleScheduling/environment/environment.jl
+++ b/src/DynamicVehicleScheduling/environment/environment.jl
@@ -1,4 +1,4 @@
-struct DVSPEnv{S<:DVSPState} <: AbstractEnv
+struct DVSPEnv{S<:DVSPState} <: Utils.AbstractEnvironment
     "associated instance"
     instance::Instance
     "current state"
@@ -28,7 +28,7 @@ $TYPEDSIGNATURES
 
 Get the current state of the environment.
 """
-CommonRLInterface.observe(env::DVSPEnv) = env.state
+Utils.observe(env::DVSPEnv) = nothing, env.state
 
 current_epoch(env::DVSPEnv) = current_epoch(env.state)
 
@@ -51,7 +51,7 @@ $TYPEDSIGNATURES
 
 Check if the episode is terminated, i.e. if the current epoch is the last one.
 """
-CommonRLInterface.terminated(env::DVSPEnv) = current_epoch(env) > last_epoch(env)
+Utils.is_terminated(env::DVSPEnv) = current_epoch(env) > last_epoch(env)
 
 """
 $TYPEDSIGNATURES
@@ -59,7 +59,7 @@ $TYPEDSIGNATURES
 Reset the environment to its initial state.
 Also reset the seed if `reset_seed` is set to true.
 """
-function CommonRLInterface.reset!(env::DVSPEnv, scenario=env.scenario)
+function Utils.reset!(env::DVSPEnv, scenario=env.scenario)
     reset_state!(env.state, env.instance; scenario[1]...)
     return nothing
 end
@@ -67,10 +67,10 @@ end
 """
 remove dispatched customers, advance time, and add new requests to the environment.
 """
-function CommonRLInterface.act!(env::DVSPEnv, routes, scenario=env.scenario)
+function Utils.step!(env::DVSPEnv, routes, scenario=env.scenario)
     reward = -apply_routes!(env.state, routes)
     env.state.current_epoch += 1
-    if !CommonRLInterface.terminated(env)
+    if !Utils.is_terminated(env)
         add_new_customers!(env.state, env.instance; scenario[current_epoch(env)]...)
     end
     return reward
diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl
index c9a7153..2efd0d0 100644
--- a/src/Utils/Utils.jl
+++ b/src/Utils/Utils.jl
@@ -12,25 +12,28 @@ using StatsBase: StatsBase
 using Statistics: mean
 
 include("data_sample.jl")
+include("maximizers.jl")
+include("environment.jl")
+include("policy.jl")
 include("interface.jl")
 include("grid_graph.jl")
 include("misc.jl")
 include("model_builders.jl")
-include("maximizers.jl")
 
-export DataSample
+export DataSample, Policy
+export run_policy!
+export TopKMaximizer
+
+export AbstractEnvironment, get_seed, is_terminated, observe, reset!, step!
 
 export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark
-export generate_dataset,
-    generate_statistical_model,
-    generate_maximizer,
-    generate_sample,
-    generate_scenario,
-    generate_scenario_generator,
-    generate_anticipative_solver,
-    generate_environment,
-    generate_environments
+export generate_sample, generate_dataset
+export generate_statistical_model, generate_maximizer
+export generate_scenario
+export generate_environment, generate_environments
+export generate_policies
 export generate_anticipative_solution
+
 export plot_data, compute_gap
 export maximizer_kwargs
 export grid_graph, get_path, path_to_matrix
@@ -39,6 +42,4 @@ export scip_model, highs_model
 export objective_value
 export is_exogenous, is_endogenous
 
-export TopKMaximizer
-
 end
diff --git a/src/Utils/environment.jl b/src/Utils/environment.jl
new file mode 100644
index 0000000..38a3f34
--- /dev/null
+++ b/src/Utils/environment.jl
@@ -0,0 +1,52 @@
+"""
+$TYPEDEF
+
+Abstract type for environments in decision-focused learning benchmarks.
+"""
+abstract type AbstractEnvironment end
+
+"""
+$TYPEDSIGNATURES
+
+Seed accessor for environments.
+By default, environments have no seed.
+Override this method to provide a seed for the environment.
+"""
+function get_seed(::AbstractEnvironment)
+    return nothing
+end
+
+"""
+    is_terminated(env::AbstractEnvironment) --> Bool
+
+Check if the environment has reached a terminal state.
+"""
+function is_terminated end
+
+"""
+    observe(env::AbstractEnvironment) --> Tuple
+
+Get the current observation from the environment.
+This function should return a tuple of two elements:
+    1. An array of features representing the current state of the environment.
+    2. An internal state of the environment, which can be used for further processing (return `nothing` if not needed).
+"""
+function observe end
+
+"""
+    reset!(env::AbstractEnvironment; reset_seed::Bool, seed=get_seed(env)) --> Nothing
+
+Reset the environment to its initial state.
+If `reset_seed` is true, the random number generator is reset to the given `seed`.
+"""
+function reset! end
+
+"""
+    step!(env::AbstractEnvironment, action) --> Float64
+
+Perform a step in the environment with the given action.
+Returns the reward received after taking the action.
+This function may also update the internal state of the environment.
+If the environment is terminated, it should raise an error.
+"""
+function step! end
diff --git a/src/Utils/interface.jl b/src/Utils/interface.jl
index 2994e4e..8c3c0c7 100644
--- a/src/Utils/interface.jl
+++ b/src/Utils/interface.jl
@@ -62,6 +62,11 @@ It's usually a Flux model, that takes a feature matrix x as input, and returns a
 """
 function generate_statistical_model end
 
+"""
+    generate_policies(::AbstractBenchmark) -> Vector{Policy}
+"""
+function generate_policies end
+
 """
     plot_data(::AbstractBenchmark, ::DataSample; kwargs...)
 
@@ -202,16 +207,6 @@ function generate_scenario end
 """
 function generate_anticipative_solution end
 
-# """
-#     generate_scenario_generator(::AbstractStochasticBenchmark{true}; kwargs...)
-# """
-# function generate_scenario_generator end
-
-# """
-#     generate_anticipative_solver(::AbstractStochasticBenchmark{true}; kwargs...)
-# """
-# function generate_anticipative_solver end
-
 """
 $TYPEDEF
 
@@ -224,7 +219,7 @@ TODO
 abstract type AbstractDynamicBenchmark{exogenous} <: AbstractStochasticBenchmark{exogenous} end
 
 """
-    generate_environment(::AbstractDynamicBenchmark, instance; kwargs...)
+    generate_environment(::AbstractDynamicBenchmark, instance, rng::AbstractRNG; kwargs...)
 
 Initialize an environment for the given dynamic benchmark instance.
 """
@@ -236,9 +231,14 @@ $TYPEDSIGNATURES
 Generate a vector of environments for the given dynamic benchmark and dataset.
 """
 function generate_environments(
-    bench::AbstractDynamicBenchmark, dataset::Vector{<:DataSample}; kwargs...
+    bench::AbstractDynamicBenchmark,
+    dataset::Vector{<:DataSample};
+    seed=nothing,
+    rng=MersenneTwister(seed),
+    kwargs...,
 )
+    Random.seed!(rng, seed)
     return map(dataset) do sample
-        generate_environment(bench, sample.instance; kwargs...)
+        generate_environment(bench, sample.instance, rng; kwargs...)
     end
 end
diff --git a/src/Utils/policy.jl b/src/Utils/policy.jl
new file mode 100644
index 0000000..0e216fd
--- /dev/null
+++ b/src/Utils/policy.jl
@@ -0,0 +1,83 @@
+"""
+$TYPEDEF
+
+Policy type for decision-focused learning benchmarks.
+"""
+struct Policy{P}
+    "policy name"
+    name::String
+    "policy description"
+    description::String
+    "policy run function"
+    policy::P
+end
+
+function Base.show(io::IO, p::Policy)
+    println(io, "$(p.name): $(p.description)")
+    return nothing
+end
+"""
+$TYPEDSIGNATURES
+
+Run the policy and get the next decision on the given environment/instance.
+"""
+function (p::Policy)(args...; kwargs...)
+    return p.policy(args...; kwargs...)
+end
+
+"""
+$TYPEDSIGNATURES
+
+Run the policy on the environment and return the total reward and a dataset of observations.
+By default, the environment is reset before running the policy.
+"""
+function run_policy!(policy, env::AbstractEnvironment)
+    total_reward = 0.0
+    reset!(env; reset_seed=false)
+    local labeled_dataset
+    while !is_terminated(env)
+        y = policy(env)
+        features, state = observe(env)
+        if @isdefined labeled_dataset
+            push!(labeled_dataset, DataSample(; x=features, y_true=y, instance=state))
+        else
+            labeled_dataset = [DataSample(; x=features, y_true=y, instance=state)]
+        end
+        reward = step!(env, y)
+        total_reward += reward
+    end
+    return total_reward, labeled_dataset
+end
+
+function run_policy!(policy, envs::Vector{<:AbstractEnvironment})
+    E = length(envs)
+    rewards = zeros(Float64, E)
+    datasets = map(1:E) do e
+        reward, dataset = run_policy!(policy, envs[e])
+        rewards[e] = reward
+        return dataset
+    end
+    return rewards, vcat(datasets...)
+end
+
+function run_policy!(policy, env::AbstractEnvironment, episodes::Int; seed=get_seed(env))
+    reset!(env; reset_seed=true, seed)
+    total_reward = 0.0
+    datasets = map(1:episodes) do _i
+        reward, dataset = run_policy!(policy, env)
+        total_reward += reward
+        return dataset
+    end
+    return total_reward / episodes, vcat(datasets...)
+end
+
+function run_policy!(policy, envs::Vector{<:AbstractEnvironment}, episodes::Int)
+    E = length(envs)
+    rewards = zeros(Float64, E)
+    datasets = map(1:E) do e
+        reward, dataset = run_policy!(policy, envs[e], episodes)
+        rewards[e] = reward
+        return dataset
+    end
+    return rewards, vcat(datasets...)
+end

From 987fec867e3fe8da27e200eda0537a5ef5d6e434 Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Thu, 7 Aug 2025 17:45:40 +0200
Subject: [PATCH 19/29] Implement policy generator for DVSP, and cleanup seed
 handling

---
 .../DynamicVehicleScheduling.jl               | 15 ++++++++
 .../environment/environment.jl                | 27 +++++++++++----
 src/DynamicVehicleScheduling/policy.jl        | 34 +++++++++++++++++++
 .../policy/greedy_policy.jl                   |  9 +++++
 .../policy/kleopatra_policy.jl                | 32 -----------------
 src/Utils/policy.jl                           | 18 +++++-----
 6 files changed, 88 insertions(+), 47 deletions(-)
 create mode 100644 src/DynamicVehicleScheduling/policy.jl

diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
index 1cc5262..4c54966 100644
--- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
+++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
@@ -38,6 +38,7 @@ include("algorithms/anticipative_solver.jl")
 include("learning/features.jl")
 include("learning/2d_features.jl")
 
+include("policy.jl")
 # include("policy/abstract_vsp_policy.jl")
 # include("policy/greedy_policy.jl")
 # include("policy/lazy_policy.jl")
@@ -104,6 +105,20 @@ function Utils.generate_anticipative_solution(
     )
 end
 
+function Utils.generate_policies(b::DynamicVehicleSchedulingBenchmark)
+    lazy = Policy(
+        "Lazy",
+        "Lazy policy that dispatches vehicles only when they are ready.",
+        lazy_policy,
+    )
+    greedy = Policy(
+        "Greedy",
+        "Greedy policy that dispatches vehicles to the nearest customer.",
+        greedy_policy,
+    )
+    return (lazy, greedy)
+end
+
 export DynamicVehicleSchedulingBenchmark
 
 end
diff --git a/src/DynamicVehicleScheduling/environment/environment.jl b/src/DynamicVehicleScheduling/environment/environment.jl
index ce146e6..afbefb5 100644
--- a/src/DynamicVehicleScheduling/environment/environment.jl
+++ b/src/DynamicVehicleScheduling/environment/environment.jl
@@ -1,10 +1,14 @@
-struct DVSPEnv{S<:DVSPState} <: Utils.AbstractEnvironment
+mutable struct DVSPEnv{S<:DVSPState,R<:AbstractRNG,SS} <: Utils.AbstractEnvironment
     "associated instance"
     instance::Instance
     "current state"
     state::S
     "scenario the environment will use when not given a specific one"
     scenario::Scenario
+    "random number generator"
+    rng::R
+    "seed for the environment"
+    seed::SS
 end
 
 """
@@ -12,10 +16,11 @@ $TYPEDSIGNATURES
 
 Constructor for [`DVSPEnv`](@ref).
 """
-function DVSPEnv(instance::Instance; seed=nothing, rng=MersenneTwister(seed))
-    scenario = Utils.generate_scenario(instance; rng, seed)
+function DVSPEnv(instance::Instance; seed=nothing)
+    rng = MersenneTwister(seed)
+    scenario = Utils.generate_scenario(instance; rng)
     initial_state = DVSPState(instance; scenario[1]...)
-    return DVSPEnv(instance, initial_state, scenario)
+    return DVSPEnv(instance, initial_state, scenario, rng, seed)
 end
 
 currrent_epoch(env::DVSPEnv) = current_epoch(env.state)
@@ -23,6 +28,8 @@ epoch_duration(env::DVSPEnv) = epoch_duration(env.instance)
 last_epoch(env::DVSPEnv) = last_epoch(env.instance)
 Δ_dispatch(env::DVSPEnv) = Δ_dispatch(env.instance)
 
+Utils.get_seed(env::DVSPEnv) = env.seed
+
 """
 $TYPEDSIGNATURES
 
@@ -59,13 +66,19 @@ $TYPEDSIGNATURES
 Reset the environment to its initial state.
 Also reset the seed if `reset_seed` is set to true.
 """
-function Utils.reset!(env::DVSPEnv, scenario=env.scenario)
-    reset_state!(env.state, env.instance; scenario[1]...)
+function Utils.reset!(env::DVSPEnv; seed=get_seed(env), reset_seed=false)
+    if reset_seed
+        Random.seed!(env.rng, seed)
+    end
+    env.scenario = Utils.generate_scenario(env; rng=env.rng)
+    reset_state!(env.state, env.instance; env.scenario[1]...)
     return nothing
 end
 
 """
-remove dispatched customers, advance time, and add new requests to the environment.
+$TYPEDSIGNATURES
+
+Remove dispatched customers, advance time, and add new requests to the environment.
 """
 function Utils.step!(env::DVSPEnv, routes, scenario=env.scenario)
     reward = -apply_routes!(env.state, routes)
diff --git a/src/DynamicVehicleScheduling/policy.jl b/src/DynamicVehicleScheduling/policy.jl
new file mode 100644
index 0000000..ebce796
--- /dev/null
+++ b/src/DynamicVehicleScheduling/policy.jl
@@ -0,0 +1,34 @@
+function greedy_policy(env::DVSPEnv; model_builder=highs_model)
+    _, state = observe(env)
+    (; is_postponable) = state
+    nb_postponable_requests = sum(is_postponable)
+    θ = ones(nb_postponable_requests) * 1e9
+    routes = prize_collecting_vsp(θ; instance=state, model_builder)
+    return routes
+end
+
+function lazy_policy(env::DVSPEnv; model_builder=highs_model)
+    _, state = observe(env)
+    nb_postponable_requests = sum(state.is_postponable)
+    θ = ones(nb_postponable_requests) * -1e9
+    routes = prize_collecting_vsp(θ; instance=state, model_builder)
+    return routes
+end
+
+"""
+$TYPEDEF
+
+Kleopatra policy for the Dynamic Vehicle Scheduling Problem.
+"""
+struct KleopatraVSPPolicy{P}
+    prize_predictor::P
+end
+
+function (π::KleopatraVSPPolicy)(env::DVSPEnv; model_builder=highs_model)
+    x, state = observe(env)
+    (; prize_predictor) = π
+    # x = has_2D_features ? compute_2D_features(env) : compute_features(env)
+    θ = prize_predictor(x)
+    routes = prize_collecting_vsp(θ; instance=state, model_builder)
+    return routes
+end
diff --git a/src/DynamicVehicleScheduling/policy/greedy_policy.jl b/src/DynamicVehicleScheduling/policy/greedy_policy.jl
index a15a3b9..90de293 100644
--- a/src/DynamicVehicleScheduling/policy/greedy_policy.jl
+++ b/src/DynamicVehicleScheduling/policy/greedy_policy.jl
@@ -14,3 +14,12 @@ function (π::GreedyVSPPolicy)(env::DVSPEnv; model_builder=highs_model)
     routes = prize_collecting_vsp(θ; instance=state, model_builder)
     return routes
 end
+
+function greedy_policy(env::DVSPEnv; model_builder=highs_model)
+    _, state = observe(env)
+    (; is_postponable) = state
+    nb_postponable_requests = sum(is_postponable)
+    θ = ones(nb_postponable_requests) * 1e9
+    routes = prize_collecting_vsp(θ; instance=state, model_builder)
+    return routes
+end
diff --git a/src/DynamicVehicleScheduling/policy/kleopatra_policy.jl b/src/DynamicVehicleScheduling/policy/kleopatra_policy.jl
index 8a7e8d1..e69de29 100644
--- a/src/DynamicVehicleScheduling/policy/kleopatra_policy.jl
+++ b/src/DynamicVehicleScheduling/policy/kleopatra_policy.jl
@@ -1,32 +0,0 @@
-"""
-$TYPEDEF
-
-Kleopatra policy for the Dynamic Vehicle Scheduling Problem.
-"""
-struct KleopatraVSPPolicy{P} <: AbstractDynamicVSPPolicy
-    prize_predictor::P
-    has_2D_features::Bool
-end
-
-"""
-$TYPEDSIGNATURES
-
-Custom constructor for [`KleopatraVSPPolicy`](@ref).
-"""
-function KleopatraVSPPolicy(prize_predictor; has_2D_features=nothing)
-    has_2D_features = if isnothing(has_2D_features)
-        size(prize_predictor[1].weight, 2) == 2
-    else
-        has_2D_features
-    end
-    return KleopatraVSPPolicy(prize_predictor, has_2D_features)
-end
-
-function (π::KleopatraVSPPolicy)(env::DVSPEnv; model_builder=highs_model)
-    state = observe(env)
-    (; prize_predictor, has_2D_features) = π
-    x = has_2D_features ? compute_2D_features(env) : compute_features(env)
-    θ = prize_predictor(x)
-    routes = prize_collecting_vsp(θ; instance=state, model_builder)
-    return routes
-end
diff --git a/src/Utils/policy.jl b/src/Utils/policy.jl
index 0e216fd..cf8fbd3 100644
--- a/src/Utils/policy.jl
+++ b/src/Utils/policy.jl
@@ -31,12 +31,12 @@ $TYPEDSIGNATURES
 Run the policy on the environment and return the total reward and a dataset of observations.
 By default, the environment is reset before running the policy.
 """
-function run_policy!(policy, env::AbstractEnvironment)
+function run_policy!(policy, env::AbstractEnvironment; kwargs...)
     total_reward = 0.0
     reset!(env; reset_seed=false)
     local labeled_dataset
     while !is_terminated(env)
-        y = policy(env)
+        y = policy(env; kwargs...)
         features, state = observe(env)
         if @isdefined labeled_dataset
             push!(labeled_dataset, DataSample(; x=features, y_true=y, instance=state))
@@ -49,33 +49,35 @@ function run_policy!(policy, env::AbstractEnvironment)
     return total_reward, labeled_dataset
 end
 
-function run_policy!(policy, envs::Vector{<:AbstractEnvironment})
+function run_policy!(policy, envs::Vector{<:AbstractEnvironment}; kwargs...)
     E = length(envs)
     rewards = zeros(Float64, E)
     datasets = map(1:E) do e
-        reward, dataset = run_policy!(policy, envs[e])
+        reward, dataset = run_policy!(policy, envs[e]; kwargs...)
         rewards[e] = reward
         return dataset
     end
     return rewards, vcat(datasets...)
 end
 
-function run_policy!(policy, env::AbstractEnvironment, episodes::Int; seed=get_seed(env))
+function run_policy!(
+    policy, env::AbstractEnvironment, episodes::Int; seed=get_seed(env), kwargs...
+)
     reset!(env; reset_seed=true, seed)
     total_reward = 0.0
     datasets = map(1:episodes) do _i
-        reward, dataset = run_policy!(policy, env)
+        reward, dataset = run_policy!(policy, env; kwargs...)
         total_reward += reward
         return dataset
     end
     return total_reward / episodes, vcat(datasets...)
 end
 
-function run_policy!(policy, envs::Vector{<:AbstractEnvironment}, episodes::Int)
+function run_policy!(policy, envs::Vector{<:AbstractEnvironment}, episodes::Int; kwargs...)
     E = length(envs)
     rewards = zeros(Float64, E)
     datasets = map(1:E) do e
-        reward, dataset = run_policy!(policy, envs[e], episodes)
+        reward, dataset = run_policy!(policy, envs[e], episodes; kwargs...)
         rewards[e] = reward
         return dataset
     end

From ed01c451f0d615991d4cd73e818b52d394c134f3 Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Fri, 8 Aug 2025 17:05:16 +0200
Subject: [PATCH 20/29] Improve anticipative policy

---
 .../DynamicVehicleScheduling.jl               |  3 +-
 .../algorithms/anticipative_solver.jl         | 87 ++++++++++---------
 .../environment/environment.jl                |  8 +-
 .../environment/instance.jl                   |  4 +
 .../environment/state.jl                      | 30 ++++++-
 src/DynamicVehicleScheduling/policy.jl        |  2 +
 src/Utils/data_sample.jl                      | 17 ++++
 src/Utils/policy.jl                           |  7 +-
 8 files changed, 108 insertions(+), 50 deletions(-)

diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
index 4c54966..ab4ba8c 100644
--- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
+++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
@@ -67,7 +67,7 @@ $TYPEDFIELDS
 end
 
 function Utils.generate_dataset(b::DynamicVehicleSchedulingBenchmark, dataset_size::Int=1)
-    (; max_requests_per_epoch, Δ_dispatch, epoch_duration) = b
+    (; max_requests_per_epoch, Δ_dispatch, epoch_duration, two_dimensional_features) = b
     files = readdir(datadep"dvrptw"; join=true)
     dataset_size = min(dataset_size, length(files))
     return [
@@ -77,6 +77,7 @@ function Utils.generate_dataset(b::DynamicVehicleSchedulingBenchmark, dataset_si
                 max_requests_per_epoch,
                 Δ_dispatch,
                 epoch_duration,
+                two_dimensional_features,
             ),
         ) for i in 1:dataset_size
     ]
diff --git a/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl b/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl
index f274e02..47a2c1f 100644
--- a/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl
+++ b/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl
@@ -4,15 +4,17 @@ $TYPEDSIGNATURES
 Retrieve anticipative routes solution from the given MIP solution `y`.
 Outputs a set of routes per epoch.
 """
-function retrieve_routes_anticipative(y::AbstractArray, dvspenv::DVSPEnv, customer_index)
+function retrieve_routes_anticipative(
+    y::AbstractArray, dvspenv::DVSPEnv, customer_index, epoch_indices
+)
     nb_tasks = length(customer_index)
-    first_epoch = 1
-    (; last_epoch) = dvspenv.instance
+    # first_epoch = 1
+    # (; last_epoch) = dvspenv.instance
     job_indices = 2:(nb_tasks)
-    epoch_indices = first_epoch:last_epoch
+    # epoch_indices = first_epoch:last_epoch
 
     routes = [Vector{Int}[] for _ in epoch_indices]
-    for t in epoch_indices
+    for (i, t) in enumerate(epoch_indices)
         start = [i for i in job_indices if y[1, i, t] ≈ 1]
         for task in start
             route = Int[]
@@ -28,7 +30,7 @@ function retrieve_routes_anticipative(y::AbstractArray, dvspenv::DVSPEnv, custom
                 end
                 current_task = next_task
             end
-            push!(routes[t], route)
+            push!(routes[i], route)
         end
     end
     return routes
@@ -44,28 +46,33 @@ function anticipative_solver(
     env::DVSPEnv,
     scenario=env.scenario;
     model_builder=highs_model,
-    reset_env=false,
-    two_dimensional_features=false,
+    two_dimensional_features=env.instance.two_dimensional_features,
+    reset_env=true,
+    nb_epochs=typemax(Int),
 )
-    reset_env && reset!(env)
+    reset_env && reset!(env; reset_seed=true)
+
+    start_epoch = current_epoch(env)
+    end_epoch = min(last_epoch(env), start_epoch + nb_epochs - 1)
+    T = start_epoch:end_epoch
+
     request_epoch = [0]
-    for (epoch, indices) in enumerate(scenario.indices)
-        request_epoch = vcat(request_epoch, fill(epoch, length(indices)))
+    for t in T
+        request_epoch = vcat(request_epoch, fill(t, length(scenario.indices[t])))
     end
-    customer_index = vcat(1, scenario.indices...)
-    service_time = vcat(0.0, scenario.service_time...)
-    start_time = vcat(0.0, scenario.start_time...)
+    customer_index = vcat(1, scenario.indices[T]...)
+    service_time = vcat(0.0, scenario.service_time[T]...)
+    start_time = vcat(0.0, scenario.start_time[T]...)
 
     duration = env.instance.static_instance.duration[customer_index, customer_index]
-    first_epoch = 1
-    (; last_epoch, epoch_duration, Δ_dispatch) = env.instance
+    (; epoch_duration, Δ_dispatch) = env.instance
 
     model = model_builder()
     set_silent(model)
 
     nb_nodes = length(customer_index)
     job_indices = 2:nb_nodes
-    epoch_indices = first_epoch:last_epoch
+    epoch_indices = T#first_epoch:last_epoch
 
     @variable(model, y[i=1:nb_nodes, j=1:nb_nodes, t=epoch_indices]; binary=true)
 
@@ -102,7 +109,7 @@ function anticipative_solver(
 
     # a trip from i can be done only before limit date
     for i in job_indices, t in epoch_indices, j in 1:nb_nodes
-        if (t - 1) * epoch_duration + duration[1, i] + Δ_dispatch > start_time[i]  # ! this only works if first_epoch = 1
+        if (t - 1) * epoch_duration + duration[1, i] + Δ_dispatch > start_time[i]
             @constraint(model, y[i, j, t] <= 0)
         end
     end
@@ -121,27 +128,32 @@ function anticipative_solver(
     optimize!(model)
 
     obj = JuMP.objective_value(model)
-    epoch_routes = retrieve_routes_anticipative(value.(y), env, customer_index)
+    epoch_routes = retrieve_routes_anticipative(
+        value.(y), env, customer_index, epoch_indices
+    )
 
     epoch_indices = Vector{Int}[]
     N = 1
     indices = [1]
-    for epoch in 1:last_epoch
+    index = 1
+    for epoch in 1:last_epoch(env)
         M = length(scenario.indices[epoch])
         indices = vcat(indices, (N + 1):(N + M))
         push!(epoch_indices, copy(indices))
         N = N + M
-        epoch_routes[epoch]
-        dispatched = vcat(epoch_routes[epoch]...)
-        indices = setdiff(indices, dispatched)
+        if epoch in T
+            dispatched = vcat(epoch_routes[index]...)
+            index += 1
+            indices = setdiff(indices, dispatched)
+        end
     end
 
     indices = vcat(1, scenario.indices...)
     start_time = vcat(0.0, scenario.start_time...)
     service_time = vcat(0.0, scenario.service_time...)
 
-    dataset = map(1:last_epoch) do epoch
-        routes = epoch_routes[epoch]
+    dataset = map(enumerate(T)) do (i, epoch)
+        routes = epoch_routes[i]
         epoch_customers = epoch_indices[epoch]
 
         y_true =
@@ -170,9 +182,13 @@ function anticipative_solver(
         epoch_duration = env.instance.epoch_duration
         Δ_dispatch = env.instance.Δ_dispatch
         planning_start_time = (epoch - 1) * epoch_duration + Δ_dispatch
-        is_must_dispatch[2:end] .=
-            planning_start_time .+ epoch_duration .+ @view(new_duration[1, 2:end]) .>
-            new_start_time[2:end]
+        if epoch == last_epoch(env)
+            # If we are in the last epoch, all requests must be dispatched
+            is_must_dispatch[2:end] .= true
+        else
+            is_must_dispatch[2:end] .=
+                planning_start_time .+ epoch_duration .+ @view(new_duration[1, 2:end]) .> new_start_time[2:end]
+        end
         is_postponable[2:end] .= .!is_must_dispatch[2:end]
 
         state = DVSPState(;
@@ -183,7 +199,6 @@ function anticipative_solver(
             current_epoch=epoch,
         )
 
-        # x = compute_2D_features(state, env.instance)
         x = if two_dimensional_features
             compute_2D_features(state, env.instance)
         else
@@ -195,17 +210,3 @@ function anticipative_solver(
 
     return obj, dataset
 end
-
-# @kwdef struct AnticipativeSolver
-#     is_2D::Bool = false
-# end
-
-# function (solver::AnticipativeSolver)(env::DVSPEnv, scenario=env.scenario; reset_env=false)
-#     return generate_anticipative_decision(
-#         env,
-#         scenario;
-#         model_builder=highs_model,
-#         reset_env,
-#         two_dimensional_features=solver.is_2D,
-#     )
-# end
diff --git a/src/DynamicVehicleScheduling/environment/environment.jl b/src/DynamicVehicleScheduling/environment/environment.jl
index afbefb5..cd8d6a6 100644
--- a/src/DynamicVehicleScheduling/environment/environment.jl
+++ b/src/DynamicVehicleScheduling/environment/environment.jl
@@ -35,7 +35,13 @@ $TYPEDSIGNATURES
 
 Get the current state of the environment.
 """
-Utils.observe(env::DVSPEnv) = nothing, env.state
+function Utils.observe(env::DVSPEnv)
+    if env.instance.two_dimensional_features
+        return compute_2D_features(env.state, env.instance), env.state
+    end
+    # else
+    return compute_features(env.state, env.instance), env.state
+end
 
 current_epoch(env::DVSPEnv) = current_epoch(env.state)
 
diff --git a/src/DynamicVehicleScheduling/environment/instance.jl b/src/DynamicVehicleScheduling/environment/instance.jl
index 17d5e9d..d65010c 100644
--- a/src/DynamicVehicleScheduling/environment/instance.jl
+++ b/src/DynamicVehicleScheduling/environment/instance.jl
@@ -14,6 +14,8 @@ Instance data structure for the dynamic vehicle scheduling problem.
     epoch_duration::T = 1.0
     "last epoch index"
     last_epoch::Int
+    "whether to use two-dimensional features"
+    two_dimensional_features::Bool = false
 end
 
 function Instance(
@@ -21,6 +23,7 @@ function Instance(
     max_requests_per_epoch::Int=10,
     Δ_dispatch::Float64=1.0,
     epoch_duration::Float64=1.0,
+    two_dimensional_features::Bool=false,
 )
     last_epoch = trunc(
         Int,
@@ -35,6 +38,7 @@ function Instance(
         Δ_dispatch=Δ_dispatch,
         epoch_duration=epoch_duration,
         last_epoch=last_epoch,
+        two_dimensional_features=two_dimensional_features,
     )
 end
 
diff --git a/src/DynamicVehicleScheduling/environment/state.jl b/src/DynamicVehicleScheduling/environment/state.jl
index ebac101..704ef79 100644
--- a/src/DynamicVehicleScheduling/environment/state.jl
+++ b/src/DynamicVehicleScheduling/environment/state.jl
@@ -16,6 +16,25 @@ State data structure for the Dynamic Vehicle Scheduling Problem.
     is_postponable::BitVector = falses(0)
 end
 
+function Base.show(io::IO, state::DVSPState)
+    return print(
+        io,
+        "DVSPState(",
+        "current_epoch=",
+        state.current_epoch,
+        ", ",
+        "location_indices=",
+        state.location_indices,
+        ", ",
+        "is_must_dispatch=",
+        state.is_must_dispatch,
+        ", ",
+        "is_postponable=",
+        state.is_postponable,
+        ")",
+    )
+end
+
 function reset_state!(
     state::DVSPState, instance::Instance; indices, service_time, start_time
 )
@@ -189,9 +208,14 @@ function add_new_customers!(
     epoch_duration = instance.epoch_duration
     Δ_dispatch = instance.Δ_dispatch
     planning_start_time = (state.current_epoch - 1) * epoch_duration + Δ_dispatch
-    is_must_dispatch[2:end] .=
-        planning_start_time .+ epoch_duration .+ @view(updated_duration[1, 2:end]) .>
-        updated_start_time[2:end]
+    if state.current_epoch == last_epoch(instance)
+        # If we are in the last epoch, all requests must be dispatched
+        is_must_dispatch[2:end] .= true
+    else
+        is_must_dispatch[2:end] .=
+            planning_start_time .+ epoch_duration .+ @view(updated_duration[1, 2:end]) .>
+            updated_start_time[2:end]
+    end
     is_postponable[2:end] .= .!is_must_dispatch[2:end]
 
     state.is_must_dispatch = is_must_dispatch
diff --git a/src/DynamicVehicleScheduling/policy.jl b/src/DynamicVehicleScheduling/policy.jl
index ebce796..244dc66 100644
--- a/src/DynamicVehicleScheduling/policy.jl
+++ b/src/DynamicVehicleScheduling/policy.jl
@@ -4,6 +4,7 @@ function greedy_policy(env::DVSPEnv; model_builder=highs_model)
     nb_postponable_requests = sum(is_postponable)
     θ = ones(nb_postponable_requests) * 1e9
     routes = prize_collecting_vsp(θ; instance=state, model_builder)
+    @assert is_feasible(state, routes)
     return routes
 end
 
@@ -12,6 +13,7 @@ function lazy_policy(env::DVSPEnv; model_builder=highs_model)
     nb_postponable_requests = sum(state.is_postponable)
     θ = ones(nb_postponable_requests) * -1e9
     routes = prize_collecting_vsp(θ; instance=state, model_builder)
+    @assert is_feasible(state, routes)
     return routes
 end
 
diff --git a/src/Utils/data_sample.jl b/src/Utils/data_sample.jl
index fde1bf3..d0cccc6 100644
--- a/src/Utils/data_sample.jl
+++ b/src/Utils/data_sample.jl
@@ -22,6 +22,23 @@ $TYPEDFIELDS
     instance::I = nothing
 end
 
+function Base.show(io::IO, d::DataSample)
+    fields = String[]
+    if !isnothing(d.x)
+        push!(fields, "x=$(d.x)")
+    end
+    if !isnothing(d.θ_true)
+        push!(fields, "θ_true=$(d.θ_true)")
+    end
+    if !isnothing(d.y_true)
+        push!(fields, "y_true=$(d.y_true)")
+    end
+    if !isnothing(d.instance)
+        push!(fields, "instance=$(d.instance)")
+    end
+    return print(io, "DataSample(", join(fields, ", "), ")")
+end
+
 """
 $TYPEDSIGNATURES
 
diff --git a/src/Utils/policy.jl b/src/Utils/policy.jl
index cf8fbd3..8f17f79 100644
--- a/src/Utils/policy.jl
+++ b/src/Utils/policy.jl
@@ -39,9 +39,12 @@ function run_policy!(policy, env::AbstractEnvironment; kwargs...)
         y = policy(env; kwargs...)
         features, state = observe(env)
         if @isdefined labeled_dataset
-            push!(labeled_dataset, DataSample(; x=features, y_true=y, instance=state))
+            push!(
+                labeled_dataset,
+                DataSample(; x=features, y_true=y, instance=deepcopy(state)),
+            )
         else
-            labeled_dataset = [DataSample(; x=features, y_true=y, instance=state)]
+            labeled_dataset = [DataSample(; x=features, y_true=y, instance=deepcopy(state))]
         end
         reward = step!(env, y)
         total_reward += reward

From 5f2ccca6b1298b765ff9ff464089d16e4e007c72 Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Fri, 8 Aug 2025 17:45:27 +0200
Subject: [PATCH 21/29] cleanup + fix tests

---
 src/DecisionFocusedLearningBenchmarks.jl      |   2 +-
 src/DynamicAssortment/DynamicAssortment.jl    |   2 +-
 src/DynamicAssortment/policies.jl             |  20 ---
 .../DynamicVehicleScheduling.jl               |  28 ++--
 .../algorithms/prize_collecting_vsp.jl        | 128 -----------------
 .../{algorithms => }/anticipative_solver.jl   |   0
 .../{environment => }/environment.jl          |   0
 .../{learning => }/features.jl                |  17 +++
 .../{environment => }/instance.jl             |   0
 .../learning/2d_features.jl                   |  16 ---
 src/DynamicVehicleScheduling/maximizer.jl     | 129 ++++++++++++++++++
 .../{environment => }/plot.jl                 |   0
 .../policy/abstract_vsp_policy.jl             |  42 ------
 .../policy/anticipative_policy.jl             |  17 ---
 .../policy/greedy_policy.jl                   |  25 ----
 .../policy/kleopatra_policy.jl                |   0
 .../policy/lazy_policy.jl                     |  15 --
 .../{environment => }/scenario.jl             |   0
 .../{environment => }/state.jl                |   0
 src/Utils/Utils.jl                            |   2 +-
 test/dynamic_vsp.jl                           |  32 +++--
 21 files changed, 182 insertions(+), 293 deletions(-)
 delete mode 100644 src/DynamicVehicleScheduling/algorithms/prize_collecting_vsp.jl
 rename src/DynamicVehicleScheduling/{algorithms => }/anticipative_solver.jl (100%)
 rename src/DynamicVehicleScheduling/{environment => }/environment.jl (100%)
 rename src/DynamicVehicleScheduling/{learning => }/features.jl (69%)
 rename src/DynamicVehicleScheduling/{environment => }/instance.jl (100%)
 delete mode 100644 src/DynamicVehicleScheduling/learning/2d_features.jl
 rename src/DynamicVehicleScheduling/{environment => }/plot.jl (100%)
 delete mode 100644 src/DynamicVehicleScheduling/policy/abstract_vsp_policy.jl
 delete mode 100644 src/DynamicVehicleScheduling/policy/anticipative_policy.jl
 delete mode 100644 src/DynamicVehicleScheduling/policy/greedy_policy.jl
 delete mode 100644 src/DynamicVehicleScheduling/policy/kleopatra_policy.jl
 delete mode 100644 src/DynamicVehicleScheduling/policy/lazy_policy.jl
 rename src/DynamicVehicleScheduling/{environment => }/scenario.jl (100%)
 rename src/DynamicVehicleScheduling/{environment => }/state.jl (100%)

diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl
index b49362a..a33c1de 100644
--- a/src/DecisionFocusedLearningBenchmarks.jl
+++ b/src/DecisionFocusedLearningBenchmarks.jl
@@ -62,7 +62,7 @@ using .Utils
 
 # Interface
 export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark, DataSample
-export AbstractEnv, get_seed, is_terminated, observe, reset!, step!
+export AbstractEnvironment, get_seed, is_terminated, observe, reset!, step!
 
 export Policy, run_policy!
 
diff --git a/src/DynamicAssortment/DynamicAssortment.jl b/src/DynamicAssortment/DynamicAssortment.jl
index f04d3e2..2c61c5f 100644
--- a/src/DynamicAssortment/DynamicAssortment.jl
+++ b/src/DynamicAssortment/DynamicAssortment.jl
@@ -72,7 +72,7 @@ function Utils.generate_maximizer(b::DynamicAssortmentBenchmark)
 end
 
 function Utils.generate_environment(
-    ::DynamicAssortmentBenchmark, instance::Instance, rng::AbstractRNG
+    ::DynamicAssortmentBenchmark, instance::Instance, rng::AbstractRNG; kwargs...
 )
     seed = rand(rng, 1:typemax(Int))
     return Environment(instance; seed)
diff --git a/src/DynamicAssortment/policies.jl b/src/DynamicAssortment/policies.jl
index 56aba6d..320c501 100644
--- a/src/DynamicAssortment/policies.jl
+++ b/src/DynamicAssortment/policies.jl
@@ -19,23 +19,3 @@ function greedy_policy(env::Environment)
     maximizer = generate_maximizer(env.instance.config)
     return maximizer(prices(env))
 end
-
-function run_policy(env::Environment, episodes::Int; first_seed=1, policy=expert_policy)
-    dataset = []
-    rev_global = Float64[]
-    for i in 1:episodes
-        rev_episode = 0.0
-        CommonRLInterface.reset!(env; seed=first_seed - 1 + i, reset_seed=true)
-        training_instances = []
-        while !CommonRLInterface.terminated(env)
-            S = policy(env)
-            features = CommonRLInterface.observe(env)
-            push!(training_instances, DataSample(; x=features, y_true=S))
-            reward = CommonRLInterface.act!(env, S)
-            rev_episode += reward
-        end
-        push!(rev_global, rev_episode)
-        push!(dataset, training_instances)
-    end
-    return mean(rev_global), rev_global, dataset
-end
diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
index ab4ba8c..f76a43b 100644
--- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
+++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
@@ -25,27 +25,17 @@ include("static_vsp/parsing.jl")
 include("static_vsp/solution.jl")
 include("static_vsp/plot.jl")
 
-# dynamic environment
-include("environment/instance.jl")
-include("environment/state.jl")
-include("environment/scenario.jl")
-include("environment/environment.jl")
-include("environment/plot.jl")
+include("instance.jl")
+include("state.jl")
+include("scenario.jl")
+include("environment.jl")
+include("plot.jl")
 
-include("algorithms/prize_collecting_vsp.jl")
-include("algorithms/anticipative_solver.jl")
-
-include("learning/features.jl")
-include("learning/2d_features.jl")
+include("maximizer.jl")
+include("anticipative_solver.jl")
 
+include("features.jl")
 include("policy.jl")
-# include("policy/abstract_vsp_policy.jl")
-# include("policy/greedy_policy.jl")
-# include("policy/lazy_policy.jl")
-# include("policy/anticipative_policy.jl")
-# include("policy/kleopatra_policy.jl")
-
-include("maximizer.jl")
 
 """
 $TYPEDEF
@@ -84,7 +74,7 @@ function Utils.generate_dataset(b::DynamicVehicleSchedulingBenchmark, dataset_si
 end
 
 function Utils.generate_environment(
-    ::DynamicVehicleSchedulingBenchmark, instance::Instance, rng::AbstractRNG
+    ::DynamicVehicleSchedulingBenchmark, instance::Instance, rng::AbstractRNG; kwargs...
 )
     seed = rand(rng, 1:typemax(Int))
     return DVSPEnv(instance; seed)
diff --git a/src/DynamicVehicleScheduling/algorithms/prize_collecting_vsp.jl b/src/DynamicVehicleScheduling/algorithms/prize_collecting_vsp.jl
deleted file mode 100644
index d98f4d0..0000000
--- a/src/DynamicVehicleScheduling/algorithms/prize_collecting_vsp.jl
+++ /dev/null
@@ -1,128 +0,0 @@
-"""
-$TYPEDSIGNATURES
-
-Create the acyclic digraph associated with the given VSP `instance`.
-"""
-function create_graph(instance::StaticInstance)
-    (; duration, start_time, service_time) = instance
-    # Initialize directed graph
-    nb_vertices = location_count(instance)
-    graph = SimpleDiGraph(nb_vertices)
-
-    depot = 1  # depot is always index 1
-    customers = 2:nb_vertices  # other vertices are customers
-
-    # Create existing edges
-    for i₁ in customers
-        # link every task to depot
-        add_edge!(graph, depot, i₁)
-        add_edge!(graph, i₁, depot)
-
-        t₁ = start_time[i₁]
-        for i₂ in (i₁ + 1):nb_vertices
-            t₂ = start_time[i₂]
-
-            if t₁ <= t₂
-                if t₁ + service_time[i₁] + duration[i₁, i₂] <= t₂
-                    add_edge!(graph, i₁, i₂)
-                end
-            else
-                if t₂ + service_time[i₂] + duration[i₂, i₁] <= t₁
-                    add_edge!(graph, i₂, i₁)
-                end
-            end
-        end
-    end
-
-    return graph
-end
-
-"""
-$TYPEDSIGNATURES
-
-Create the acyclic digraph associated with the given VSP `state`.
-"""
-function create_graph(state::DVSPState)
-    return create_graph(state.state_instance)
-end
-
-"""
-$TYPEDSIGNATURES
-
-Retrieve routes solution from the given MIP solution `y` matrix and `graph`.
-"""
-function retrieve_routes(y::AbstractArray, graph::AbstractGraph)
-    nb_tasks = nv(graph)
-    job_indices = 2:(nb_tasks)
-    routes = Vector{Int}[]
-
-    start = [i for i in job_indices if y[1, i] ≈ 1]
-    for task in start
-        route = Int[]
-        current_task = task
-        while current_task != 1 # < nb_tasks
-            push!(route, current_task)
-            local next_task
-            for i in outneighbors(graph, current_task)
-                if isapprox(y[current_task, i], 1; atol=0.1)
-                    next_task = i
-                    break
-                end
-            end
-            current_task = next_task
-        end
-        push!(routes, route)
-    end
-    return routes
-end
-
-"""
-$TYPEDSIGNATURES
-
-Solve the Prize Collecting Vehicle Scheduling Problem defined by `instance` and prize vector `θ`.
-"""
-function prize_collecting_vsp(
-    θ::AbstractVector; instance::DVSPState, model_builder=highs_model, kwargs...
-)
-    (; duration) = instance.state_instance
-    graph = create_graph(instance)
-
-    model = model_builder()
-    set_silent(model)
-
-    nb_nodes = nv(graph)
-    job_indices = 2:(nb_nodes)
-
-    @variable(model, y[i=1:nb_nodes, j=1:nb_nodes; has_edge(graph, i, j)] >= 0)
-
-    θ_ext = fill(0.0, location_count(instance))  # no prize for must dispatch requests, only hard constraints
-    θ_ext[instance.is_postponable] .= θ
-
-    @objective(
-        model,
-        Max,
-        sum(
-            (θ_ext[dst(edge)] - duration[src(edge), dst(edge)]) * y[src(edge), dst(edge)]
-            for edge in edges(graph)
-        )
-    )
-    @constraint(
-        model,
-        flow[i in 2:nb_nodes],
-        sum(y[j, i] for j in inneighbors(graph, i)) ==
-            sum(y[i, j] for j in outneighbors(graph, i))
-    )
-    @constraint(
-        model, demand[i in job_indices], sum(y[j, i] for j in inneighbors(graph, i)) <= 1
-    )
-    # must dispatch constraints
-    @constraint(
-        model,
-        demand_must_dispatch[i in job_indices; instance.is_must_dispatch[i]],
-        sum(y[j, i] for j in inneighbors(graph, i)) == 1
-    )
-
-    optimize!(model)
-
-    return retrieve_routes(value.(y), graph)
-end
diff --git a/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl b/src/DynamicVehicleScheduling/anticipative_solver.jl
similarity index 100%
rename from src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl
rename to src/DynamicVehicleScheduling/anticipative_solver.jl
diff --git a/src/DynamicVehicleScheduling/environment/environment.jl b/src/DynamicVehicleScheduling/environment.jl
similarity index 100%
rename from src/DynamicVehicleScheduling/environment/environment.jl
rename to src/DynamicVehicleScheduling/environment.jl
diff --git a/src/DynamicVehicleScheduling/learning/features.jl b/src/DynamicVehicleScheduling/features.jl
similarity index 69%
rename from src/DynamicVehicleScheduling/learning/features.jl
rename to src/DynamicVehicleScheduling/features.jl
index c9470c3..10e0ab8 100644
--- a/src/DynamicVehicleScheduling/learning/features.jl
+++ b/src/DynamicVehicleScheduling/features.jl
@@ -40,3 +40,20 @@ end
 function compute_features(env::DVSPEnv)
     return compute_features(env.state, env.instance)
 end
+
+function get_features_meanTimeToRequests(state::DVSPState, instance::Instance)
+    quantiles = [0.5]
+    a = instance.static_instance.duration[state.location_indices, 2:end]
+    quantileTimeToRequests = mapslices(x -> quantile(x, quantiles), a; dims=2)
+    return quantileTimeToRequests
+end
+
+function compute_2D_features(state::DVSPState, instance::Instance)
+    timeDepotRequest = state.state_instance.duration[:, 1][state.is_postponable]
+    quantileTimeToRequests = get_features_meanTimeToRequests(state, instance)[state.is_postponable]
+    return hcat(timeDepotRequest, quantileTimeToRequests)'
+end
+
+function compute_2D_features(env::DVSPEnv)
+    return compute_2D_features(env.state, env.instance)
+end
diff --git a/src/DynamicVehicleScheduling/environment/instance.jl b/src/DynamicVehicleScheduling/instance.jl
similarity index 100%
rename from src/DynamicVehicleScheduling/environment/instance.jl
rename to src/DynamicVehicleScheduling/instance.jl
diff --git a/src/DynamicVehicleScheduling/learning/2d_features.jl b/src/DynamicVehicleScheduling/learning/2d_features.jl
deleted file mode 100644
index 6e23810..0000000
--- a/src/DynamicVehicleScheduling/learning/2d_features.jl
+++ /dev/null
@@ -1,16 +0,0 @@
-function get_features_meanTimeToRequests(state::DVSPState, instance::Instance)
-    quantiles = [0.5]
-    a = instance.static_instance.duration[state.location_indices, 2:end]
-    quantileTimeToRequests = mapslices(x -> quantile(x, quantiles), a; dims=2)
-    return quantileTimeToRequests
-end
-
-function compute_2D_features(state::DVSPState, instance::Instance)
-    timeDepotRequest = state.state_instance.duration[:, 1][state.is_postponable]
-    quantileTimeToRequests = get_features_meanTimeToRequests(state, instance)[state.is_postponable]
-    return hcat(timeDepotRequest, quantileTimeToRequests)'
-end
-
-function compute_2D_features(env::DVSPEnv)
-    return compute_2D_features(env.state, env.instance)
-end
diff --git a/src/DynamicVehicleScheduling/maximizer.jl b/src/DynamicVehicleScheduling/maximizer.jl
index eecbf57..450ab8a 100644
--- a/src/DynamicVehicleScheduling/maximizer.jl
+++ b/src/DynamicVehicleScheduling/maximizer.jl
@@ -1,3 +1,132 @@
+"""
+$TYPEDSIGNATURES
+
+Create the acyclic digraph associated with the given VSP `instance`.
+"""
+function create_graph(instance::StaticInstance)
+    (; duration, start_time, service_time) = instance
+    # Initialize directed graph
+    nb_vertices = location_count(instance)
+    graph = SimpleDiGraph(nb_vertices)
+
+    depot = 1  # depot is always index 1
+    customers = 2:nb_vertices  # other vertices are customers
+
+    # Create existing edges
+    for i₁ in customers
+        # link every task to depot
+        add_edge!(graph, depot, i₁)
+        add_edge!(graph, i₁, depot)
+
+        t₁ = start_time[i₁]
+        for i₂ in (i₁ + 1):nb_vertices
+            t₂ = start_time[i₂]
+
+            if t₁ <= t₂
+                if t₁ + service_time[i₁] + duration[i₁, i₂] <= t₂
+                    add_edge!(graph, i₁, i₂)
+                end
+            else
+                if t₂ + service_time[i₂] + duration[i₂, i₁] <= t₁
+                    add_edge!(graph, i₂, i₁)
+                end
+            end
+        end
+    end
+
+    return graph
+end
+
+"""
+$TYPEDSIGNATURES
+
+Create the acyclic digraph associated with the given VSP `state`.
+"""
+function create_graph(state::DVSPState)
+    return create_graph(state.state_instance)
+end
+
+"""
+$TYPEDSIGNATURES
+
+Retrieve routes solution from the given MIP solution `y` matrix and `graph`.
+"""
+function retrieve_routes(y::AbstractArray, graph::AbstractGraph)
+    nb_tasks = nv(graph)
+    job_indices = 2:(nb_tasks)
+    routes = Vector{Int}[]
+
+    start = [i for i in job_indices if y[1, i] ≈ 1]
+    for task in start
+        route = Int[]
+        current_task = task
+        while current_task != 1 # < nb_tasks
+            push!(route, current_task)
+            local next_task
+            for i in outneighbors(graph, current_task)
+                if isapprox(y[current_task, i], 1; atol=0.1)
+                    next_task = i
+                    break
+                end
+            end
+            current_task = next_task
+        end
+        push!(routes, route)
+    end
+    return routes
+end
+
+"""
+$TYPEDSIGNATURES
+
+Solve the Prize Collecting Vehicle Scheduling Problem defined by `instance` and prize vector `θ`.
+"""
+function prize_collecting_vsp(
+    θ::AbstractVector; instance::DVSPState, model_builder=highs_model, kwargs...
+)
+    (; duration) = instance.state_instance
+    graph = create_graph(instance)
+
+    model = model_builder()
+    set_silent(model)
+
+    nb_nodes = nv(graph)
+    job_indices = 2:(nb_nodes)
+
+    @variable(model, y[i=1:nb_nodes, j=1:nb_nodes; has_edge(graph, i, j)] >= 0)
+
+    θ_ext = fill(0.0, location_count(instance))  # no prize for must dispatch requests, only hard constraints
+    θ_ext[instance.is_postponable] .= θ
+
+    @objective(
+        model,
+        Max,
+        sum(
+            (θ_ext[dst(edge)] - duration[src(edge), dst(edge)]) * y[src(edge), dst(edge)]
+            for edge in edges(graph)
+        )
+    )
+    @constraint(
+        model,
+        flow[i in 2:nb_nodes],
+        sum(y[j, i] for j in inneighbors(graph, i)) ==
+            sum(y[i, j] for j in outneighbors(graph, i))
+    )
+    @constraint(
+        model, demand[i in job_indices], sum(y[j, i] for j in inneighbors(graph, i)) <= 1
+    )
+    # must dispatch constraints
+    @constraint(
+        model,
+        demand_must_dispatch[i in job_indices; instance.is_must_dispatch[i]],
+        sum(y[j, i] for j in inneighbors(graph, i)) == 1
+    )
+
+    optimize!(model)
+
+    return retrieve_routes(value.(y), graph)
+end
+
 function oracle(θ; instance::DVSPState, kwargs...)
     routes = prize_collecting_vsp(θ; instance=instance, kwargs...)
     return VSPSolution(
diff --git a/src/DynamicVehicleScheduling/environment/plot.jl b/src/DynamicVehicleScheduling/plot.jl
similarity index 100%
rename from src/DynamicVehicleScheduling/environment/plot.jl
rename to src/DynamicVehicleScheduling/plot.jl
diff --git a/src/DynamicVehicleScheduling/policy/abstract_vsp_policy.jl b/src/DynamicVehicleScheduling/policy/abstract_vsp_policy.jl
deleted file mode 100644
index 0a1f755..0000000
--- a/src/DynamicVehicleScheduling/policy/abstract_vsp_policy.jl
+++ /dev/null
@@ -1,42 +0,0 @@
-abstract type AbstractDynamicPolicy end
-
-function (π::AbstractDynamicPolicy)(env; kwargs...)
-    throw("Not implemented")
-end
-
-"""
-$TYPEDEF
-
-Abstract type for dynamic VSP policies.
-"""
-abstract type AbstractDynamicVSPPolicy <: AbstractDynamicPolicy end
-
-"""
-$TYPEDSIGNATURES
-
-Apply the policy to the environment.
-"""
-function run_policy!(
-    π::AbstractDynamicVSPPolicy,
-    env::DVSPEnv,
-    scenario=env.scenario;
-    check_feasibility=true,
-    kwargs...,
-)
-    # reset environment, and initialize variables
-    reset!(env)
-    total_cost = 0
-    epoch_routes = Vector{Vector{Int}}[]
-
-    # epoch loop
-    while !terminated(env)
-        state_routes = π(env; kwargs...)
-        check_feasibility && @assert is_feasible(observe(env), state_routes)
-        # env_routes = env_routes_from_state_routes(env, state_routes)
-        push!(epoch_routes, state_routes)
-        local_cost = act!(env, state_routes, scenario)
-        total_cost += local_cost
-    end
-
-    return total_cost, epoch_routes
-end
diff --git a/src/DynamicVehicleScheduling/policy/anticipative_policy.jl b/src/DynamicVehicleScheduling/policy/anticipative_policy.jl
deleted file mode 100644
index f63b620..0000000
--- a/src/DynamicVehicleScheduling/policy/anticipative_policy.jl
+++ /dev/null
@@ -1,17 +0,0 @@
-"""
-$TYPEDEF
-
-Anticipative policy for the Dynamic Vehicle Scheduling Problem.
-"""
-struct AnticipativeVSPPolicy <: AbstractDynamicPolicy end
-
-"""
-$TYPEDSIGNATURES
-
-Apply the anticipative policy to the environment.
-"""
-function run_policy!(
-    ::AnticipativeVSPPolicy, env::DVSPEnv, scenario=env.scenario; model_builder=highs_model
-)
-    return generate_anticipative_solution(env, scenario; model_builder, reset_env=true)
-end
diff --git a/src/DynamicVehicleScheduling/policy/greedy_policy.jl b/src/DynamicVehicleScheduling/policy/greedy_policy.jl
deleted file mode 100644
index 90de293..0000000
--- a/src/DynamicVehicleScheduling/policy/greedy_policy.jl
+++ /dev/null
@@ -1,25 +0,0 @@
-"""
-$TYPEDEF
-
-Greedy policy for the Dynamic Vehicle Scheduling Problem.
-Dispatch customers as soon as they appear.
-"""
-struct GreedyVSPPolicy <: AbstractDynamicVSPPolicy end
-
-function (π::GreedyVSPPolicy)(env::DVSPEnv; model_builder=highs_model)
-    state = observe(env)
-    (; is_postponable) = state
-    nb_postponable_requests = sum(is_postponable)
-    θ = ones(nb_postponable_requests) * 1e9
-    routes = prize_collecting_vsp(θ; instance=state, model_builder)
-    return routes
-end
-
-function greedy_policy(env::DVSPEnv; model_builder=highs_model)
-    _, state = observe(env)
-    (; is_postponable) = state
-    nb_postponable_requests = sum(is_postponable)
-    θ = ones(nb_postponable_requests) * 1e9
-    routes = prize_collecting_vsp(θ; instance=state, model_builder)
-    return routes
-end
diff --git a/src/DynamicVehicleScheduling/policy/kleopatra_policy.jl b/src/DynamicVehicleScheduling/policy/kleopatra_policy.jl
deleted file mode 100644
index e69de29..0000000
diff --git a/src/DynamicVehicleScheduling/policy/lazy_policy.jl b/src/DynamicVehicleScheduling/policy/lazy_policy.jl
deleted file mode 100644
index 50b44d3..0000000
--- a/src/DynamicVehicleScheduling/policy/lazy_policy.jl
+++ /dev/null
@@ -1,15 +0,0 @@
-"""
-$TYPEDEF
-
-Lazy policy for the Dynamic Vehicle Scheduling Problem.
-Dispatch customers only when necessary (i.e. must-dispatch).
-"""
-struct LazyVSPPolicy <: AbstractDynamicVSPPolicy end
-
-function (π::LazyVSPPolicy)(env::DVSPEnv; model_builder=highs_model)
-    state = observe(env)
-    nb_postponable_requests = sum(state.is_postponable)
-    θ = ones(nb_postponable_requests) * -1e9
-    routes = prize_collecting_vsp(θ; instance=state, model_builder)
-    return routes
-end
diff --git a/src/DynamicVehicleScheduling/environment/scenario.jl b/src/DynamicVehicleScheduling/scenario.jl
similarity index 100%
rename from src/DynamicVehicleScheduling/environment/scenario.jl
rename to src/DynamicVehicleScheduling/scenario.jl
diff --git a/src/DynamicVehicleScheduling/environment/state.jl b/src/DynamicVehicleScheduling/state.jl
similarity index 100%
rename from src/DynamicVehicleScheduling/environment/state.jl
rename to src/DynamicVehicleScheduling/state.jl
diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl
index 2efd0d0..00d9547 100644
--- a/src/Utils/Utils.jl
+++ b/src/Utils/Utils.jl
@@ -24,7 +24,7 @@ export DataSample, Policy
 export run_policy!
 export TopKMaximizer
 
-export AbstractEnv, get_seed, is_terminated, observe, reset!, step!
+export AbstractEnvironment, get_seed, is_terminated, observe, reset!, step!
 
 export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark
 export generate_sample, generate_dataset
diff --git a/test/dynamic_vsp.jl b/test/dynamic_vsp.jl
index 49c9b77..367db06 100644
--- a/test/dynamic_vsp.jl
+++ b/test/dynamic_vsp.jl
@@ -1,8 +1,24 @@
-# @testitem "DVSP - parsing" begin
-#     using DecisionFocusedLearningBenchmarks.DynamicVehicleScheduling:
-#         read_vsp_instance, location_count, customer_count
-#     path = joinpath(@__DIR__, "data", "vsp_instance.txt")
-#     instance = read_vsp_instance(path)
-#     @test location_count(instance) == 6
-#     @test customer_count(instance) == 5
-# end
+@testitem "DVSP" begin
+    using DecisionFocusedLearningBenchmarks.DynamicVehicleScheduling
+
+    b = DynamicVehicleSchedulingBenchmark(; two_dimensional_features=true)
+    dataset = generate_dataset(b, 10)
+    environments = generate_environments(b, dataset)
+
+    env = environments[1]
+    get_seed(env)
+
+    policies = generate_policies(b)
+    lazy = policies[1]
+    greedy = policies[2]
+
+    d = run_policy!(lazy, env, 1; seed=0)[2]
+
+    r, d = run_policy!(lazy, environments, 10)
+    r, d = run_policy!(greedy, environments, 10)
+
+    env = environments[1]
+    instance = dataset[1].instance
+    scenario = generate_scenario(b, instance)
+    v, y = generate_anticipative_solution(b, env, scenario; nb_epochs=2, reset_env=true)
+end

From d9eaa8f86f479a7672ccb18288edaa05a47acfb1 Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Fri, 8 Aug 2025 17:50:03 +0200
Subject: [PATCH 22/29] fix doc

---
 docs/src/api/dvsp.md        | 15 +++++++++++++++
 docs/src/benchmarks/dvsp.md |  3 +++
 2 files changed, 18 insertions(+)
 create mode 100644 docs/src/api/dvsp.md
 create mode 100644 docs/src/benchmarks/dvsp.md

diff --git a/docs/src/api/dvsp.md b/docs/src/api/dvsp.md
new file mode 100644
index 0000000..4ecdfd9
--- /dev/null
+++ b/docs/src/api/dvsp.md
@@ -0,0 +1,15 @@
+# Dynamic Vehicle Scheduling
+
+## Public
+
+```@autodocs
+Modules = [DecisionFocusedLearningBenchmarks.DynamicVehicleScheduling]
+Private = false
+```
+
+## Private
+
+```@autodocs
+Modules = [DecisionFocusedLearningBenchmarks.DynamicVehicleScheduling]
+Public = false
+```
diff --git a/docs/src/benchmarks/dvsp.md b/docs/src/benchmarks/dvsp.md
new file mode 100644
index 0000000..2b96c67
--- /dev/null
+++ b/docs/src/benchmarks/dvsp.md
@@ -0,0 +1,3 @@
+# Dynamic Vehicle Scheduling
+
+See [`DynamicVehicleSchedulingBenchmark`](@ref).

From fd0e247b3011d62b30b9276ed295383fe2ac19e6 Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Fri, 8 Aug 2025 17:54:23 +0200
Subject: [PATCH 23/29] fix doc (again)

---
 src/Utils/interface.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Utils/interface.jl b/src/Utils/interface.jl
index 8c3c0c7..1fa1f65 100644
--- a/src/Utils/interface.jl
+++ b/src/Utils/interface.jl
@@ -190,7 +190,7 @@ Abstract type interface for stochastic benchmark problems.
 This type should be used for benchmarks that involve single stage stochastic optimization problems.
 
 It follows the same interface as [`AbstractBenchmark`](@ref), with the addition of the following methods:
-- [`generate_anticipative_solver`](@ref)
+- `generate_scenario` and `generate_anticipative_solution`
 """
 abstract type AbstractStochasticBenchmark{exogenous} <: AbstractBenchmark end
 
@@ -198,12 +198,12 @@ is_exogenous(::AbstractStochasticBenchmark{exogenous}) where {exogenous} = exoge
 is_endogenous(::AbstractStochasticBenchmark{exogenous}) where {exogenous} = !exogenous
 
 """
-    generate_anticipative_solver(::AbstractStochasticBenchmark{true}, instance; kwargs...)
+    generate_scenario(::AbstractStochasticBenchmark{true}, instance; kwargs...)
 """
 function generate_scenario end
 
 """
-    anticipative_policy(::AbstractStochasticBenchmark{true}, instance, scenario; kwargs...)
+    generate_anticipative_solution(::AbstractStochasticBenchmark{true}, instance, scenario; kwargs...)
 """
 function generate_anticipative_solution end
 

From cd3bb3584464fbecf77efcd56764a33788eb9d56 Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Thu, 21 Aug 2025 15:35:39 +0200
Subject: [PATCH 24/29] fix doc

---
 docs/make.jl                          | 16 +++++-----------
 docs/src/benchmarks/argmax.md         |  1 +
 docs/src/benchmarks/ranking.md        |  1 +
 docs/src/tutorials/warcraft.jl        |  3 +++
 src/DynamicVehicleScheduling/state.jl |  2 +-
 5 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/docs/make.jl b/docs/make.jl
index 21f5480..1946e7f 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -12,8 +12,8 @@ api_dir = joinpath(@__DIR__, "src", "api")
 api_files = map(x -> joinpath("api", x), readdir(api_dir))
 tutorial_files = readdir(tutorial_dir)
 md_tutorial_files = [split(file, ".")[1] * ".md" for file in tutorial_files]
-benchmark_files = readdir(benchmarks_dir)
-md_benchmark_files = [split(file, ".")[1] * ".md" for file in benchmark_files]
+benchmark_files = [joinpath("benchmarks", e) for e in readdir(benchmarks_dir)]
+# md_benchmark_files = [split(file, ".")[1] * ".md" for file in benchmark_files]
 
 include_tutorial = true
 
@@ -25,20 +25,14 @@ if include_tutorial
 end
 
 makedocs(;
-    modules=[DecisionFocusedLearningBenchmarks, DecisionFocusedLearningBenchmarks.Warcraft],
+    modules=[DecisionFocusedLearningBenchmarks],
     authors="Members of JuliaDecisionFocusedLearning",
     sitename="DecisionFocusedLearningBenchmarks.jl",
-    format=Documenter.HTML(),
+    format=Documenter.HTML(; size_threshold=typemax(Int)),
     pages=[
         "Home" => "index.md",
         "Tutorials" => include_tutorial ? md_tutorial_files : [],
-        "Benchmark problems list" => [
-            "benchmarks/subset_selection.md",
-            "benchmarks/fixed_size_shortest_path.md",
-            "benchmarks/warcraft.md",
-            "benchmarks/portfolio_optimization.md",
-            "benchmarks/vsp.md",
-        ],
+        "Benchmark problems list" => benchmark_files,
         "API reference" => api_files,
     ],
 )
diff --git a/docs/src/benchmarks/argmax.md b/docs/src/benchmarks/argmax.md
index e69de29..1ab74f9 100644
--- a/docs/src/benchmarks/argmax.md
+++ b/docs/src/benchmarks/argmax.md
@@ -0,0 +1 @@
+# Argmax
diff --git a/docs/src/benchmarks/ranking.md b/docs/src/benchmarks/ranking.md
index e69de29..5bfcaeb 100644
--- a/docs/src/benchmarks/ranking.md
+++ b/docs/src/benchmarks/ranking.md
@@ -0,0 +1 @@
+# Ranking
diff --git a/docs/src/tutorials/warcraft.jl b/docs/src/tutorials/warcraft.jl
index 13f21ba..2d41563 100644
--- a/docs/src/tutorials/warcraft.jl
+++ b/docs/src/tutorials/warcraft.jl
@@ -86,3 +86,6 @@ final_gap = compute_gap(b, test_dataset, model, maximizer)
 θ = model(x)
 y = maximizer(θ)
 plot_data(b, DataSample(; x, θ_true=θ, y_true=y))
+
+using Test #src
+@test final_gap < starting_gap #src
diff --git a/src/DynamicVehicleScheduling/state.jl b/src/DynamicVehicleScheduling/state.jl
index 704ef79..0d0a177 100644
--- a/src/DynamicVehicleScheduling/state.jl
+++ b/src/DynamicVehicleScheduling/state.jl
@@ -121,7 +121,7 @@ $TYPEDSIGNATURES
 
 Check if the given routes are feasible.
 Routes should be given with global indexation.
-Use [`env_routes_from_state_routes`](@ref) if needed to convert the indices beforehand.
+Use `env_routes_from_state_routes` if needed to convert the indices beforehand.
 """
 function is_feasible(state::DVSPState, routes::Vector{Vector{Int}}; verbose::Bool=false)
     (; is_must_dispatch, state_instance) = state

From d32b1e813be16987ac9a41e32d401ef6742e58e1 Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Thu, 21 Aug 2025 16:20:45 +0200
Subject: [PATCH 25/29] Improve basic coverage

---
 docs/src/api/0_interface.md                   |  4 ++
 docs/src/api/argmax.md                        |  4 ++
 docs/src/api/argmax_2d.md                     |  4 ++
 docs/src/api/dvsp.md                          |  4 ++
 docs/src/api/dynamic_assorment.md             |  4 ++
 docs/src/api/fixed_shortest_path.md           |  4 ++
 docs/src/api/portfolio_optimization.md        |  4 ++
 docs/src/api/ranking.md                       |  4 ++
 docs/src/api/subset_selection.md              |  4 ++
 docs/src/api/vsp.md                           |  4 ++
 docs/src/api/warcraft.md                      |  4 ++
 src/DecisionFocusedLearningBenchmarks.jl      | 30 +++++------
 src/DynamicAssortment/environment.jl          |  6 +--
 .../anticipative_solver.jl                    |  3 +-
 src/DynamicVehicleScheduling/environment.jl   |  6 +--
 .../PortfolioOptimization.jl                  | 34 -------------
 src/Utils/Utils.jl                            |  2 +-
 src/Utils/environment.jl                      |  4 +-
 src/Utils/policy.jl                           | 50 ++++++++++++-------
 test/dynamic_vsp.jl                           | 15 ++++--
 test/utils.jl                                 | 32 ++++++++++++
 21 files changed, 145 insertions(+), 81 deletions(-)

diff --git a/docs/src/api/0_interface.md b/docs/src/api/0_interface.md
index 1b0a22c..6363833 100644
--- a/docs/src/api/0_interface.md
+++ b/docs/src/api/0_interface.md
@@ -1,3 +1,7 @@
+```@meta
+CollapsedDocStrings = true
+```
+
 # Interface
 
 ## Public
diff --git a/docs/src/api/argmax.md b/docs/src/api/argmax.md
index 6ea12e4..d3b8d29 100644
--- a/docs/src/api/argmax.md
+++ b/docs/src/api/argmax.md
@@ -1,3 +1,7 @@
+```@meta
+CollapsedDocStrings = true
+```
+
 # Argmax
 
 ## Public
diff --git a/docs/src/api/argmax_2d.md b/docs/src/api/argmax_2d.md
index 1b6b44e..ce28b54 100644
--- a/docs/src/api/argmax_2d.md
+++ b/docs/src/api/argmax_2d.md
@@ -1,3 +1,7 @@
+```@meta
+CollapsedDocStrings = true
+```
+
 # Argmax2D
 
 ## Public
diff --git a/docs/src/api/dvsp.md b/docs/src/api/dvsp.md
index 4ecdfd9..2922696 100644
--- a/docs/src/api/dvsp.md
+++ b/docs/src/api/dvsp.md
@@ -1,3 +1,7 @@
+```@meta
+CollapsedDocStrings = true
+```
+
 # Dynamic Vehicle Scheduling
 
 ## Public
diff --git a/docs/src/api/dynamic_assorment.md b/docs/src/api/dynamic_assorment.md
index d738692..847d184 100644
--- a/docs/src/api/dynamic_assorment.md
+++ b/docs/src/api/dynamic_assorment.md
@@ -1,3 +1,7 @@
+```@meta
+CollapsedDocStrings = true
+```
+
 # Dynamic Assortment
 
 ## Public
diff --git a/docs/src/api/fixed_shortest_path.md b/docs/src/api/fixed_shortest_path.md
index df50a9f..36a03b2 100644
--- a/docs/src/api/fixed_shortest_path.md
+++ b/docs/src/api/fixed_shortest_path.md
@@ -1,3 +1,7 @@
+```@meta
+CollapsedDocStrings = true
+```
+
 # Fixed-size shortest path
 
 ## Public
diff --git a/docs/src/api/portfolio_optimization.md b/docs/src/api/portfolio_optimization.md
index 5b0102b..6d198ac 100644
--- a/docs/src/api/portfolio_optimization.md
+++ b/docs/src/api/portfolio_optimization.md
@@ -1,3 +1,7 @@
+```@meta
+CollapsedDocStrings = true
+```
+
 # Subset selection
 
 ## Public
diff --git a/docs/src/api/ranking.md b/docs/src/api/ranking.md
index f249a48..82d0719 100644
--- a/docs/src/api/ranking.md
+++ b/docs/src/api/ranking.md
@@ -1,3 +1,7 @@
+```@meta
+CollapsedDocStrings = true
+```
+
 # Ranking
 
 ## Public
diff --git a/docs/src/api/subset_selection.md b/docs/src/api/subset_selection.md
index 76b686d..946eb3c 100644
--- a/docs/src/api/subset_selection.md
+++ b/docs/src/api/subset_selection.md
@@ -1,3 +1,7 @@
+```@meta
+CollapsedDocStrings = true
+```
+
 # Subset selection
 
 ## Public
diff --git a/docs/src/api/vsp.md b/docs/src/api/vsp.md
index 96e4cdb..119c9ba 100644
--- a/docs/src/api/vsp.md
+++ b/docs/src/api/vsp.md
@@ -1,3 +1,7 @@
+```@meta
+CollapsedDocStrings = true
+```
+
 # Stochastic Vehicle Scheduling
 
 ## Public
diff --git a/docs/src/api/warcraft.md b/docs/src/api/warcraft.md
index 3ff6824..c3bd480 100644
--- a/docs/src/api/warcraft.md
+++ b/docs/src/api/warcraft.md
@@ -1,3 +1,7 @@
+```@meta
+CollapsedDocStrings = true
+```
+
 # Warcraft
 
 ## Public
diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl
index a33c1de..be2c500 100644
--- a/src/DecisionFocusedLearningBenchmarks.jl
+++ b/src/DecisionFocusedLearningBenchmarks.jl
@@ -3,22 +3,22 @@ module DecisionFocusedLearningBenchmarks
 using DataDeps
 using Requires: @require
 
-function _euro_neurips_unpack(local_filepath)
-    directory = dirname(local_filepath)
-    unpack(local_filepath)
-    # Move instances and delete the rest
-    for filepath in readdir(
-        joinpath(directory, "euro-neurips-vrp-2022-quickstart-main", "instances"); join=true
-    )
-        if endswith(filepath, ".txt")
-            mv(filepath, joinpath(directory, basename(filepath)))
+function __init__()
+    function _euro_neurips_unpack(local_filepath)
+        directory = dirname(local_filepath)
+        unpack(local_filepath)
+        # Move instances and delete the rest
+        for filepath in readdir(
+            joinpath(directory, "euro-neurips-vrp-2022-quickstart-main", "instances");
+            join=true,
+        )
+            if endswith(filepath, ".txt")
+                mv(filepath, joinpath(directory, basename(filepath)))
+            end
         end
+        rm(joinpath(directory, "euro-neurips-vrp-2022-quickstart-main"); recursive=true)
+        return nothing
     end
-    rm(joinpath(directory, "euro-neurips-vrp-2022-quickstart-main"); recursive=true)
-    return nothing
-end
-
-function __init__()
     # Register the Warcraft dataset
     ENV["DATADEPS_ALWAYS_ACCEPT"] = "true"
     register(
@@ -64,7 +64,7 @@ using .Utils
 export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark, DataSample
 export AbstractEnvironment, get_seed, is_terminated, observe, reset!, step!
 
-export Policy, run_policy!
+export Policy, evaluate_policy!
 
 export generate_sample, generate_dataset, generate_environments, generate_environment
 export generate_scenario
diff --git a/src/DynamicAssortment/environment.jl b/src/DynamicAssortment/environment.jl
index 5f62acc..8389a0c 100644
--- a/src/DynamicAssortment/environment.jl
+++ b/src/DynamicAssortment/environment.jl
@@ -43,7 +43,7 @@ function Environment(instance::Instance; seed=0, rng::AbstractRNG=MersenneTwiste
         features=full_features,
         d_features=zeros(2, N),
     )
-    Utils.reset!(env; reset_seed=true)
+    Utils.reset!(env; reset_rng=true)
     return env
 end
 
@@ -58,8 +58,8 @@ prices(b::Environment) = b.instance.prices
 ## Basic operations of environment
 
 # Reset the environment
-function Utils.reset!(env::Environment; reset_seed=false, seed=env.seed)
-    reset_seed && Random.seed!(env.rng, seed)
+function Utils.reset!(env::Environment; reset_rng=false, seed=env.seed)
+    reset_rng && Random.seed!(env.rng, seed)
 
     env.step = 1
 
diff --git a/src/DynamicVehicleScheduling/anticipative_solver.jl b/src/DynamicVehicleScheduling/anticipative_solver.jl
index 47a2c1f..5847808 100644
--- a/src/DynamicVehicleScheduling/anticipative_solver.jl
+++ b/src/DynamicVehicleScheduling/anticipative_solver.jl
@@ -49,8 +49,9 @@ function anticipative_solver(
     two_dimensional_features=env.instance.two_dimensional_features,
     reset_env=true,
     nb_epochs=typemax(Int),
+    seed=get_seed(env),
 )
-    reset_env && reset!(env; reset_seed=true)
+    reset_env && reset!(env; reset_rng=true, seed)
 
     start_epoch = current_epoch(env)
     end_epoch = min(last_epoch(env), start_epoch + nb_epochs - 1)
diff --git a/src/DynamicVehicleScheduling/environment.jl b/src/DynamicVehicleScheduling/environment.jl
index cd8d6a6..339bd78 100644
--- a/src/DynamicVehicleScheduling/environment.jl
+++ b/src/DynamicVehicleScheduling/environment.jl
@@ -70,10 +70,10 @@ Utils.is_terminated(env::DVSPEnv) = current_epoch(env) > last_epoch(env)
 $TYPEDSIGNATURES
 
 Reset the environment to its initial state.
-Also reset the seed if `reset_seed` is set to true.
+Also reset the rng to `seed` if `reset_rng` is set to true.
 """
-function Utils.reset!(env::DVSPEnv; seed=get_seed(env), reset_seed=false)
-    if reset_seed
+function Utils.reset!(env::DVSPEnv; seed=get_seed(env), reset_rng=false)
+    if reset_rng
         Random.seed!(env.rng, seed)
     end
     env.scenario = Utils.generate_scenario(env; rng=env.rng)
diff --git a/src/PortfolioOptimization/PortfolioOptimization.jl b/src/PortfolioOptimization/PortfolioOptimization.jl
index 7d1b577..e94262a 100644
--- a/src/PortfolioOptimization/PortfolioOptimization.jl
+++ b/src/PortfolioOptimization/PortfolioOptimization.jl
@@ -105,40 +105,6 @@ end
 """
 $TYPEDSIGNATURES
 
-Generate a dataset of labeled instances for the portfolio optimization problem.
-"""
-function Utils.generate_dataset(
-    bench::PortfolioOptimizationBenchmark,
-    dataset_size::Int=10;
-    seed::Int=0,
-    type::Type=Float32,
-)
-    (; d, p, deg, ν, L, f) = bench
-    rng = MersenneTwister(seed)
-
-    # Features
-    features = [randn(rng, type, p) for _ in 1:dataset_size]
-
-    # True weights
-    B = rand(rng, Bernoulli(0.5), d, p)
-    c̄ = [
-        (0.05 / type(sqrt(p)) .* B * features[i] .+ 0.1^(1 / deg)) .^ deg for
-        i in 1:dataset_size
-    ]
-    costs = [c̄ᵢ .+ L * f .+ 0.01 .* ν .* randn(rng, type, d) for c̄ᵢ in c̄]
-
-    maximizer = Utils.generate_maximizer(bench)
-    solutions = maximizer.(costs)
-
-    return [
-        DataSample(; x, θ_true, y_true) for
-        (x, θ_true, y_true) in zip(features, costs, solutions)
-    ]
-end
-
-"""
-$TYPEDSIGNATURES
-
 Initialize a linear model for `bench` using `Flux`.
 """
 function Utils.generate_statistical_model(bench::PortfolioOptimizationBenchmark)
diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl
index 00d9547..d738e31 100644
--- a/src/Utils/Utils.jl
+++ b/src/Utils/Utils.jl
@@ -21,7 +21,7 @@ include("misc.jl")
 include("model_builders.jl")
 
 export DataSample, Policy
-export run_policy!
+export evaluate_policy!
 export TopKMaximizer
 
 export AbstractEnvironment, get_seed, is_terminated, observe, reset!, step!
diff --git a/src/Utils/environment.jl b/src/Utils/environment.jl
index 38a3f34..88eff08 100644
--- a/src/Utils/environment.jl
+++ b/src/Utils/environment.jl
@@ -34,10 +34,10 @@ This function should return a tuple of two elements:
 function observe end
 
 """
-    reset!(env::AbstractEnvironment; reset_seed::Bool, seed=get_seed(env)) --> Nothing
+    reset!(env::AbstractEnvironment; reset_rng::Bool, seed=get_seed(env)) --> Nothing
 
 Reset the environment to its initial state.
-If `reset_seed` is true, the random number generator is reset to the given `seed`.
+If `reset_rng` is true, the random number generator is reset to the given `seed`.
 """
 function reset! end
 
diff --git a/src/Utils/policy.jl b/src/Utils/policy.jl
index 8f17f79..3057050 100644
--- a/src/Utils/policy.jl
+++ b/src/Utils/policy.jl
@@ -31,9 +31,9 @@ $TYPEDSIGNATURES
 Run the policy on the environment and return the total reward and a dataset of observations.
 By default, the environment is reset before running the policy.
 """
-function run_policy!(policy, env::AbstractEnvironment; kwargs...)
+function evaluate_policy!(policy, env::AbstractEnvironment; kwargs...)
     total_reward = 0.0
-    reset!(env; reset_seed=false)
+    reset!(env; reset_rng=false)
     local labeled_dataset
     while !is_terminated(env)
         y = policy(env; kwargs...)
@@ -52,35 +52,49 @@ function run_policy!(policy, env::AbstractEnvironment; kwargs...)
     return total_reward, labeled_dataset
 end
 
-function run_policy!(policy, envs::Vector{<:AbstractEnvironment}; kwargs...)
-    E = length(envs)
-    rewards = zeros(Float64, E)
-    datasets = map(1:E) do e
-        reward, dataset = run_policy!(policy, envs[e]; kwargs...)
-        rewards[e] = reward
-        return dataset
-    end
-    return rewards, vcat(datasets...)
-end
+# function evaluate_policy!(policy, envs::Vector{<:AbstractEnvironment}; kwargs...)
+#     E = length(envs)
+#     rewards = zeros(Float64, E)
+#     datasets = map(1:E) do e
+#         reward, dataset = evaluate_policy!(policy, envs[e]; kwargs...)
+#         rewards[e] = reward
+#         return dataset
+#     end
+#     return rewards, vcat(datasets...)
+# end
+
+"""
+$TYPEDSIGNATURES
 
-function run_policy!(
-    policy, env::AbstractEnvironment, episodes::Int; seed=get_seed(env), kwargs...
+Evaluate the policy on the environment for `episodes` episodes and return the average reward per episode and a dataset of observations.
+By default, the environment is reset before running the policy.
+"""
+function evaluate_policy!(
+    policy, env::AbstractEnvironment, episodes::Int=1; seed=get_seed(env), kwargs...
 )
-    reset!(env; reset_seed=true, seed)
+    reset!(env; reset_rng=true, seed)
     total_reward = 0.0
     datasets = map(1:episodes) do _i
-        reward, dataset = run_policy!(policy, env; kwargs...)
+        reward, dataset = evaluate_policy!(policy, env; kwargs...)
         total_reward += reward
         return dataset
     end
     return total_reward / episodes, vcat(datasets...)
 end
 
-function run_policy!(policy, envs::Vector{<:AbstractEnvironment}, episodes::Int; kwargs...)
+"""
+$TYPEDSIGNATURES
+
+Run the policy on the environments and return the total rewards and a dataset of observations.
+By default, the environments are reset before running the policy.
+"""
+function evaluate_policy!(
+    policy, envs::Vector{<:AbstractEnvironment}, episodes::Int=1; kwargs...
+)
     E = length(envs)
     rewards = zeros(Float64, E)
     datasets = map(1:E) do e
-        reward, dataset = run_policy!(policy, envs[e], episodes; kwargs...)
+        reward, dataset = evaluate_policy!(policy, envs[e], episodes; kwargs...)
         rewards[e] = reward
         return dataset
     end
diff --git a/test/dynamic_vsp.jl b/test/dynamic_vsp.jl
index 367db06..166778a 100644
--- a/test/dynamic_vsp.jl
+++ b/test/dynamic_vsp.jl
@@ -1,9 +1,14 @@
 @testitem "DVSP" begin
     using DecisionFocusedLearningBenchmarks.DynamicVehicleScheduling
+    using Statistics: mean
 
     b = DynamicVehicleSchedulingBenchmark(; two_dimensional_features=true)
+
+    @test is_exogenous(b)
+    @test !is_endogenous(b)
+
     dataset = generate_dataset(b, 10)
-    environments = generate_environments(b, dataset)
+    environments = generate_environments(b, dataset; seed=0)
 
     env = environments[1]
     get_seed(env)
@@ -12,10 +17,12 @@
     lazy = policies[1]
     greedy = policies[2]
 
-    d = run_policy!(lazy, env, 1; seed=0)[2]
+    d = evaluate_policy!(lazy, env, 1; seed=0)[2]
+
+    r_lazy, d = evaluate_policy!(lazy, environments, 10)
+    r_greedy, d = evaluate_policy!(greedy, environments, 10)
 
-    r, d = run_policy!(lazy, environments, 10)
-    r, d = run_policy!(greedy, environments, 10)
+    @test mean(r_lazy) <= mean(r_greedy)
 
     env = environments[1]
     instance = dataset[1].instance
diff --git a/test/utils.jl b/test/utils.jl
index b071bd8..e0a4bac 100644
--- a/test/utils.jl
+++ b/test/utils.jl
@@ -23,3 +23,35 @@
         @test max(h, w) <= length(path) <= h + w
     end
 end
+
+@testitem "DataSample" begin
+    using DecisionFocusedLearningBenchmarks
+    using StableRNGs
+
+    rng = StableRNG(1234)
+
+    function random_sample()
+        return DataSample(;
+            x=randn(rng, 10, 5),
+            θ_true=rand(rng, 5),
+            y_true=rand(rng, 10),
+            instance="this is an instance",
+        )
+    end
+
+    sample = random_sample()
+    @test sample isa DataSample
+
+    io = IOBuffer()
+    show(io, sample)
+    @test String(take!(io)) ==
+        "DataSample(x=$(sample.x), θ_true=$(sample.θ_true), y_true=$(sample.y_true), instance=$(sample.instance))"
+end
+
+@testitem "Maximizers" begin
+    using DecisionFocusedLearningBenchmarks.Utils: TopKMaximizer
+    top_k = TopKMaximizer(3)
+    @test top_k([1, 2, 3, 4, 5]) == [0, 0, 1, 1, 1]
+    @test top_k([5, 4, 3, 2, 1]) == [1, 1, 1, 0, 0]
+    @test_throws(AssertionError, top_k([1, 2]))
+end

From 805a22be271386717e61babae0601fb137fcd7a2 Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Thu, 21 Aug 2025 16:38:45 +0200
Subject: [PATCH 26/29] bugfix

---
 src/PortfolioOptimization/PortfolioOptimization.jl | 4 ++--
 src/Utils/policy.jl                                | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/PortfolioOptimization/PortfolioOptimization.jl b/src/PortfolioOptimization/PortfolioOptimization.jl
index e94262a..5fc7e8f 100644
--- a/src/PortfolioOptimization/PortfolioOptimization.jl
+++ b/src/PortfolioOptimization/PortfolioOptimization.jl
@@ -91,7 +91,7 @@ function Utils.generate_sample(
     bench::PortfolioOptimizationBenchmark, rng::AbstractRNG; type::Type=Float32
 )
     (; d, p, deg, ν, L, f) = bench
-    features = randn(rng, type, p, d)
+    features = randn(rng, type, p)
     B = rand(rng, Bernoulli(0.5), d, p)
     c̄ = (0.05 / type(sqrt(p)) .* B * features .+ 0.1^(1 / deg)) .^ deg
     costs = c̄ .+ L * f .+ 0.01 * ν * randn(rng, type, d)
@@ -99,7 +99,7 @@ function Utils.generate_sample(
     maximizer = Utils.generate_maximizer(bench)
     solution = maximizer(costs)
 
-    return DataSample(; x=features, θ_true=c̄, y_true=solution)
+    return DataSample(; x=features, θ_true=costs, y_true=solution)
 end
 
 """
diff --git a/src/Utils/policy.jl b/src/Utils/policy.jl
index 3057050..2b3c8e5 100644
--- a/src/Utils/policy.jl
+++ b/src/Utils/policy.jl
@@ -70,7 +70,7 @@ Evaluate the policy on the environment and return the total reward and a dataset
 By default, the environment is reset before running the policy.
 """
 function evaluate_policy!(
-    policy, env::AbstractEnvironment, episodes::Int=1; seed=get_seed(env), kwargs...
+    policy, env::AbstractEnvironment, episodes::Int; seed=get_seed(env), kwargs...
 )
     reset!(env; reset_rng=true, seed)
     total_reward = 0.0

From 522a91f1cabe9ad0f53b836b41025c18618640d8 Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Thu, 21 Aug 2025 16:53:37 +0200
Subject: [PATCH 27/29] basic tests for dynamic assortment

---
 test/dynamic_assortment.jl | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 test/dynamic_assortment.jl

diff --git a/test/dynamic_assortment.jl b/test/dynamic_assortment.jl
new file mode 100644
index 0000000..d9787fb
--- /dev/null
+++ b/test/dynamic_assortment.jl
@@ -0,0 +1,25 @@
+@testitem "DVSP" begin
+    using DecisionFocusedLearningBenchmarks
+    using Statistics: mean
+
+    b = DynamicAssortmentBenchmark()
+
+    @test is_endogenous(b)
+    @test !is_exogenous(b)
+
+    dataset = generate_dataset(b, 10; seed=0)
+    environments = generate_environments(b, dataset)
+
+    env = environments[1]
+    get_seed(env)
+    env.seed
+
+    policies = generate_policies(b)
+    expert = policies[1]
+    greedy = policies[2]
+
+    r_expert, _ = evaluate_policy!(expert, environments)
+    r_greedy, _ = evaluate_policy!(greedy, environments)
+
+    @test mean(r_expert) >= mean(r_greedy)
+end

From 1905623fc8ae79bfc00e1327fddc911b1f327ca3 Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Thu, 21 Aug 2025 17:14:59 +0200
Subject: [PATCH 28/29] improve coverage

---
 test/dynamic_assortment.jl | 11 ++++--
 test/utils.jl              | 68 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+), 2 deletions(-)

diff --git a/test/dynamic_assortment.jl b/test/dynamic_assortment.jl
index d9787fb..54030fe 100644
--- a/test/dynamic_assortment.jl
+++ b/test/dynamic_assortment.jl
@@ -1,4 +1,4 @@
-@testitem "DVSP" begin
+@testitem "dynamic Assortment" begin
     using DecisionFocusedLearningBenchmarks
     using Statistics: mean
 
@@ -18,8 +18,15 @@
     expert = policies[1]
     greedy = policies[2]
 
-    r_expert, _ = evaluate_policy!(expert, environments)
+    r_expert, d = evaluate_policy!(expert, environments)
     r_greedy, _ = evaluate_policy!(greedy, environments)
 
     @test mean(r_expert) >= mean(r_greedy)
+
+    model = generate_statistical_model(b)
+    maximizer = generate_maximizer(b)
+    sample = d[1]
+    x = sample.x
+    θ = model(x)
+    y = maximizer(θ)
 end
diff --git a/test/utils.jl b/test/utils.jl
index e0a4bac..4fd4b4f 100644
--- a/test/utils.jl
+++ b/test/utils.jl
@@ -46,6 +46,74 @@ end
     show(io, sample)
     @test String(take!(io)) ==
         "DataSample(x=$(sample.x), θ_true=$(sample.θ_true), y_true=$(sample.y_true), instance=$(sample.instance))"
+
+    # Test StatsBase methods
+    using StatsBase:
+        ZScoreTransform,
+        UnitRangeTransform,
+        fit,
+        transform,
+        transform!,
+        reconstruct,
+        reconstruct!
+
+    # Create a dataset for testing
+    N = 5
+    dataset = [random_sample() for _ in 1:N]
+
+    # Test fit with ZScoreTransform
+    zt = fit(ZScoreTransform, dataset)
+    @test zt isa ZScoreTransform
+
+    # Test fit with UnitRangeTransform
+    ut = fit(UnitRangeTransform, dataset)
+    @test ut isa UnitRangeTransform
+
+    # Test transform (non-mutating)
+    dataset_zt = transform(zt, dataset)
+    @test length(dataset_zt) == length(dataset)
+    @test all(d -> d isa DataSample, dataset_zt)
+
+    # Check that other fields are preserved
+    for i in 1:N
+        @test dataset_zt[i].θ_true == dataset[i].θ_true
+        @test dataset_zt[i].y_true == dataset[i].y_true
+        @test dataset_zt[i].instance == dataset[i].instance
+    end
+
+    # Check that features are actually transformed
+    @test dataset_zt[1].x != dataset[1].x
+
+    # Test transform! (mutating)
+    dataset_copy = deepcopy(dataset)
+    original_x = copy(dataset_copy[1].x)
+    transform!(ut, dataset_copy)
+    @test dataset_copy[1].x != original_x
+
+    # Check that other fields remain unchanged after transform!
+    for i in 1:N
+        @test dataset_copy[i].θ_true == dataset[i].θ_true
+        @test dataset_copy[i].y_true == dataset[i].y_true
+        @test dataset_copy[i].instance == dataset[i].instance
+    end
+
+    # Test reconstruct (non-mutating)
+    dataset_reconstructed = reconstruct(zt, dataset_zt)
+    @test length(dataset_reconstructed) == length(dataset)
+
+    # Test round-trip consistency (should be close to original)
+    for i in 1:N
+        @test dataset_reconstructed[i].x ≈ dataset[i].x atol = 1e-10
+        @test dataset_reconstructed[i].θ_true == dataset[i].θ_true
+        @test dataset_reconstructed[i].y_true == dataset[i].y_true
+        @test dataset_reconstructed[i].instance == dataset[i].instance
+    end
+
+    # Test reconstruct! (mutating)
+    reconstruct!(zt, dataset_zt)
+    for i in 1:N
+        @test dataset_zt[i].x ≈ dataset[i].x atol = 1e-10
+    end
 end
 
 @testitem "Maximizers" begin

From eb5b67f893c3a199a7f074f68382c8b39d8c412a Mon Sep 17 00:00:00 2001
From: BatyLeo <leo.baty67@gmail.com>
Date: Fri, 22 Aug 2025 09:41:49 +0200
Subject: [PATCH 29/29] improve coverage

---
 .../DynamicVehicleScheduling.jl                |  5 +++++
 test/dynamic_vsp.jl                            | 18 ++++++++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
index f76a43b..7421032 100644
--- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
+++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
@@ -5,6 +5,7 @@ using ..Utils
 using Base: @kwdef
 using DataDeps: @datadep_str
 using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES
+using Flux: Chain, Dense
 using Graphs
 using HiGHS
 using InferOpt: LinearMaximizer
@@ -110,6 +111,10 @@ function Utils.generate_policies(b::DynamicVehicleSchedulingBenchmark)
     return (lazy, greedy)
 end
 
+function Utils.generate_statistical_model(b::DynamicVehicleSchedulingBenchmark)
+    return Chain(Dense((b.two_dimensional_features ? 2 : 14) => 1), vec)
+end
+
 export DynamicVehicleSchedulingBenchmark
 
 end
diff --git a/test/dynamic_vsp.jl b/test/dynamic_vsp.jl
index 166778a..0f890c0 100644
--- a/test/dynamic_vsp.jl
+++ b/test/dynamic_vsp.jl
@@ -3,6 +3,7 @@
     using Statistics: mean
 
     b = DynamicVehicleSchedulingBenchmark(; two_dimensional_features=true)
+    b2 = DynamicVehicleSchedulingBenchmark(; two_dimensional_features=false)
 
     @test is_exogenous(b)
     @test !is_endogenous(b)
@@ -28,4 +29,21 @@
     instance = dataset[1].instance
     scenario = generate_scenario(b, instance)
     v, y = generate_anticipative_solution(b, env, scenario; nb_epochs=2, reset_env=true)
+
+    maximizer = generate_maximizer(b)
+
+    x, instance = observe(env)
+    model = generate_statistical_model(b)
+    θ = model(x)
+    y = maximizer(θ; instance)
+
+    dataset2 = generate_dataset(b2, 10)
+    environments2 = generate_environments(b2, dataset2; seed=0)
+    env2 = environments2[1]
+    x2, instance2 = observe(env2)
+    model2 = generate_statistical_model(b2)
+    θ2 = model2(x2)
+    y2 = maximizer(θ2; instance=instance2)
+    @test size(x, 1) == 2
+    @test size(x2, 1) == 14
 end