From 6fd1ff4a95d342f35f167d420894641b508b3e6f Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Thu, 13 Mar 2025 16:25:28 +0100 Subject: [PATCH 01/29] initialize the DynamicVehicleScheduling module --- src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl new file mode 100644 index 0000000..ef9cbb9 --- /dev/null +++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl @@ -0,0 +1,6 @@ +module DynamicVehicleScheduling + +using ..Utils +using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES + +end From c159731c4d9a6507a6ee4ac8d3711e1ec51b4ac7 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Thu, 3 Apr 2025 16:20:33 +0200 Subject: [PATCH 02/29] update --- src/DecisionFocusedLearningBenchmarks.jl | 28 +- .../algorithms/anticipative_solver.jl | 111 +++++++ .../algorithms/prize_collecting_vsp.jl | 215 +++++++++++++ .../DynamicVSP/environment/environment.jl | 293 ++++++++++++++++++ .../DynamicVSP/environment/plot.jl | 134 ++++++++ .../DynamicVSP/environment/state.jl | 89 ++++++ .../DynamicVSP/learning/2d_features.jl | 15 + .../DynamicVSP/learning/dataset.jl | 37 +++ .../DynamicVSP/learning/features.jl | 75 +++++ .../DynamicVSP/policy/abstract_vsp_policy.jl | 33 ++ .../DynamicVSP/policy/anticipative_policy.jl | 18 ++ .../DynamicVSP/policy/greedy_policy.jl | 34 ++ .../DynamicVSP/policy/kleopatra_policy.jl | 53 ++++ .../DynamicVSP/policy/lazy_policy.jl | 34 ++ .../DynamicVSP/utils.jl | 11 + .../DynamicVSP/vsp/instance.jl | 65 ++++ .../DynamicVSP/vsp/parsing.jl | 95 ++++++ .../DynamicVSP/vsp/plot.jl | 39 +++ .../DynamicVSP/vsp/solution.jl | 50 +++ .../DynamicVehicleScheduling.jl | 67 ++++ .../abstract_policy.jl | 5 + .../dynamic_config.jl | 24 ++ src/DynamicVehicleScheduling/utils.jl | 34 ++ src/Utils/model_builders.jl | 1 
- 24 files changed, 1558 insertions(+), 2 deletions(-) create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/algorithms/anticipative_solver.jl create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/algorithms/prize_collecting_vsp.jl create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/environment/environment.jl create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/environment/plot.jl create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/environment/state.jl create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/learning/2d_features.jl create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/learning/dataset.jl create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/learning/features.jl create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/policy/abstract_vsp_policy.jl create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/policy/anticipative_policy.jl create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/policy/greedy_policy.jl create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/policy/kleopatra_policy.jl create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/policy/lazy_policy.jl create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/utils.jl create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/vsp/instance.jl create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/vsp/parsing.jl create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/vsp/plot.jl create mode 100644 src/DynamicVehicleScheduling/DynamicVSP/vsp/solution.jl create mode 100644 src/DynamicVehicleScheduling/abstract_policy.jl create mode 100644 src/DynamicVehicleScheduling/dynamic_config.jl create mode 100644 src/DynamicVehicleScheduling/utils.jl diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl index 2122788..18cd94f 100644 --- a/src/DecisionFocusedLearningBenchmarks.jl +++ b/src/DecisionFocusedLearningBenchmarks.jl @@ -3,18 +3,42 @@ module DecisionFocusedLearningBenchmarks using 
"""
    _euro_neurips_unpack(local_filepath)

Post-fetch hook used when registering the `"euro-neurips-2022"` data dependency.

Unpack the archive located at `local_filepath`, move every `.txt` file found in the
extracted `euro-neurips-vrp-2022-quickstart-main/instances` folder next to the archive,
then delete the remainder of the extracted tree. Return `nothing`.
"""
function _euro_neurips_unpack(local_filepath)
    directory = dirname(local_filepath)
    unpack(local_filepath)

    extracted_root = joinpath(directory, "euro-neurips-vrp-2022-quickstart-main")

    # Move instances and delete the rest
    for filepath in readdir(joinpath(extracted_root, "instances"); join=true)
        endswith(filepath, ".txt") || continue
        # `force=true`: overwrite an instance file left by a previous (partial) fetch
        # instead of throwing because the destination already exists.
        mv(filepath, joinpath(directory, basename(filepath)); force=true)
    end
    rm(extracted_root; recursive=true)
    return nothing
end
"""
$TYPEDSIGNATURES

Retrieve anticipative routes solution from the given MIP solution `y`.
Outputs a set of routes per epoch.

`y[i, j, t]` is read as "arc `i -> j` is used in epoch `t`"; index `1` is the depot.
The returned vector has one entry per epoch of `first_epoch:last_epoch` (in that order),
each entry being the list of routes (depot excluded) dispatched in that epoch.

Throws an error if a task reached from the depot has no selected outgoing arc, i.e. if
`y` does not describe depot-to-depot routes.
"""
function retrieve_routes_anticipative(y::AbstractArray, dvspenv::DVSPEnv)
    nb_tasks = length(dvspenv.customer_index)
    (; first_epoch, last_epoch) = dvspenv.config
    job_indices = 2:nb_tasks
    epoch_indices = first_epoch:last_epoch
    depot = 1

    # Same tolerance for route starts and for successors, so that a solver value such as
    # 0.9999999 is never accepted as a successor but rejected as a route start.
    is_selected(v) = isapprox(v, 1; atol=0.1)

    routes = [Vector{Int}[] for _ in epoch_indices]
    # `k` is the position in `routes`, `t` the epoch key used to index `y`:
    # they only coincide when `first_epoch == 1`.
    for (k, t) in enumerate(epoch_indices)
        for first_task in job_indices
            is_selected(y[depot, first_task, t]) || continue
            route = Int[]
            current_task = first_task
            while current_task != depot
                push!(route, current_task)
                next_task = 0
                for i in 1:nb_tasks
                    if is_selected(y[current_task, i, t])
                        next_task = i
                        break
                    end
                end
                next_task == 0 && error(
                    "No selected arc leaving task $current_task in epoch $t: " *
                    "`y` is not a valid routes solution",
                )
                current_task = next_task
            end
            push!(routes[k], route)
        end
    end
    return routes
end
"""
$TYPEDSIGNATURES

Solve the anticipative VSP problem for environment `env`.
For this, it uses the current environment history, so make sure that the environment is terminated before calling this method.

When `draw_epochs` is `true` (the default), `draw_all_epochs!(env)` is called first, so the
history is (re)generated by this function itself.

# Keywords
- `model_builder`: zero-argument callable returning the optimization model to fill.
- `draw_epochs`: whether to call `draw_all_epochs!(env)` before building the model.

# Returns
The output of `retrieve_routes_anticipative` applied to the solution values of `y`:
one set of routes per epoch.

The function asserts `first_epoch == 1`.
"""
function anticipative_solver(env::DVSPEnv; model_builder=highs_model, draw_epochs=true)
    draw_epochs && draw_all_epochs!(env)
    (; customer_index, service_time, start_time, request_epoch) = env
    # Duration matrix restricted (and reordered) to the locations of the env history
    duration = env.config.static_instance.duration[customer_index, customer_index]
    (; first_epoch, last_epoch, epoch_duration, Δ_dispatch) = env.config

    @assert first_epoch == 1

    model = model_builder()
    set_silent(model)

    nb_nodes = length(customer_index)
    job_indices = 2:nb_nodes # index 1 is the depot
    epoch_indices = first_epoch:last_epoch

    # y[i, j, t] = 1 if arc i -> j is used by a route dispatched in epoch t
    @variable(model, y[i=1:nb_nodes, j=1:nb_nodes, t=epoch_indices]; binary=true)

    # Maximizing the negated durations, i.e. minimizing total travel duration
    @objective(
        model,
        Max,
        sum(
            -duration[i, j] * y[i, j, t] for i in 1:nb_nodes, j in 1:nb_nodes,
            t in epoch_indices
        )
    )

    # flow constraint per epoch
    for t in epoch_indices, i in 1:nb_nodes
        @constraint(
            model,
            sum(y[j, i, t] for j in 1:nb_nodes) == sum(y[i, j, t] for j in 1:nb_nodes)
        )
    end

    # each task must be done once along the horizon
    @constraint(
        model,
        demand[i in job_indices],
        sum(y[j, i, t] for j in 1:nb_nodes, t in epoch_indices) == 1
    )

    # a trip from i can be planned only after request appeared
    for i in job_indices, t in epoch_indices, j in 1:nb_nodes
        if t < request_epoch[i]
            @constraint(model, y[i, j, t] <= 0)
        end
    end

    # a trip from i can be done only before limit date
    for i in job_indices, t in epoch_indices, j in 1:nb_nodes
        if (t - 1) * epoch_duration + duration[1, i] + Δ_dispatch > start_time[i] # ! this only works if first_epoch = 1
            @constraint(model, y[i, j, t] <= 0)
        end
    end

    # trips can be planned if start, service and transport times enable it
    for i in job_indices, t in epoch_indices, j in job_indices
        if start_time[i] <= start_time[j]
            # i precedes j: forbid the arc when j cannot be reached in time after serving i
            if start_time[i] + service_time[i] + duration[i, j] > start_time[j]
                @constraint(model, y[i, j, t] <= 0)
            end
        else
            # j starts strictly before i: arc i -> j is never allowed
            @constraint(model, y[i, j, t] <= 0)
        end
    end

    optimize!(model)

    return retrieve_routes_anticipative(value.(y), env)
end
"""
$TYPEDSIGNATURES

Retrieve routes solution from the given MIP solution `y` matrix and `graph`.

`y[i, j]` is read as "arc `i -> j` of `graph` is used"; vertex `1` is the depot.
Each returned route lists the visited vertices in order, depot excluded.

Throws an error if a vertex reached from the depot has no selected outgoing arc, i.e. if
`y` does not describe depot-to-depot routes.
"""
function retrieve_routes(y::AbstractArray, graph::AbstractGraph)
    nb_tasks = nv(graph)
    job_indices = 2:nb_tasks
    depot = 1

    # Same tolerance for route starts and for successors, so that a solver value such as
    # 0.9999999 is never accepted as a successor but rejected as a route start.
    is_selected(v) = isapprox(v, 1; atol=0.1)

    routes = Vector{Int}[]
    for first_task in job_indices
        is_selected(y[depot, first_task]) || continue
        route = Int[]
        current_task = first_task
        while current_task != depot
            push!(route, current_task)
            next_task = 0
            # only arcs of the graph have an associated entry in `y`
            for i in outneighbors(graph, current_task)
                if is_selected(y[current_task, i])
                    next_task = i
                    break
                end
            end
            next_task == 0 && error(
                "No selected arc leaving vertex $current_task: " *
                "`y` is not a valid routes solution",
            )
            current_task = next_task
        end
        push!(routes, route)
    end
    return routes
end
"""
    my_objective_value(θ, routes; instance)

Return the negated prize-collecting value of `routes` for the state `instance`.

Prizes `θ` are scattered on the postponable locations of `instance` (other locations get
a zero prize). For every route, the arcs depot -> first stop and stop -> next stop each
contribute `prize(destination) - duration(origin, destination)`; the arc going back to
the depot is not counted. The sum of these contributions is returned with a minus sign.
"""
function my_objective_value(θ, routes; instance)
    travel_time = instance.instance.duration

    # prize per location, zero wherever the request is not postponable
    prizes = zeros(Float64, nb_locations(instance))
    prizes[instance.is_postponable] .= θ

    accumulated = 0.0
    for route in routes
        path = vcat(1, route) # prepend the depot
        for (origin, destination) in partition(path, 2, 1)
            accumulated += prizes[destination] - travel_time[origin, destination]
        end
    end
    return -accumulated
end
"""
    ChainRulesCore.rrule(::typeof(my_objective_value), θ, routes; instance)

Custom reverse rule for `my_objective_value`.

The primal value and the vector `g` both come from `_objective_value(θ, routes; instance)`.
The pullback scales `g` by the incoming cotangent for `θ` and returns `NoTangent()` for
the function itself and for `routes`.
"""
function ChainRulesCore.rrule(::typeof(my_objective_value), θ, routes; instance)
    total, g = _objective_value(θ, routes; instance)
    # NOTE(review): the primal returned here is `-total` in `_objective_value`, while `g`
    # holds `+1` on visited postponable entries — confirm the intended gradient sign.
    function pullback(dy)
        # Do not rebind `g` here: the closure would keep the scaled value, so calling
        # the pullback a second time would compound the cotangent.
        return NoTangent(), g .* dy, NoTangent()
    end
    return total, pullback
end
"""
$TYPEDSIGNATURES

Constructor for [`DVSPEnv`](@ref).

Build the `DynamicConfig` from `static_instance` and the keyword arguments, with
`first_epoch = 1` and `last_epoch` derived from the largest start time of the static
instance and `epoch_duration`. The returned environment only contains the depot
(location `1`), sits at epoch `0`, and owns a `MersenneTwister` seeded with `seed`.
"""
function DVSPEnv(
    static_instance::VSPInstance;
    seed=0,
    max_requests_per_epoch=10,
    Δ_dispatch=1.0,
    epoch_duration=1.0,
)
    first_epoch = 1
    latest_start = maximum(static_instance.start_time)
    last_epoch = trunc(Int, latest_start / epoch_duration) - 1

    config = DynamicConfig(;
        static_instance,
        max_requests_per_epoch,
        Δ_dispatch,
        epoch_duration,
        seed,
        first_epoch,
        last_epoch,
    )

    # History initially reduced to the depot, before the first epoch is drawn
    return DVSPEnv(;
        config,
        current_epoch=0,
        rng=MersenneTwister(seed),
        customer_index=[1],
        service_time=[0.0],
        start_time=[0.0],
        request_is_dispatched=falses(1),
        request_epoch=[first_epoch - 1],
        state=VSPState(),
    )
end
"""
$TYPEDSIGNATURES

Reset the environment to its initial state.
Also reset the seed if `reset_seed` is set to true.

The epoch counter goes back to `first_epoch - 1` and the request history is truncated
to its first entry (the depot). `env.state` is left untouched. Return `nothing`.
"""
function reset!(env::DVSPEnv; reset_seed::Bool=true)
    settings = env.config
    initial_epoch = settings.first_epoch - 1
    env.current_epoch = initial_epoch

    # keep only the depot (first entry) of every history vector
    env.customer_index = env.customer_index[1:1]
    env.service_time = env.service_time[1:1]
    env.start_time = env.start_time[1:1]
    env.request_is_dispatched = falses(1)
    env.request_epoch = [initial_epoch]

    if reset_seed
        seed!(env.rng, settings.seed)
    end
    return nothing
end
"""
$TYPEDSIGNATURES

Update `env` by drawing the next epoch, and return the state produced by
`update_state!(env)`.

The epoch counter is incremented, then `max_requests_per_epoch` candidate requests are
sampled from the static instance (location, start time and service time are drawn
independently). Candidates that cannot be reached from the depot before their start time,
given the planning start time of the new epoch, are rejected; the others are appended to
the environment history.
"""
function next_epoch!(env::DVSPEnv)
    # Increment epoch number
    env.current_epoch += 1

    rng = env.rng
    static_instance = env.config.static_instance
    nb_draws = env.config.max_requests_per_epoch
    (; duration, service_time, start_time) = static_instance
    depot = 1

    nb_candidates = nb_customers(static_instance)
    departure_time = get_planning_start_time(env)

    # Three independent draws; the order of the calls fixes the random stream
    drawn_locations = sample_indices(rng, nb_draws, nb_candidates)
    drawn_start_times = sample_indices(rng, nb_draws, nb_candidates)
    drawn_service_times = sample_indices(rng, nb_draws, nb_candidates)

    # Rejection sampling: keep requests reachable from the depot before their start time
    keep =
        departure_time .+ duration[depot, drawn_locations] .<=
        start_time[drawn_start_times]
    nb_kept = sum(keep)

    # Append the accepted requests to the history
    env.customer_index = vcat(env.customer_index, drawn_locations[keep])
    env.service_time = vcat(env.service_time, service_time[drawn_service_times[keep]])
    env.start_time = vcat(env.start_time, start_time[drawn_start_times[keep]])
    env.request_is_dispatched = vcat(env.request_is_dispatched, falses(nb_kept))
    env.request_epoch = vcat(env.request_epoch, fill(env.current_epoch, nb_kept))

    # Finally, rebuild the state from the updated history
    return update_state!(env)
end
"""
$TYPEDSIGNATURES

Transform env route indices into state route indices.

State indices number the undispatched locations of `env` in increasing env-index order.
An env index whose request is already dispatched is mapped to `0`.
"""
function state_route_from_env_routes(env, routes)
    nb_requests = length(env.customer_index)
    undispatched = (1:nb_requests)[.!env.request_is_dispatched]

    # env index -> position among undispatched locations (0 when dispatched)
    state_index = zeros(Int, nb_requests)
    state_index[undispatched] .= 1:length(undispatched)

    return map(route -> state_index[route], routes)
end
"""
$TYPEDSIGNATURES

Plot the environment of a DVSPEnv, restricted to the given `epoch_indices` (all epochs if not given).

All epochs of `env` are (re)drawn with `draw_all_epochs!` first. Requests are then shown
as one scatter series per epoch of appearance, and the depot (first location) as a
square marker. Extra keyword arguments are forwarded to `plot`. Return the figure.
"""
function plot_environment(
    env::DVSPEnv;
    customer_markersize=4,
    depot_markersize=7,
    alpha_depot=0.8,
    depot_color=:lightgreen,
    epoch_indices=nothing,
    kwargs...,
)
    draw_all_epochs!(env)

    request_epochs = env.request_epoch
    coordinates = coordinate(get_state(env))

    if isnothing(epoch_indices)
        epoch_indices = get_epoch_indices(env)
    end

    # axis limits spanning every location
    xlims = extrema(c.x for c in coordinates)
    ylims = extrema(c.y for c in coordinates)

    fig = plot(;
        legend=:topleft,
        xlabel="x coordinate",
        ylabel="y coordinate",
        xlims,
        ylims,
        kwargs...,
    )

    for epoch in epoch_indices
        requests = findall(==(epoch), request_epochs)
        xs = [coordinates[r].x for r in requests]
        ys = [coordinates[r].y for r in requests]
        scatter!(
            fig,
            xs,
            ys;
            label="Epoch $epoch",
            marker=:circle,
            markersize=customer_markersize,
        )
    end

    depot = coordinates[1]
    scatter!(
        fig,
        [depot.x],
        [depot.y];
        label="Depot",
        markercolor=depot_color,
        marker=:rect,
        markersize=depot_markersize,
        alpha=alpha_depot,
    )

    return fig
end
"""
$TYPEDSIGNATURES

Create a plot of routes for each epoch.

`env` is reset, then for every epoch of `epoch_indices` (all epochs if not given) the next
epoch is drawn, `routes[epoch]` (env indexation) is plotted with `plot_epoch`, and the same
routes are applied to `env`. Axis limits are shared across figures and computed from the
static instance coordinates. Return the vector of figures.
"""
function plot_routes(env::DVSPEnv, routes; epoch_indices=nothing, kwargs...)
    reset!(env)
    if isnothing(epoch_indices)
        epoch_indices = get_epoch_indices(env)
    end

    all_coordinates = env.config.static_instance.coordinate
    xlims = extrema(c.x for c in all_coordinates)
    ylims = extrema(c.y for c in all_coordinates)

    return map(epoch_indices) do epoch
        epoch_state = next_epoch!(env)
        # convert before applying the decision: the mapping depends on what is undispatched
        local_routes = state_route_from_env_routes(env, routes[epoch])
        fig = plot_epoch(epoch_state, local_routes; xlims, ylims, kwargs...)
        apply_decision!(env, routes[epoch])
        fig
    end
end
"""
$TYPEDEF

State data structure for the Dynamic Vehicle Scheduling Problem.

# Fields
$TYPEDFIELDS
"""
@kwdef struct VSPState{I}
    "associated (static) vehicle scheduling instance"
    instance::I = VSPInstance()
    "for each location, 1 if the request must be dispatched, 0 otherwise. The depot is always 0."
    is_must_dispatch::BitVector = falses(0)
    "for each location, 1 if the request can be postponed, 0 otherwise. The depot is always 0."
    is_postponable::BitVector = falses(0)
end
"""
$TYPEDSIGNATURES

Check if the given routes are feasible for `state`.

Routes must be given with *state* (local) indexation, i.e. indices into the locations of
`state.instance`, depot excluded (the depot is location `1`).

A set of routes is feasible when:
- along every route, each location is reached no later than its start time, a vehicle
  being released from a location at `start_time + service_time`;
- every must-dispatch request of `state` belongs to some route.

When `verbose` is `true`, a warning describing the first violation found is emitted.
"""
function is_feasible(state::VSPState, routes::Vector{Vector{Int}}; verbose::Bool=false)
    (; is_must_dispatch, instance) = state
    (; duration, start_time, service_time) = instance
    is_dispatched = falses(length(is_must_dispatch))

    # Check that routes follow time constraints
    for route in routes
        is_dispatched[route] .= true
        current = 1 # start at the depot
        current_time = start_time[current]
        for next in route
            current_time += duration[current, next]
            if current_time > start_time[next]
                verbose &&
                    @warn "Route $route is infeasible: time constraint violated at location $next"
                return false
            end
            # Tasks start at their fixed start time (an early vehicle waits), which is the
            # chaining rule used by `create_graph`: start + service + travel <= next start.
            current_time = start_time[next] + service_time[next]
            current = next
        end
    end

    # Check that all must dispatch requests are dispatched
    all_must_dispatched = all(is_dispatched[is_must_dispatch])
    if verbose && !all_must_dispatched
        @warn "Routes are infeasible: some must-dispatch requests are not dispatched"
    end
    return all_must_dispatched
end
"""
    compute_model_free_features(state::VSPState; env::DVSPEnv)

Return a matrix with one row per postponable location of `state` and five columns:

1. start time,
2. end time (start time + service time),
3. `duration[:, 1]`, i.e. the duration entry from the location to the depot,
4. `duration[1, :]`, i.e. the duration entry from the depot to the location,
5. start time minus `env.config.epoch_duration`.
"""
function compute_model_free_features(state::VSPState; env::DVSPEnv)
    (; instance, is_postponable) = state

    request_start = instance.start_time
    request_end = request_start .+ instance.service_time
    # first column of the duration matrix: location -> depot
    to_depot = instance.duration[:, 1]
    # first row of the duration matrix: depot -> location
    from_depot = instance.duration[1, :]
    slack = request_start .- env.config.epoch_duration

    columns = (request_start, request_end, to_depot, from_depot, slack)
    return hcat((column[is_postponable] for column in columns)...)
end
+function compute_critic_2D_features(env::DVSPEnv) + state = env.state + timeDepotRequest = state.instance.duration[:, 1] + quantileTimeToRequests = get_features_meanTimeToRequests(env) + postpon = state.is_postponable + # time_postpon = timeDepotRequest .* postpon + # quant_postpon = quantileTimeToRequests .* postpon + return hcat(timeDepotRequest, quantileTimeToRequests, postpon)' +end diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/abstract_vsp_policy.jl b/src/DynamicVehicleScheduling/DynamicVSP/policy/abstract_vsp_policy.jl new file mode 100644 index 0000000..63d8030 --- /dev/null +++ b/src/DynamicVehicleScheduling/DynamicVSP/policy/abstract_vsp_policy.jl @@ -0,0 +1,33 @@ +""" +$TYPEDEF + +Abstract type for dynamic VSP policies. +""" +abstract type AbstractDynamicVSPPolicy <: AbstractDynamicPolicy end + +""" +$TYPEDSIGNATURES + +Apply the policy to the environment. +""" +function run_policy!( + π::AbstractDynamicVSPPolicy, env::DVSPEnv; check_feasibility=true, kwargs... +) + # reset environment, and initialize variables + reset!(env) + total_cost = 0 + epoch_routes = Vector{Vector{Int}}[] + + # epoch loop + while !is_terminated(env) + next_epoch!(env) + state_routes = π(env; kwargs...) + check_feasibility && @assert is_feasible(get_state(env), state_routes) + env_routes = env_routes_from_state_routes(env, state_routes) + push!(epoch_routes, env_routes) + local_cost = apply_decision!(env, env_routes) + total_cost += local_cost + end + + return total_cost, epoch_routes +end diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/anticipative_policy.jl b/src/DynamicVehicleScheduling/DynamicVSP/policy/anticipative_policy.jl new file mode 100644 index 0000000..b6751c7 --- /dev/null +++ b/src/DynamicVehicleScheduling/DynamicVSP/policy/anticipative_policy.jl @@ -0,0 +1,18 @@ +""" +$TYPEDEF + +Anticipative policy for the Dynamic Vehicle Scheduling Problem. 
+""" +struct AnticipativeVSPPolicy <: AbstractDynamicPolicy end + +""" +$TYPEDSIGNATURES + +Apply the anticipative policy to the environment. +""" +function run_policy!(::AnticipativeVSPPolicy, env::DVSPEnv; model_builder=highs_model) + routes_anticipative = anticipative_solver(env; model_builder) + duration = env.config.static_instance.duration[env.customer_index, env.customer_index] + anticipative_costs = [cost(routes, duration) for routes in routes_anticipative] + return sum(anticipative_costs), routes_anticipative +end diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/greedy_policy.jl b/src/DynamicVehicleScheduling/DynamicVSP/policy/greedy_policy.jl new file mode 100644 index 0000000..f6c1654 --- /dev/null +++ b/src/DynamicVehicleScheduling/DynamicVSP/policy/greedy_policy.jl @@ -0,0 +1,34 @@ +""" +$TYPEDEF + +Greedy policy for the Dynamic Vehicle Scheduling Problem. +Dispatch customers as soon as they appear. +""" +struct GreedyVSPPolicy <: AbstractDynamicVSPPolicy end + +function (π::GreedyVSPPolicy)(env::DVSPEnv; model_builder=highs_model) + nb_postponable_requests = sum(get_state(env).is_postponable) + θ = ones(nb_postponable_requests) * 1e9 + routes = prize_collecting_vsp(θ; instance=get_state(env), model_builder) + return routes +end + +# function run_policy!(π::GreedyVSPPolicy, env::DVSPEnv; check_feasibility=true, kwargs...) +# # reset environment, and initialize variables +# reset!(env) +# total_cost = 0 +# epoch_routes = Vector{Vector{Int}}[] + +# # epoch loop +# while !is_terminated(env) +# next_epoch!(env) +# state_routes = π(env; kwargs...) 
+# check_feasibility && @assert is_feasible(get_state(env), state_routes) +# env_routes = env_routes_from_state_routes(env, state_routes) +# push!(epoch_routes, env_routes) +# local_cost = apply_decision!(env, env_routes) +# total_cost += local_cost +# end + +# return total_cost, epoch_routes +# end diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/kleopatra_policy.jl b/src/DynamicVehicleScheduling/DynamicVSP/policy/kleopatra_policy.jl new file mode 100644 index 0000000..0a3708f --- /dev/null +++ b/src/DynamicVehicleScheduling/DynamicVSP/policy/kleopatra_policy.jl @@ -0,0 +1,53 @@ +""" +$TYPEDEF + +Kleopatra policy for the Dynamic Vehicle Scheduling Problem. +""" +struct KleopatraVSPPolicy{P} <: AbstractDynamicVSPPolicy + prize_predictor::P + has_2D_features::Bool +end + +""" +$TYPEDSIGNATURES + +Custom constructor for [`KleopatraVSPPolicy`](@ref). +""" +function KleopatraVSPPolicy(prize_predictor; has_2D_features=nothing) + has_2D_features = if isnothing(has_2D_features) + size(prize_predictor[1].weight, 2) == 2 + else + has_2D_features + end + return KleopatraVSPPolicy(prize_predictor, has_2D_features) +end + +function (π::KleopatraVSPPolicy)(env::DVSPEnv; model_builder=highs_model) + (; prize_predictor, has_2D_features) = π + x = has_2D_features ? 
compute_2D_features(env) : compute_features(env) + θ = prize_predictor(x) + routes = prize_collecting_vsp(θ; instance=get_state(env), model_builder) + return routes +end + +# function run_policy!( +# π::KleopatraVSP, env::DVSPEnv; check_feasibility=true, model_builder=highs_model +# ) +# # reset environment, and initialize variables +# reset!(env) +# total_cost = 0 +# epoch_routes = Vector{Vector{Int}}[] + +# # epoch loop +# while !is_terminated(env) +# next_epoch!(env) +# state_routes = π(env; model_builder) +# check_feasibility && @assert is_feasible(get_state(env), state_routes) +# env_routes = env_routes_from_state_routes(env, state_routes) +# push!(epoch_routes, env_routes) +# local_cost = apply_decision!(env, env_routes) +# total_cost += local_cost +# end + +# return total_cost, epoch_routes +# end diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/lazy_policy.jl b/src/DynamicVehicleScheduling/DynamicVSP/policy/lazy_policy.jl new file mode 100644 index 0000000..5ce71ca --- /dev/null +++ b/src/DynamicVehicleScheduling/DynamicVSP/policy/lazy_policy.jl @@ -0,0 +1,34 @@ +""" +$TYPEDEF + +Lazy policy for the Dynamic Vehicle Scheduling Problem. +Dispatch customers only when necessary (i.e. must-dispatch). +""" +struct LazyVSPPolicy <: AbstractDynamicVSPPolicy end + +function (π::LazyVSPPolicy)(env::DVSPEnv; model_builder=highs_model) + nb_postponable_requests = sum(get_state(env).is_postponable) + θ = ones(nb_postponable_requests) * -1e9 + routes = prize_collecting_vsp(θ; instance=get_state(env), model_builder) + return routes +end + +# function run_policy!(π::LazyVSPPolicy, env::DVSPEnv; check_feasibility=true, kwargs...) +# # reset environment, and initialize variables +# reset!(env) +# total_cost = 0 +# epoch_routes = Vector{Vector{Int}}[] + +# # epoch loop +# while !is_terminated(env) +# next_epoch!(env) +# state_routes = π(env; kwargs...) 
+# check_feasibility && @assert is_feasible(get_state(env), state_routes) +# env_routes = env_routes_from_state_routes(env, state_routes) +# push!(epoch_routes, env_routes) +# local_cost = apply_decision!(env, env_routes) +# total_cost += local_cost +# end + +# return total_cost, epoch_routes +# end diff --git a/src/DynamicVehicleScheduling/DynamicVSP/utils.jl b/src/DynamicVehicleScheduling/DynamicVSP/utils.jl new file mode 100644 index 0000000..1be5e4d --- /dev/null +++ b/src/DynamicVehicleScheduling/DynamicVSP/utils.jl @@ -0,0 +1,11 @@ +""" +$TYPEDEF + +Basic point structure. +""" +struct Point{T} + x::T + y::T +end + +Base.show(io::IO, p::Point) = print(io, "($(p.x), $(p.y))") diff --git a/src/DynamicVehicleScheduling/DynamicVSP/vsp/instance.jl b/src/DynamicVehicleScheduling/DynamicVSP/vsp/instance.jl new file mode 100644 index 0000000..512a0fe --- /dev/null +++ b/src/DynamicVehicleScheduling/DynamicVSP/vsp/instance.jl @@ -0,0 +1,65 @@ +""" +$TYPEDEF + +Instance data structure for the (deterministic and static) Vehicle Scheduling Problem. + +# Fields +$TYPEDFIELDS +""" +@kwdef struct VSPInstance{T} + "coordinates of the locations. The first one is always the depot." + coordinate::Vector{Point{T}} = Point{Float64}[] + "service time at each location" + service_time::Vector{T} = Float64[] + "start time at each location" + start_time::Vector{T} = Float64[] + "duration matrix between locations" + duration::Matrix{T} = zeros(Float64, 0, 0) +end + +function Base.show(io::IO, instance::VSPInstance) + N = nb_customers(instance) + return print(io, "VSPInstance with $N customers") +end + +""" +$TYPEDSIGNATURES + +Return the number of locations in `instance` (customers + depot). +""" +nb_locations(instance::VSPInstance) = length(instance.coordinate) + +""" +$TYPEDSIGNATURES + +Return the number of customers in `instance` (excluding the depot). +""" +nb_customers(instance::VSPInstance) = nb_locations(instance) - 1 + +""" +$TYPEDSIGNATURES + +Get the service time vector. 
+""" +service_time(instance::VSPInstance) = instance.service_time + +""" +$TYPEDSIGNATURES + +Get the coordinates vector. +""" +coordinate(instance::VSPInstance) = instance.coordinate + +""" +$TYPEDSIGNATURES + +Get the duration matrix. +""" +duration(instance::VSPInstance) = instance.duration + +""" +$TYPEDSIGNATURES + +Get the start time vector. +""" +start_time(instance::VSPInstance) = instance.start_time diff --git a/src/DynamicVehicleScheduling/DynamicVSP/vsp/parsing.jl b/src/DynamicVehicleScheduling/DynamicVSP/vsp/parsing.jl new file mode 100644 index 0000000..21589fd --- /dev/null +++ b/src/DynamicVehicleScheduling/DynamicVSP/vsp/parsing.jl @@ -0,0 +1,95 @@ +""" +$TYPEDSIGNATURES + +Create a `VSPInstance` from file `filepath` containing a VRPTW instance. +It uses time window values to compute task times as the middle of the interval. + +Round all values to `Int` if `rounded=true`. +Normalize all time values by the `normalization` parameter. +""" +function read_vsp_instance(filepath::String; rounded::Bool=false, normalization=3600.0) + type = rounded ? 
Int : Float64 + mode = "" + local edge_weight_type + local edge_weight_format + duration_matrix = Vector{type}[] + nb_locations = 0 + local demand + local service_time + local coordinates + local start_time + + file = open(filepath, "r") + for line in eachline(file) + line = strip(line, [' ', '\n', '\t']) + if line == "" + continue + elseif startswith(line, "DIMENSION") + nb_locations = parse(Int, split(line, " : ")[2]) + demand = zeros(type, nb_locations) + service_time = zeros(type, nb_locations) + coordinates = zeros(type, (nb_locations, 2)) + start_time = zeros(type, nb_locations) + elseif startswith(line, "EDGE_WEIGHT_TYPE") + edge_weight_type = split(line, " : ")[2] + elseif startswith(line, "EDGE_WEIGHT_FORMAT") + edge_weight_format = split(line, " : ")[2] + elseif startswith(line, "NODE_COORD_SECTION") + mode = "coord" + elseif line == "DEMAND_SECTION" + mode = "demand" + elseif line == "DEPOT_SECTION" + mode = "depot" + elseif line == "EDGE_WEIGHT_SECTION" + mode = "edge_weights" + @assert edge_weight_type == "EXPLICIT" + @assert edge_weight_format == "FULL_MATRIX" + elseif line == "TIME_WINDOW_SECTION" + mode = "time_windows" + elseif line == "SERVICE_TIME_SECTION" + mode = "service_t" + elseif line == "EOF" + break + elseif mode == "coord" + node, x, y = split(line) # Split by whitespace or \t, skip duplicate whitespace + node = parse(Int, node) + x, y = (parse(type, x), parse(type, y)) + coordinates[node, :] = [x, y] + elseif mode == "demand" + node, d = split(line) + node, d = parse(Int, node), parse(type, d) + if node == 1 # depot + @assert d == 0 + end + demand[node] = d + elseif mode == "edge_weights" + push!(duration_matrix, [parse(type, e) for e in split(line)]) + elseif mode == "service_t" + node, t = split(line) + node = parse(Int, node) + t = parse(type, t) + if node == 1 # depot + @assert t == 0 + end + service_time[node] = t + elseif mode == "time_windows" + node, l, u = split(line) + node = parse(Int, node) + l, u = parse(type, l), 
parse(type, u) + start_time[node] = (u + l) / 2 + end + end + close(file) + + duration = mapreduce(permutedims, vcat, duration_matrix) + + coordinate = [ + Point(x / normalization, y / normalization) for + (x, y) in zip(coordinates[:, 1], coordinates[:, 2]) + ] + service_time ./= normalization + start_time ./= normalization + duration ./= normalization + + return VSPInstance(; coordinate, service_time, start_time, duration) +end diff --git a/src/DynamicVehicleScheduling/DynamicVSP/vsp/plot.jl b/src/DynamicVehicleScheduling/DynamicVSP/vsp/plot.jl new file mode 100644 index 0000000..a9f03de --- /dev/null +++ b/src/DynamicVehicleScheduling/DynamicVSP/vsp/plot.jl @@ -0,0 +1,39 @@ +""" +$TYPEDSIGNATURES + +Plot the given static VSP `instance`. +""" +function plot_instance( + instance::VSPInstance; + customer_markersize=4, + depot_markersize=7, + alpha_depot=0.8, + customer_color=:lightblue, + depot_color=:lightgreen, + kwargs..., +) + x = [p.x for p in instance.coordinate] + y = [p.y for p in instance.coordinate] + + fig = plot(; legend=:topleft, xlabel="x coordinate", ylabel="y coordinate", kwargs...) + scatter!( + fig, + x[2:end], + y[2:end]; + label="Customers", + markercolor=customer_color, + marker=:circle, + markersize=customer_markersize, + ) + scatter!( + fig, + [x[1]], + [y[1]]; + label="Depot", + markercolor=depot_color, + marker=:rect, + markersize=depot_markersize, + alpha=alpha_depot, + ) + return fig +end diff --git a/src/DynamicVehicleScheduling/DynamicVSP/vsp/solution.jl b/src/DynamicVehicleScheduling/DynamicVSP/vsp/solution.jl new file mode 100644 index 0000000..d6fb25e --- /dev/null +++ b/src/DynamicVehicleScheduling/DynamicVSP/vsp/solution.jl @@ -0,0 +1,50 @@ +""" +$TYPEDEF + +Solution for the static Vehicle Scheduling Problem. + +# Fields +$TYPEDFIELDS +""" +struct VSPSolution + "list of routes, each route being a list of request indices in corresponding instance (excluding the depot)." 
+ routes::Vector{Vector{Int}} + "size (nb_locations, nb_locations). `edge_matrix[i, j]` is equal to 1 if a route takes edge `(i, j)`." + edge_matrix::BitMatrix +end + +""" +$TYPEDSIGNATURES + +Get routes from `solution`. +""" +routes(solution::VSPSolution) = solution.routes + +""" +$TYPEDSIGNATURES + +Get edge matrix from `solution`. +""" +edge_matrix(solution::VSPSolution) = solution.edge_matrix + +""" +$TYPEDSIGNATURES + +Build a `VSPSolution` from routes. Set `max_index` to manually define the size of the `edge_index` matrix. +""" +function VSPSolution(routes::Vector{Vector{Int}}; max_index=nothing) + if length(routes) == 0 && isnothing(max_index) + return VSPSolution(routes, falses(0, 0)) + end + N = isnothing(max_index) ? maximum(maximum(route) for route in routes) : max_index + edge_matrix = falses(N, N) + for route in routes + old = 1 + for r in route + edge_matrix[old, r] = true + old = r + end + edge_matrix[old, 1] = true + end + return VSPSolution(routes, edge_matrix) +end diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl index ef9cbb9..329ea44 100644 --- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl +++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl @@ -1,6 +1,73 @@ module DynamicVehicleScheduling using ..Utils + +using Base: @kwdef +using ChainRulesCore using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES +using Graphs +using HiGHS +using InferOpt +using IterTools: partition +using JSON +using JuMP +using Plots: plot, plot!, scatter! 
+using Printf: @printf +using Random: AbstractRNG, MersenneTwister, seed!, randperm +using Requires: @require +using Statistics: mean, quantile + +include("utils.jl") +include("dynamic_config.jl") +include("abstract_policy.jl") + +# Dynamic Vehicle Scheduling +include("DynamicVSP/utils.jl") + +include("DynamicVSP/vsp/instance.jl") +include("DynamicVSP/vsp/parsing.jl") +include("DynamicVSP/vsp/solution.jl") +include("DynamicVSP/vsp/plot.jl") + +include("DynamicVSP/environment/state.jl") +include("DynamicVSP/environment/environment.jl") +include("DynamicVSP/environment/plot.jl") + +include("DynamicVSP/algorithms/prize_collecting_vsp.jl") +include("DynamicVSP/algorithms/anticipative_solver.jl") + +include("DynamicVSP/learning/features.jl") +include("DynamicVSP/learning/2d_features.jl") +include("DynamicVSP/learning/dataset.jl") + +include("DynamicVSP/policy/abstract_vsp_policy.jl") +include("DynamicVSP/policy/greedy_policy.jl") +include("DynamicVSP/policy/lazy_policy.jl") +include("DynamicVSP/policy/anticipative_policy.jl") +include("DynamicVSP/policy/kleopatra_policy.jl") + +export highs_model, filtered_readdir + +export solve_hindsight_problem + +export AbstractDynamicPolicy, BasicDynamicPolicy + +export GreedyPolicy, LazyPolicy, RandomPolicy, Kleopatra + +export run_policy + +export compute_features, + compute_2D_features, compute_critic_features, compute_critic_2D_features, load_dataset +export VSPInstance, + read_vsp_instance, start_time, env_routes_from_state_routes, state_route_from_env_routes +export DVSPEnv, prize_collecting_vsp +export anticipative_solver +export VSPSolution +export load_VSP_dataset +export GreedyVSPPolicy, + LazyVSPPolicy, AnticipativeVSPPolicy, run_policy!, KleopatraVSPPolicy +export plot_routes, plot_instance, plot_environment, plot_epoch +export get_state +export nb_epochs, get_epoch_indices end diff --git a/src/DynamicVehicleScheduling/abstract_policy.jl b/src/DynamicVehicleScheduling/abstract_policy.jl new file mode 100644 index 
0000000..951efd0 --- /dev/null +++ b/src/DynamicVehicleScheduling/abstract_policy.jl @@ -0,0 +1,5 @@ +abstract type AbstractDynamicPolicy end + +function (π::AbstractDynamicPolicy)(env; kwargs...) + throw("Not implemented") +end diff --git a/src/DynamicVehicleScheduling/dynamic_config.jl b/src/DynamicVehicleScheduling/dynamic_config.jl new file mode 100644 index 0000000..3d052bf --- /dev/null +++ b/src/DynamicVehicleScheduling/dynamic_config.jl @@ -0,0 +1,24 @@ +""" +$TYPEDEF + +Config data structures for dynamic vehicle routing and scheduling problems. + +# Fields +$TYPEDFIELDS +""" +@kwdef struct DynamicConfig{I,S,T} + "static instance to sample arriving requests from" + static_instance::I + "max number of new requests per epoch (rejection sampling)" + max_requests_per_epoch::Int = 100 + "time distance between epoch start and routes start" + Δ_dispatch::T = 3600 + "duration of each epoch" + epoch_duration::T = 3600 + "first epoch index (time = epoch_duration x first_epoch)" + first_epoch::Int + "last epoch index" + last_epoch::Int + "seed for customer sampling" + seed::S +end diff --git a/src/DynamicVehicleScheduling/utils.jl b/src/DynamicVehicleScheduling/utils.jl new file mode 100644 index 0000000..1e17906 --- /dev/null +++ b/src/DynamicVehicleScheduling/utils.jl @@ -0,0 +1,34 @@ +""" +$TYPEDSIGNATURES + +Sample k random different indices from 2 to N+1. +""" +sample_indices(rng::AbstractRNG, k, N) = randperm(rng, N)[1:k] .+ 1 + +""" +$TYPEDSIGNATURES + +Compute the total cost of a set of routes given a distance matrix, i.e. the sum of the distances between each location in the route. +Note that the first location is implicitly assumed to be the depot, and should not appear in the route. 
+""" +function cost(routes::Vector{Vector{Int}}, duration::AbstractMatrix) + total = zero(eltype(duration)) + for route in routes + current_location = 1 + for r in route + total += duration[current_location, r] + current_location = r + end + total += duration[current_location, 1] + end + return total +end + +# """ +# $TYPEDSIGNATURES + +# Readdir all files in `data` with extension `file_extension`. +# """ +# function filtered_readdir(data, file_extension=".txt"; kwargs...) +# return filter(x -> endswith(x, file_extension), readdir(data; kwargs...)) +# end diff --git a/src/Utils/model_builders.jl b/src/Utils/model_builders.jl index 95df58b..4f0c838 100644 --- a/src/Utils/model_builders.jl +++ b/src/Utils/model_builders.jl @@ -5,7 +5,6 @@ Initialize a HiGHS model (with disabled logging). """ function highs_model() model = Model(HiGHS.Optimizer) - # set_attribute(model, "log_to_console", false) return model end From 2f406e0e85e8ea84f8985deadf30b1854e4d4190 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Tue, 22 Apr 2025 10:29:56 +0200 Subject: [PATCH 03/29] wip --- src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl index 329ea44..8f24828 100644 --- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl +++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl @@ -3,7 +3,7 @@ module DynamicVehicleScheduling using ..Utils using Base: @kwdef -using ChainRulesCore +# using ChainRulesCore using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES using Graphs using HiGHS From 5a2e85274c9fd685238ae2681e302676eeae10ae Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Thu, 3 Jul 2025 09:44:53 +0200 Subject: [PATCH 04/29] Implement generate_sample interface --- Project.toml | 4 ++ src/Argmax/Argmax.jl | 42 ++++++++++++------- src/DecisionFocusedLearningBenchmarks.jl | 6 +-- 
.../algorithms/prize_collecting_vsp.jl | 16 +++---- .../DynamicVehicleScheduling.jl | 2 +- .../FixedSizeShortestPath.jl | 41 +++++------------- .../PortfolioOptimization.jl | 17 +++++++- src/Ranking/Ranking.jl | 20 ++++----- .../StochasticVehicleScheduling.jl | 39 +++++++---------- src/SubsetSelection/SubsetSelection.jl | 42 ++++++++----------- src/Utils/Utils.jl | 7 ++-- src/Utils/interface.jl | 29 ++++++++++++- test/subset_selection.jl | 7 ++-- 13 files changed, 144 insertions(+), 128 deletions(-) diff --git a/Project.toml b/Project.toml index d9500c6..ed7fde9 100644 --- a/Project.toml +++ b/Project.toml @@ -13,6 +13,8 @@ Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6" HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b" Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0" Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9" +IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e" +JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" JuMP = "4076af6c-e467-56ae-b986-b466b2749572" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Metalhead = "dbeba491-748d-5e0e-a39e-b530a07fa0cc" @@ -37,6 +39,8 @@ Graphs = "1.11" HiGHS = "1.9" Images = "0.26.1" Ipopt = "1.6" +IterTools = "1.10.0" +JSON = "0.21.4" JuMP = "1.22" LinearAlgebra = "1" Metalhead = "0.9.4" diff --git a/src/Argmax/Argmax.jl b/src/Argmax/Argmax.jl index 0864a96..a9590df 100644 --- a/src/Argmax/Argmax.jl +++ b/src/Argmax/Argmax.jl @@ -62,25 +62,39 @@ end """ $TYPEDSIGNATURES - -Generate a dataset of labeled instances for the argmax problem. 
""" -function Utils.generate_dataset( - bench::ArgmaxBenchmark, dataset_size::Int=10; seed::Int=0, noise_std=0.0 +function Utils.generate_sample( + bench::ArgmaxBenchmark, rng::AbstractRNG; noise_std::Float32=0.0f0 ) (; instance_dim, nb_features, encoder) = bench - rng = MersenneTwister(seed) - features = [randn(rng, Float32, nb_features, instance_dim) for _ in 1:dataset_size] - costs = encoder.(features) - noisy_solutions = [ - one_hot_argmax(θ + noise_std * randn(rng, Float32, instance_dim)) for θ in costs - ] - return [ - DataSample(; x, θ_true, y_true) for - (x, θ_true, y_true) in zip(features, costs, noisy_solutions) - ] + features = randn(rng, Float32, nb_features, instance_dim) + costs = encoder(features) + noisy_solution = one_hot_argmax(costs + noise_std * randn(rng, Float32, instance_dim)) + return DataSample(; x=features, θ_true=costs, y_true=noisy_solution) end +# """ +# $TYPEDSIGNATURES + +# Generate a dataset of labeled instances for the argmax problem. +# """ +# function Utils.generate_dataset( +# bench::ArgmaxBenchmark, dataset_size::Int; noise_std=0.0, kwargs... +# ) +# return Utils.generate_dataset(bench, dataset_size; noise_std=noise_std, kwargs...) 
+# # (; instance_dim, nb_features, encoder) = bench +# # rng = MersenneTwister(seed) +# # features = [randn(rng, Float32, nb_features, instance_dim) for _ in 1:dataset_size] +# # costs = encoder.(features) +# # noisy_solutions = [ +# # one_hot_argmax(θ + noise_std * randn(rng, Float32, instance_dim)) for θ in costs +# # ] +# # return [ +# # DataSample(; x, θ_true, y_true) for +# # (x, θ_true, y_true) in zip(features, costs, noisy_solutions) +# # ] +# end + """ $TYPEDSIGNATURES diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl index 18cd94f..74bb04a 100644 --- a/src/DecisionFocusedLearningBenchmarks.jl +++ b/src/DecisionFocusedLearningBenchmarks.jl @@ -54,7 +54,7 @@ include("Warcraft/Warcraft.jl") include("FixedSizeShortestPath/FixedSizeShortestPath.jl") include("PortfolioOptimization/PortfolioOptimization.jl") include("StochasticVehicleScheduling/StochasticVehicleScheduling.jl") -include("DynamicVehicleScheduling/DynamicVehicleScheduling.jl") +# include("DynamicVehicleScheduling/DynamicVehicleScheduling.jl") using .Utils using .Argmax @@ -64,10 +64,10 @@ using .Warcraft using .FixedSizeShortestPath using .PortfolioOptimization using .StochasticVehicleScheduling -using .DynamicVehicleScheduling +# using .DynamicVehicleScheduling # Interface -export AbstractBenchmark, DataSample +export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark, DataSample export generate_dataset export generate_statistical_model export generate_maximizer, maximizer_kwargs diff --git a/src/DynamicVehicleScheduling/DynamicVSP/algorithms/prize_collecting_vsp.jl b/src/DynamicVehicleScheduling/DynamicVSP/algorithms/prize_collecting_vsp.jl index d1fbcce..531169c 100644 --- a/src/DynamicVehicleScheduling/DynamicVSP/algorithms/prize_collecting_vsp.jl +++ b/src/DynamicVehicleScheduling/DynamicVSP/algorithms/prize_collecting_vsp.jl @@ -205,11 +205,11 @@ function _objective_value(θ, routes; instance) return -total, g end -function 
ChainRulesCore.rrule(::typeof(my_objective_value), θ, routes; instance) - total, g = _objective_value(θ, routes; instance) - function pullback(dy) - g = g .* dy - return NoTangent(), g, NoTangent() - end - return total, pullback -end +# function ChainRulesCore.rrule(::typeof(my_objective_value), θ, routes; instance) +# total, g = _objective_value(θ, routes; instance) +# function pullback(dy) +# g = g .* dy +# return NoTangent(), g, NoTangent() +# end +# return total, pullback +# end diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl index 8f24828..e1fddbc 100644 --- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl +++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl @@ -7,7 +7,7 @@ using Base: @kwdef using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES using Graphs using HiGHS -using InferOpt +# using InferOpt using IterTools: partition using JSON using JuMP diff --git a/src/FixedSizeShortestPath/FixedSizeShortestPath.jl b/src/FixedSizeShortestPath/FixedSizeShortestPath.jl index fd60de2..ed799ea 100644 --- a/src/FixedSizeShortestPath/FixedSizeShortestPath.jl +++ b/src/FixedSizeShortestPath/FixedSizeShortestPath.jl @@ -103,45 +103,24 @@ function Utils.generate_maximizer(bench::FixedSizeShortestPathBenchmark; use_dij return shortest_path_maximizer end -""" -$TYPEDSIGNATURES - -Generate dataset for the shortest path problem. 
-""" -function Utils.generate_dataset( - bench::FixedSizeShortestPathBenchmark, - dataset_size::Int=10; - seed::Int=0, - type::Type=Float32, +function Utils.generate_sample( + bench::FixedSizeShortestPathBenchmark, rng::AbstractRNG; type::Type=Float32 ) - # Set seed - rng = MersenneTwister(seed) (; graph, p, deg, ν) = bench - + features = randn(rng, Float32, bench.p) E = Graphs.ne(graph) - - # Features - features = [randn(rng, type, p) for _ in 1:dataset_size] - # True weights B = rand(rng, Bernoulli(0.5), E, p) ξ = if ν == 0.0 - [ones(type, E) for _ in 1:dataset_size] + ones(type, E) else - [rand(rng, Uniform{type}(1 - ν, 1 + ν), E) for _ in 1:dataset_size] + rand(rng, Uniform{type}(1 - ν, 1 + ν), E) end - costs = [ - -(1 .+ (3 .+ B * zᵢ ./ type(sqrt(p))) .^ deg) .* ξᵢ for (ξᵢ, zᵢ) in zip(ξ, features) - ] - - shortest_path_maximizer = Utils.generate_maximizer(bench) - - # Label solutions - solutions = shortest_path_maximizer.(costs) - return [ - DataSample(; x, θ_true, y_true) for - (x, θ_true, y_true) in zip(features, costs, solutions) - ] + costs = -(1 .+ (3 .+ B * features ./ type(sqrt(p))) .^ deg) .* ξ + + maximizer = Utils.generate_maximizer(bench) + solution = maximizer(costs) + return DataSample(; x=features, θ_true=costs, y_true=solution) end """ diff --git a/src/PortfolioOptimization/PortfolioOptimization.jl b/src/PortfolioOptimization/PortfolioOptimization.jl index 308770a..32c32bc 100644 --- a/src/PortfolioOptimization/PortfolioOptimization.jl +++ b/src/PortfolioOptimization/PortfolioOptimization.jl @@ -7,7 +7,7 @@ using Flux: Chain, Dense using Ipopt: Ipopt using JuMP: @variable, @objective, @constraint, optimize!, value, Model, set_silent using LinearAlgebra: I -using Random: MersenneTwister +using Random: AbstractRNG, MersenneTwister """ $TYPEDEF @@ -82,6 +82,21 @@ function Utils.generate_maximizer(bench::PortfolioOptimizationBenchmark) return portfolio_maximizer end +function Utils.generate_sample( + bench::PortfolioOptimizationBenchmark, 
rng::AbstractRNG; type::Type=Float32 +) + (; d, p, deg, ν, L, f) = bench + features = randn(rng, type, p, d) + B = rand(rng, Bernoulli(0.5), d, p) + c̄ = (0.05 / type(sqrt(p)) .* B * features .+ 0.1^(1 / deg)) .^ deg + costs = c̄ .+ L * f .+ 0.01 * ν * randn(rng, type, d) + + maximizer = Utils.generate_maximizer(bench) + solution = maximizer(costs) + + return DataSample(; x=features, θ_true=c̄, y_true=solution) +end + """ $TYPEDSIGNATURES diff --git a/src/Ranking/Ranking.jl b/src/Ranking/Ranking.jl index 8b93b8a..c6ec398 100644 --- a/src/Ranking/Ranking.jl +++ b/src/Ranking/Ranking.jl @@ -61,22 +61,16 @@ end """ $TYPEDSIGNATURES -Generate a dataset of labeled instances for the ranking problem. +Generate a labeled sample for the ranking problem. """ -function Utils.generate_dataset( - bench::RankingBenchmark, dataset_size::Int=10; seed::Int=0, noise_std=0.0 +function Utils.generate_sample( + bench::RankingBenchmark, rng::AbstractRNG; noise_std::Float32=0.0f0 ) (; instance_dim, nb_features, encoder) = bench - rng = MersenneTwister(seed) - features = [randn(rng, Float32, nb_features, instance_dim) for _ in 1:dataset_size] - costs = encoder.(features) - noisy_solutions = [ - ranking(θ .+ noise_std * randn(rng, Float32, instance_dim)) for θ in costs - ] - return [ - DataSample(; x, θ_true, y_true) for - (x, θ_true, y_true) in zip(features, costs, noisy_solutions) - ] + features = randn(rng, Float32, nb_features, instance_dim) + costs = encoder(features) + noisy_solution = ranking(costs .+ noise_std * randn(rng, Float32, instance_dim)) + return DataSample(; x=features, θ_true=costs, y_true=noisy_solution) end """ diff --git a/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl b/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl index 150f147..83da492 100644 --- a/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl +++ b/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl @@ -73,43 +73,32 @@ end """ $TYPEDSIGNATURES -Create a 
dataset of `dataset_size` instances for the given `StochasticVehicleSchedulingBenchmark`. -If you want to not add label solutions in the dataset, set `compute_solutions=false`. +Generate a sample for the given `StochasticVehicleSchedulingBenchmark`. +If you want to not add label solutions in the sample, set `compute_solutions=false`. By default, they will be computed using column generation. Note that computing solutions can be time-consuming, especially for large instances. You can also use instead `compact_mip` or `compact_linearized_mip` as the algorithm to compute solutions. If you want to provide a custom algorithm to compute solutions, you can pass it as the `algorithm` keyword argument. If `algorithm` takes keyword arguments, you can pass them as well directly in `kwargs...`. -If `store_city=false`, the coordinates and unnecessary information about instances will not be stored in the dataset. +If `store_city=false`, the coordinates and unnecessary information about instances will not be stored in the sample. 
""" -function Utils.generate_dataset( +function Utils.generate_sample( benchmark::StochasticVehicleSchedulingBenchmark, - dataset_size::Int; + rng::AbstractRNG; + store_city=true, compute_solutions=true, - seed=nothing, - rng=MersenneTwister(0), algorithm=column_generation_algorithm, - store_city=true, kwargs..., ) (; nb_tasks, nb_scenarios) = benchmark - Random.seed!(rng, seed) - instances = [ - Instance(; nb_tasks, nb_scenarios, rng, store_city) for _ in 1:dataset_size - ] - features = get_features.(instances) - if compute_solutions - solutions = [algorithm(instance; kwargs...).value for instance in instances] - return [ - DataSample(; x=feature, instance, y_true=solution) for - (instance, feature, solution) in zip(instances, features, solutions) - ] + instance = Instance(; nb_tasks, nb_scenarios, rng, store_city) + x = get_features(instance) + y_true = if compute_solutions + algorithm(instance; kwargs...).value # TODO: modify algorithms to directly return the solution + else + nothing end - # else - return [ - DataSample(; x=feature, instance) for - (instance, feature) in zip(instances, features) - ] + return DataSample(; x, instance, y_true) end """ @@ -126,7 +115,7 @@ end $TYPEDSIGNATURES """ function Utils.generate_maximizer( - bench::StochasticVehicleSchedulingBenchmark; model_builder=highs_model + ::StochasticVehicleSchedulingBenchmark; model_builder=highs_model ) return StochasticVechicleSchedulingMaximizer(model_builder) end diff --git a/src/SubsetSelection/SubsetSelection.jl b/src/SubsetSelection/SubsetSelection.jl index 0e738a5..085324d 100644 --- a/src/SubsetSelection/SubsetSelection.jl +++ b/src/SubsetSelection/SubsetSelection.jl @@ -17,11 +17,13 @@ without knowing their values, but only observing some features. 
# Fields $TYPEDFIELDS """ -struct SubsetSelectionBenchmark <: AbstractBenchmark +struct SubsetSelectionBenchmark{M} <: AbstractBenchmark "total number of items" n::Int "number of items to select" k::Int + "hidden unknown mapping from features to costs" + mapping::M end function Base.show(io::IO, bench::SubsetSelectionBenchmark) @@ -29,9 +31,14 @@ function Base.show(io::IO, bench::SubsetSelectionBenchmark) return print(io, "SubsetSelectionBenchmark(n=$n, k=$k)") end -function SubsetSelectionBenchmark(; n::Int=25, k::Int=5) +function SubsetSelectionBenchmark(; n::Int=25, k::Int=5, identity_mapping::Bool=true) @assert n >= k "number of items n must be greater than k" - return SubsetSelectionBenchmark(n, k) + mapping = if identity_mapping + copy + else + Dense(n => n; bias=false) + end + return SubsetSelectionBenchmark(n, k, mapping) end function top_k(v::AbstractVector, k::Int) @@ -54,29 +61,14 @@ end """ $TYPEDSIGNATURES -Generate a dataset of labeled instances for the subset selection problem. -The mapping between features and cost is identity. +Generate a labeled instance for the subset selection problem. 
""" -function Utils.generate_dataset( - bench::SubsetSelectionBenchmark, - dataset_size::Int=10; - seed::Int=0, - identity_mapping=true, -) - (; n, k) = bench - rng = MersenneTwister(seed) - features = [randn(rng, Float32, n) for _ in 1:dataset_size] - costs = if identity_mapping - copy(features) # we assume that the cost is the same as the feature - else - mapping = Dense(n => n; bias=false) - mapping.(features) - end - solutions = top_k.(costs, k) - return [ - DataSample(; x, θ_true, y_true) for - (x, θ_true, y_true) in zip(features, costs, solutions) - ] +function Utils.generate_sample(bench::SubsetSelectionBenchmark, rng::AbstractRNG) + (; n, k, mapping) = bench + features = randn(rng, Float32, n) + costs = mapping(features) + solution = top_k(costs, k) + return DataSample(; x=features, θ_true=costs, y_true=solution) end """ diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl index 60b5b92..58bc161 100644 --- a/src/Utils/Utils.jl +++ b/src/Utils/Utils.jl @@ -5,6 +5,7 @@ using Flux: softplus using HiGHS: HiGHS using JuMP: Model using LinearAlgebra: dot +using Random: Random, MersenneTwister using SCIP: SCIP using SimpleWeightedGraphs: SimpleWeightedDiGraph using StatsBase: StatsBase @@ -18,9 +19,9 @@ include("model_builders.jl") export DataSample -export AbstractBenchmark -export generate_dataset, - generate_statistical_model, generate_maximizer, plot_data, compute_gap +export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark +export generate_dataset, generate_statistical_model, generate_maximizer, generate_sample +export plot_data, compute_gap export maximizer_kwargs export grid_graph, get_path, path_to_matrix export neg_tensor, squeeze_last_dims, average_tensor diff --git a/src/Utils/interface.jl b/src/Utils/interface.jl index 96a2a39..5f4188c 100644 --- a/src/Utils/interface.jl +++ b/src/Utils/interface.jl @@ -15,13 +15,40 @@ The following methods are optional: """ abstract type AbstractBenchmark end +""" +$TYPEDEF +""" +abstract type 
AbstractStochasticBenchmark <: AbstractBenchmark end + +""" +$TYPEDEF +""" +abstract type AbstractDynamicBenchmark <: AbstractStochasticBenchmark end + +""" + generate_sample(::AbstractBenchmark, rng::AbstractRNG; kwargs...) + +Do not always exist, interface to make [`generate_dataset`](@ref) work. +Either implement this or generate_dataset. +""" +function generate_sample end + """ generate_dataset(::AbstractBenchmark, dataset_size::Int; kwargs...) -> Vector{<:DataSample} Generate a `Vector` of [`DataSample`](@ref) of length `dataset_size` for given benchmark. Content of the dataset can be visualized using [`plot_data`](@ref), when it applies. """ -function generate_dataset end +function generate_dataset( + bench::AbstractBenchmark, + dataset_size::Int; + seed=nothing, + rng=MersenneTwister(0), + kwargs..., +) + Random.seed!(rng, seed) + return [generate_sample(bench, rng; kwargs...) for _ in 1:dataset_size] +end """ generate_maximizer(::AbstractBenchmark; kwargs...) diff --git a/test/subset_selection.jl b/test/subset_selection.jl index 694f7f4..d59ae54 100644 --- a/test/subset_selection.jl +++ b/test/subset_selection.jl @@ -4,14 +4,15 @@ n = 25 k = 5 - b = SubsetSelectionBenchmark(; n=n, k=k) + b_identity = SubsetSelectionBenchmark(; n=n, k=k) + b = SubsetSelectionBenchmark(; n=n, k=k, identity_mapping=false) io = IOBuffer() show(io, b) @test String(take!(io)) == "SubsetSelectionBenchmark(n=25, k=5)" - dataset = generate_dataset(b, 50) - dataset2 = generate_dataset(b, 50; identity_mapping=false) + dataset = generate_dataset(b_identity, 50) + dataset2 = generate_dataset(b, 50) model = generate_statistical_model(b) maximizer = generate_maximizer(b) From fe9482fef8a4e7a1c9f16135959d1fbc464d243e Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Thu, 3 Jul 2025 10:06:13 +0200 Subject: [PATCH 05/29] update docstrings --- src/Argmax/Argmax.jl | 26 +++------------- .../FixedSizeShortestPath.jl | 5 ++++ .../PortfolioOptimization.jl | 5 ++++ .../StochasticVehicleScheduling.jl | 
2 +- .../solution/algorithms/column_generation.jl | 2 +- .../solution/algorithms/deterministic_mip.jl | 2 +- .../solution/algorithms/local_search.jl | 2 +- .../solution/algorithms/mip.jl | 4 +-- src/Utils/interface.jl | 30 +++++++++++++++---- 9 files changed, 44 insertions(+), 34 deletions(-) diff --git a/src/Argmax/Argmax.jl b/src/Argmax/Argmax.jl index a9590df..27663c5 100644 --- a/src/Argmax/Argmax.jl +++ b/src/Argmax/Argmax.jl @@ -62,6 +62,10 @@ end """ $TYPEDSIGNATURES + +Generate a data sample for the argmax benchmark. +This function generates a random feature matrix, computes the costs using the encoder, +and adds noise to the costs before computing a target solution. """ function Utils.generate_sample( bench::ArgmaxBenchmark, rng::AbstractRNG; noise_std::Float32=0.0f0 @@ -73,28 +77,6 @@ function Utils.generate_sample( return DataSample(; x=features, θ_true=costs, y_true=noisy_solution) end -# """ -# $TYPEDSIGNATURES - -# Generate a dataset of labeled instances for the argmax problem. -# """ -# function Utils.generate_dataset( -# bench::ArgmaxBenchmark, dataset_size::Int; noise_std=0.0, kwargs... -# ) -# return Utils.generate_dataset(bench, dataset_size; noise_std=noise_std, kwargs...) 
-# # (; instance_dim, nb_features, encoder) = bench -# # rng = MersenneTwister(seed) -# # features = [randn(rng, Float32, nb_features, instance_dim) for _ in 1:dataset_size] -# # costs = encoder.(features) -# # noisy_solutions = [ -# # one_hot_argmax(θ + noise_std * randn(rng, Float32, instance_dim)) for θ in costs -# # ] -# # return [ -# # DataSample(; x, θ_true, y_true) for -# # (x, θ_true, y_true) in zip(features, costs, noisy_solutions) -# # ] -# end - """ $TYPEDSIGNATURES diff --git a/src/FixedSizeShortestPath/FixedSizeShortestPath.jl b/src/FixedSizeShortestPath/FixedSizeShortestPath.jl index ed799ea..46a22fe 100644 --- a/src/FixedSizeShortestPath/FixedSizeShortestPath.jl +++ b/src/FixedSizeShortestPath/FixedSizeShortestPath.jl @@ -103,6 +103,11 @@ function Utils.generate_maximizer(bench::FixedSizeShortestPathBenchmark; use_dij return shortest_path_maximizer end +""" +$TYPEDSIGNATURES + +Generate a labeled sample for the fixed size shortest path benchmark. +""" function Utils.generate_sample( bench::FixedSizeShortestPathBenchmark, rng::AbstractRNG; type::Type=Float32 ) diff --git a/src/PortfolioOptimization/PortfolioOptimization.jl b/src/PortfolioOptimization/PortfolioOptimization.jl index 32c32bc..7d1b577 100644 --- a/src/PortfolioOptimization/PortfolioOptimization.jl +++ b/src/PortfolioOptimization/PortfolioOptimization.jl @@ -82,6 +82,11 @@ function Utils.generate_maximizer(bench::PortfolioOptimizationBenchmark) return portfolio_maximizer end +""" +$TYPEDSIGNATURES + +Generate a labeled sample for the portfolio optimization problem. 
+""" function Utils.generate_sample( bench::PortfolioOptimizationBenchmark, rng::AbstractRNG; type::Type=Float32 ) diff --git a/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl b/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl index 83da492..e148d5e 100644 --- a/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl +++ b/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl @@ -94,7 +94,7 @@ function Utils.generate_sample( instance = Instance(; nb_tasks, nb_scenarios, rng, store_city) x = get_features(instance) y_true = if compute_solutions - algorithm(instance; kwargs...).value # TODO: modify algorithms to directly return the solution + algorithm(instance; kwargs...) else nothing end diff --git a/src/StochasticVehicleScheduling/solution/algorithms/column_generation.jl b/src/StochasticVehicleScheduling/solution/algorithms/column_generation.jl index dbd2fd6..1bfbe1f 100644 --- a/src/StochasticVehicleScheduling/solution/algorithms/column_generation.jl +++ b/src/StochasticVehicleScheduling/solution/algorithms/column_generation.jl @@ -189,5 +189,5 @@ function column_generation_algorithm( end col_solution = solution_from_paths(sol, instance) - return col_solution + return col_solution.value end diff --git a/src/StochasticVehicleScheduling/solution/algorithms/deterministic_mip.jl b/src/StochasticVehicleScheduling/solution/algorithms/deterministic_mip.jl index 5f68190..9e14861 100644 --- a/src/StochasticVehicleScheduling/solution/algorithms/deterministic_mip.jl +++ b/src/StochasticVehicleScheduling/solution/algorithms/deterministic_mip.jl @@ -41,5 +41,5 @@ function deterministic_mip(instance::Instance; model_builder=highs_model, silent solution = value.(y) sol = solution_from_JuMP_array(solution, graph) - return sol + return sol.value end diff --git a/src/StochasticVehicleScheduling/solution/algorithms/local_search.jl b/src/StochasticVehicleScheduling/solution/algorithms/local_search.jl index b4f0f0f..49ae00c 100644 --- 
a/src/StochasticVehicleScheduling/solution/algorithms/local_search.jl +++ b/src/StochasticVehicleScheduling/solution/algorithms/local_search.jl @@ -153,5 +153,5 @@ Very simple heuristic, using [`local_search`](@ref) function local_search(instance::Instance; num_iterations=1000) _, initial_solution = solve_deterministic_VSP(instance) sol, _, _, _ = _local_search(initial_solution, instance; nb_it=num_iterations) - return sol + return sol.value end diff --git a/src/StochasticVehicleScheduling/solution/algorithms/mip.jl b/src/StochasticVehicleScheduling/solution/algorithms/mip.jl index e202569..10b0b40 100644 --- a/src/StochasticVehicleScheduling/solution/algorithms/mip.jl +++ b/src/StochasticVehicleScheduling/solution/algorithms/mip.jl @@ -78,7 +78,7 @@ function compact_linearized_mip( solution = value.(y) sol = solution_from_JuMP_array(solution, graph) - return sol + return sol.value end """ @@ -149,5 +149,5 @@ function compact_mip( solution = value.(y) sol = solution_from_JuMP_array(solution, graph) - return sol + return sol.value end diff --git a/src/Utils/interface.jl b/src/Utils/interface.jl index 5f4188c..e6ecb17 100644 --- a/src/Utils/interface.jl +++ b/src/Utils/interface.jl @@ -1,10 +1,10 @@ """ $TYPEDEF -Abstract type interface for a benchmark problem. +Abstract type interface for benchmark problems. The following methods are mandatory for benchmarks: -- [`generate_dataset`](@ref) +- [`generate_dataset`](@ref) or [`generate_sample`](@ref) - [`generate_statistical_model`](@ref) - [`generate_maximizer`](@ref) @@ -17,27 +17,45 @@ abstract type AbstractBenchmark end """ $TYPEDEF + +Abstract type interface for stochastic benchmark problems. +This type should be used for benchmarks that involve single stage stochastic optimization problems. 
+ +It follows the same interface as [`AbstractBenchmark`](@ref), with the addition of the following methods: +TODO """ abstract type AbstractStochasticBenchmark <: AbstractBenchmark end """ $TYPEDEF + +Abstract type interface for dynamic benchmark problems. +This type should be used for benchmarks that involve multi-stage stochastic optimization problems. + +It follows the same interface as [`AbstractStochasticBenchmark`](@ref), with the addition of the following methods: +TODO """ abstract type AbstractDynamicBenchmark <: AbstractStochasticBenchmark end """ - generate_sample(::AbstractBenchmark, rng::AbstractRNG; kwargs...) + generate_sample(::AbstractBenchmark, rng::AbstractRNG; kwargs...) -> DataSample -Do not always exist, interface to make [`generate_dataset`](@ref) work. -Either implement this or generate_dataset. +Generate a single [`DataSample`](@ref) for given benchmark. +This is a low-level function that is used by [`generate_dataset`](@ref) to create +a dataset of samples. It is not mandatory to implement this method, but it is +recommended for benchmarks that have a well-defined way to generate individual samples. +An alternative is to directly implement [`generate_dataset`](@ref) to create a dataset +without generating individual samples. """ function generate_sample end """ generate_dataset(::AbstractBenchmark, dataset_size::Int; kwargs...) -> Vector{<:DataSample} -Generate a `Vector` of [`DataSample`](@ref) of length `dataset_size` for given benchmark. +Generate a `Vector` of [`DataSample`](@ref) of length `dataset_size` for given benchmark. Content of the dataset can be visualized using [`plot_data`](@ref), when it applies. + +By default, it uses [`generate_sample`](@ref) to create each sample in the dataset, and passes any keyword arguments to it. 
""" function generate_dataset( bench::AbstractBenchmark, From 48c7a214b5dbecefe54c073752154d8a540312cf Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Thu, 3 Jul 2025 16:58:15 +0200 Subject: [PATCH 06/29] implement DVSP under the new interface --- Project.toml | 2 + src/DecisionFocusedLearningBenchmarks.jl | 11 +- .../algorithms/anticipative_solver.jl | 34 +- .../algorithms/prize_collecting_vsp.jl | 22 +- .../DynamicVSP/environment/environment.jl | 293 ------------------ .../DynamicVSP/environment/state.jl | 89 ------ .../DynamicVSP/learning/2d_features.jl | 6 +- .../DynamicVSP/learning/dataset.jl | 4 +- .../DynamicVSP/learning/features.jl | 66 ++-- .../DynamicVSP/policy/abstract_vsp_policy.jl | 17 +- .../DynamicVSP/policy/anticipative_policy.jl | 9 +- .../DynamicVSP/policy/greedy_policy.jl | 26 +- .../DynamicVSP/policy/kleopatra_policy.jl | 25 +- .../DynamicVSP/policy/lazy_policy.jl | 25 +- .../DynamicVSP/utils.jl | 11 - .../DynamicVehicleScheduling.jl | 73 +++-- .../dynamic_config.jl | 24 -- .../environment/environment.jl | 86 +++++ .../environment/instance.jl | 52 ++++ .../{DynamicVSP => }/environment/plot.jl | 2 +- .../environment/scenario.jl | 47 +++ .../environment/state.jl | 201 ++++++++++++ .../vsp => static_vsp}/instance.jl | 18 +- .../{DynamicVSP/vsp => static_vsp}/parsing.jl | 2 +- .../{DynamicVSP/vsp => static_vsp}/plot.jl | 2 +- .../vsp => static_vsp}/solution.jl | 0 src/DynamicVehicleScheduling/utils.jl | 12 + .../StochasticVehicleScheduling.jl | 6 +- src/Utils/interface.jl | 55 ++-- src/Warcraft/Warcraft.jl | 2 +- 30 files changed, 589 insertions(+), 633 deletions(-) delete mode 100644 src/DynamicVehicleScheduling/DynamicVSP/environment/environment.jl delete mode 100644 src/DynamicVehicleScheduling/DynamicVSP/environment/state.jl delete mode 100644 src/DynamicVehicleScheduling/DynamicVSP/utils.jl delete mode 100644 src/DynamicVehicleScheduling/dynamic_config.jl create mode 100644 src/DynamicVehicleScheduling/environment/environment.jl create mode 100644 
src/DynamicVehicleScheduling/environment/instance.jl rename src/DynamicVehicleScheduling/{DynamicVSP => }/environment/plot.jl (98%) create mode 100644 src/DynamicVehicleScheduling/environment/scenario.jl create mode 100644 src/DynamicVehicleScheduling/environment/state.jl rename src/DynamicVehicleScheduling/{DynamicVSP/vsp => static_vsp}/instance.jl (65%) rename src/DynamicVehicleScheduling/{DynamicVSP/vsp => static_vsp}/parsing.jl (97%) rename src/DynamicVehicleScheduling/{DynamicVSP/vsp => static_vsp}/plot.jl (96%) rename src/DynamicVehicleScheduling/{DynamicVSP/vsp => static_vsp}/solution.jl (100%) diff --git a/Project.toml b/Project.toml index ed7fde9..b9695a6 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,7 @@ authors = ["Members of JuliaDecisionFocusedLearning"] version = "0.2.2" [deps] +CommonRLInterface = "d842c3ba-07a1-494f-bbec-f5741b0a3e98" ConstrainedShortestPaths = "b3798467-87dc-4d99-943d-35a1bd39e395" DataDeps = "124859b0-ceae-595e-8997-d05f6a7a8dfe" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" @@ -30,6 +31,7 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] +CommonRLInterface = "0.3.3" ConstrainedShortestPaths = "0.6.0" DataDeps = "0.7" Distributions = "0.25" diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl index 74bb04a..252e5d4 100644 --- a/src/DecisionFocusedLearningBenchmarks.jl +++ b/src/DecisionFocusedLearningBenchmarks.jl @@ -32,8 +32,8 @@ function __init__() register( DataDep( - "euro-neurips-2022", - "EURO-NeurIPs challenge 2022 dataset", + "dvrptw", + "EURO-NeurIPS challenge 2022 dataset for the dynamic vehicle routing problem with time windows", "https://github.com/ortec/euro-neurips-vrp-2022-quickstart/archive/refs/heads/main.zip"; post_fetch_method=_euro_neurips_unpack, ), @@ -54,7 +54,7 @@ include("Warcraft/Warcraft.jl") include("FixedSizeShortestPath/FixedSizeShortestPath.jl") 
include("PortfolioOptimization/PortfolioOptimization.jl") include("StochasticVehicleScheduling/StochasticVehicleScheduling.jl") -# include("DynamicVehicleScheduling/DynamicVehicleScheduling.jl") +include("DynamicVehicleScheduling/DynamicVehicleScheduling.jl") using .Utils using .Argmax @@ -64,11 +64,11 @@ using .Warcraft using .FixedSizeShortestPath using .PortfolioOptimization using .StochasticVehicleScheduling -# using .DynamicVehicleScheduling +using .DynamicVehicleScheduling # Interface export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark, DataSample -export generate_dataset +export generate_sample, generate_dataset, generate_scenario export generate_statistical_model export generate_maximizer, maximizer_kwargs export objective_value @@ -83,5 +83,6 @@ export WarcraftBenchmark export FixedSizeShortestPathBenchmark export PortfolioOptimizationBenchmark export StochasticVehicleSchedulingBenchmark +export DVSPBenchmark end # module DecisionFocusedLearningBenchmarks diff --git a/src/DynamicVehicleScheduling/DynamicVSP/algorithms/anticipative_solver.jl b/src/DynamicVehicleScheduling/DynamicVSP/algorithms/anticipative_solver.jl index 43a9edb..ef897e5 100644 --- a/src/DynamicVehicleScheduling/DynamicVSP/algorithms/anticipative_solver.jl +++ b/src/DynamicVehicleScheduling/DynamicVSP/algorithms/anticipative_solver.jl @@ -4,13 +4,14 @@ $TYPEDSIGNATURES Retrieve anticipative routes solution from the given MIP solution `y`. Outputs a set of routes per epoch. 
""" -function retrieve_routes_anticipative(y::AbstractArray, dvspenv::DVSPEnv) - nb_tasks = length(dvspenv.customer_index) - (; first_epoch, last_epoch) = dvspenv.config +function retrieve_routes_anticipative(y::AbstractArray, dvspenv::DVSPEnv, customer_index) + nb_tasks = length(customer_index) + first_epoch = 1 + (; last_epoch) = dvspenv.instance job_indices = 2:(nb_tasks) epoch_indices = first_epoch:last_epoch - routes = [Vector{Int}[] for t in epoch_indices] + routes = [Vector{Int}[] for _ in epoch_indices] for t in epoch_indices start = [i for i in job_indices if y[1, i, t] ≈ 1] for task in start @@ -39,13 +40,21 @@ $TYPEDSIGNATURES Solve the anticipative VSP problem for environment `env`. For this, it uses the current environment history, so make sure that the environment is terminated before calling this method. """ -function anticipative_solver(env::DVSPEnv; model_builder=highs_model, draw_epochs=true) - draw_epochs && draw_all_epochs!(env) - (; customer_index, service_time, start_time, request_epoch) = env - duration = env.config.static_instance.duration[customer_index, customer_index] - (; first_epoch, last_epoch, epoch_duration, Δ_dispatch) = env.config +function anticipative_solver( + env::DVSPEnv, scenario=env.scenario; model_builder=highs_model, reset_env=false +) + reset_env && reset!(env) + request_epoch = [0] + for (epoch, indices) in enumerate(scenario.indices) + request_epoch = vcat(request_epoch, fill(epoch, length(indices))) + end + customer_index = vcat(1, scenario.indices...) + service_time = vcat(0.0, scenario.service_time...) + start_time = vcat(0.0, scenario.start_time...) 
- @assert first_epoch == 1 + duration = env.instance.static_instance.duration[customer_index, customer_index] + first_epoch = 1 + (; last_epoch, epoch_duration, Δ_dispatch) = env.instance model = model_builder() set_silent(model) @@ -80,7 +89,7 @@ function anticipative_solver(env::DVSPEnv; model_builder=highs_model, draw_epoch sum(y[j, i, t] for j in 1:nb_nodes, t in epoch_indices) == 1 ) - # a trip from i can be planned only after request appeared + # a trip from i can be planned only after request appeared (release times) for i in job_indices, t in epoch_indices, j in 1:nb_nodes if t < request_epoch[i] @constraint(model, y[i, j, t] <= 0) @@ -107,5 +116,6 @@ function anticipative_solver(env::DVSPEnv; model_builder=highs_model, draw_epoch optimize!(model) - return retrieve_routes_anticipative(value.(y), env) + return JuMP.objective_value(model), + retrieve_routes_anticipative(value.(y), env, customer_index) end diff --git a/src/DynamicVehicleScheduling/DynamicVSP/algorithms/prize_collecting_vsp.jl b/src/DynamicVehicleScheduling/DynamicVSP/algorithms/prize_collecting_vsp.jl index 531169c..75af6a4 100644 --- a/src/DynamicVehicleScheduling/DynamicVSP/algorithms/prize_collecting_vsp.jl +++ b/src/DynamicVehicleScheduling/DynamicVSP/algorithms/prize_collecting_vsp.jl @@ -3,10 +3,10 @@ $TYPEDSIGNATURES Create the acyclic digraph associated with the given VSP `instance`. """ -function create_graph(instance::VSPInstance) +function create_graph(instance::StaticInstance) (; duration, start_time, service_time) = instance # Initialize directed graph - nb_vertices = nb_locations(instance) + nb_vertices = location_count(instance) graph = SimpleDiGraph(nb_vertices) depot = 1 # depot is always index 1 @@ -42,8 +42,8 @@ $TYPEDSIGNATURES Create the acyclic digraph associated with the given VSP `state`. 
""" -function create_graph(state::VSPState) - return create_graph(state.instance) +function create_graph(state::DVSPState) + return create_graph(state.state_instance) end """ @@ -82,9 +82,9 @@ $TYPEDSIGNATURES Solve the Prize Collecting Vehicle Scheduling Problem defined by `instance` and prize vector `θ`. """ function prize_collecting_vsp( - θ::AbstractVector; instance::VSPState, model_builder=highs_model, kwargs... + θ::AbstractVector; instance::DVSPState, model_builder=highs_model, kwargs... ) - (; duration) = instance.instance + (; duration) = instance.state_instance graph = create_graph(instance) model = model_builder() @@ -95,7 +95,7 @@ function prize_collecting_vsp( @variable(model, y[i=1:nb_nodes, j=1:nb_nodes; has_edge(graph, i, j)] >= 0) - θ_ext = fill(0.0, nb_locations(instance)) # no prize for must dispatch requests, only hard constraints + θ_ext = fill(0.0, location_count(instance)) # no prize for must dispatch requests, only hard constraints θ_ext[instance.is_postponable] .= θ @objective( @@ -131,7 +131,7 @@ end function prize_collecting_vsp_Q( θ::AbstractVector, vals::AbstractVector; - instance::VSPState, + instance::DVSPState, model_builder=highs_model, kwargs..., ) @@ -142,7 +142,7 @@ function prize_collecting_vsp_Q( nb_nodes = nv(graph) job_indices = 2:(nb_nodes) @variable(model, y[i=1:nb_nodes, j=1:nb_nodes; has_edge(graph, i, j)] >= 0) - θ_ext = fill(0.0, nb_locations(instance.instance)) # no prize for must dispatch requests, only hard constraints + θ_ext = fill(0.0, location_count(instance.instance)) # no prize for must dispatch requests, only hard constraints θ_ext[instance.is_postponable] .= θ # v_ext = fill(0.0, nb_locations(instance.instance)) # no prize for must dispatch requests, only hard constraints # v_ext[instance.is_postponable] .= vals @@ -176,7 +176,7 @@ end function my_objective_value(θ, routes; instance) (; duration) = instance.instance total = 0.0 - θ_ext = fill(0.0, nb_locations(instance)) + θ_ext = fill(0.0, 
location_count(instance)) θ_ext[instance.is_postponable] .= θ for route in routes for (u, v) in partition(vcat(1, route), 2, 1) @@ -189,7 +189,7 @@ end function _objective_value(θ, routes; instance) (; duration) = instance.instance total = 0.0 - θ_ext = fill(0.0, nb_locations(instance)) + θ_ext = fill(0.0, location_count(instance)) θ_ext[instance.is_postponable] .= θ mapping = cumsum(instance.is_postponable) g = falses(length(θ)) diff --git a/src/DynamicVehicleScheduling/DynamicVSP/environment/environment.jl b/src/DynamicVehicleScheduling/DynamicVSP/environment/environment.jl deleted file mode 100644 index 189afb4..0000000 --- a/src/DynamicVehicleScheduling/DynamicVSP/environment/environment.jl +++ /dev/null @@ -1,293 +0,0 @@ -""" -$TYPEDEF - -Environment data structure for the Dynamic Vehicle Scheduling Problem. - -# Fields -$TYPEDFIELDS -""" -@kwdef mutable struct DVSPEnv{C<:DynamicConfig,R<:AbstractRNG,T,S<:VSPState} - "instance config as a [`DynamicConfig`](@ref)" - config::C - "current epoch number" - current_epoch::Int - "random number generator" - rng::R - "index of each customer in the static instance from the config" - customer_index::Vector{Int} - "service time values of each customer" - service_time::Vector{T} - "start time values of each customer" - start_time::Vector{T} - "1 if the request was already dispatched in a previous epoch, 0 otherwise" - request_is_dispatched::BitVector - "epoch index at which each request appearred" - request_epoch::Vector{Int} - "current state of environment" - state::S -end - -""" -$TYPEDSIGNATURES - -Constructor for [`DVSPEnv`](@ref). 
-""" -function DVSPEnv( - static_instance::VSPInstance; - seed=0, - max_requests_per_epoch=10, - Δ_dispatch=1.0, - epoch_duration=1.0, -) - first_epoch = 1 - last_epoch = trunc(Int, maximum(static_instance.start_time) / epoch_duration) - 1 - - config = DynamicConfig(; - static_instance, - max_requests_per_epoch, - Δ_dispatch, - epoch_duration, - seed, - first_epoch, - last_epoch, - ) - return DVSPEnv(; - config, - customer_index=[1], - service_time=[0.0], - start_time=[0.0], - request_is_dispatched=falses(1), - state=VSPState(), - rng=MersenneTwister(seed), - current_epoch=0, - request_epoch=[first_epoch - 1], - ) -end - -""" -$TYPEDSIGNATURES - -Return the indices of the epochs in the environment. -""" -function get_epoch_indices(env::DVSPEnv) - return (env.config.first_epoch):(env.config.last_epoch) -end - -""" -$TYPEDSIGNATURES - -Return the number of epochs in the environment. -""" -function nb_epochs(env::DVSPEnv) - return length(get_epoch_indices(env)) -end - -""" -$TYPEDSIGNATURES - -Get the current state of the environment. -""" -get_state(env::DVSPEnv) = env.state - -""" -$TYPEDSIGNATURES - -Get the current time of the environment, i.e. the start time of the current_epoch. -""" -get_time(env::DVSPEnv) = (env.current_epoch - 1) * env.config.epoch_duration - -""" -$TYPEDSIGNATURES - -Get the planning start time of the environment, i.e. the time at which vehicles routes dispatched in current epoch can depart. -""" -get_planning_start_time(env::DVSPEnv) = get_time(env) + env.config.Δ_dispatch - -""" -$TYPEDSIGNATURES - -Check if the episode is terminated, i.e. if the current epoch is the last one. -""" -is_terminated(env::DVSPEnv) = env.current_epoch >= env.config.last_epoch - -""" -$TYPEDSIGNATURES - -Return the total number of locations in the environment history. -""" -nb_locations(env::DVSPEnv) = length(env.customer_index) - -""" -$TYPEDSIGNATURES - -Return a vector of env location indices that are still undispatched. 
-""" -get_undispatched_indices(env::DVSPEnv) = (1:nb_locations(env))[.!env.request_is_dispatched] - -""" -$TYPEDSIGNATURES - -Reset the environment to its initial state. -Also reset the seed if `reset_seed` is set to true. -""" -function reset!(env::DVSPEnv; reset_seed::Bool=true) - (; config) = env - env.current_epoch = config.first_epoch - 1 - depot = 1 - env.customer_index = [env.customer_index[depot]] - env.service_time = [env.service_time[depot]] - env.start_time = env.start_time[depot:depot] - env.request_is_dispatched = falses(1) - env.request_epoch = [env.current_epoch] - reset_seed && seed!(env.rng, config.seed) - return nothing -end - -""" -$TYPEDSIGNATURES - -Internal method that updates the state of the environment to correspond to env info. -This is an internal method and should not be called directly. -""" -function update_state!(env::DVSPEnv) - (; config) = env - (; epoch_duration, static_instance, last_epoch) = config - (; duration) = static_instance - depot = 1 - - planning_start_time = get_planning_start_time(env) - - # Must dispatch - undispatched_indices = get_undispatched_indices(env) - # If it's the last epoch, we must dispatch all remaining requests - is_must_dispatch = undispatched_indices .!= depot - # Else, only requests unreachable from the depot during next epoch are must dispatch - if env.current_epoch < last_epoch - is_must_dispatch = - planning_start_time .+ epoch_duration .+ - @view(duration[depot, env.customer_index[undispatched_indices]]) .> - @view(env.start_time[undispatched_indices]) - is_must_dispatch[1] = 0 - end - - is_postponable = falses(length(is_must_dispatch)) - is_postponable[2:end] .= .!is_must_dispatch[2:end] - - epoch_instance = VSPState(; - instance=VSPInstance(; - service_time=env.service_time[undispatched_indices], - start_time=env.start_time[undispatched_indices] .- planning_start_time, # shift start times to planning start time - coordinate=static_instance.coordinate[env.customer_index[undispatched_indices]], - 
duration=duration[ - env.customer_index[undispatched_indices], - env.customer_index[undispatched_indices], - ], - ), - is_must_dispatch, - is_postponable, - ) - - env.state = epoch_instance - return epoch_instance -end - -""" -$TYPEDSIGNATURES - -Update `env` by drawing the next epoch and returning a corresponding `EpochInstance`. -""" -function next_epoch!(env::DVSPEnv) - # Increment epoch number - env.current_epoch += 1 - - # Retrieve useful information - (; rng, config) = env - (; max_requests_per_epoch, static_instance) = config - (; duration, service_time, start_time) = config.static_instance - depot = 1 - - # Draw new requests uniformly from static instance - N = nb_customers(static_instance) - - planning_start_time = get_planning_start_time(env) - - coordinate_indices = sample_indices(rng, max_requests_per_epoch, N) - start_time_indices = sample_indices(rng, max_requests_per_epoch, N) - service_time_indices = sample_indices(rng, max_requests_per_epoch, N) - - # Only keep requests with feasible start times (rejection sampling) - # i.e. that are reachable from the depot before their start time - is_feasible = - planning_start_time .+ duration[depot, coordinate_indices] .<= - start_time[start_time_indices] - - # Update environment state - nb_new_requests = sum(is_feasible) - - # Update environment by adding new requests in - env.customer_index = vcat(env.customer_index, coordinate_indices[is_feasible]) - env.service_time = vcat( - env.service_time, service_time[service_time_indices[is_feasible]] - ) - env.start_time = vcat(env.start_time, start_time[start_time_indices[is_feasible]]) - env.request_is_dispatched = vcat(env.request_is_dispatched, falses(nb_new_requests)) - env.request_epoch = vcat(env.request_epoch, fill(env.current_epoch, nb_new_requests)) - - # Finally, update the state of the environment with these new requests - return update_state!(env) -end - -""" -$TYPEDSIGNATURES - -Transform state routes indices into env route indices. 
-""" -function env_routes_from_state_routes(env, routes) - undispatched_indices = get_undispatched_indices(env) - return [undispatched_indices[route] for route in routes] -end - -""" -$TYPEDSIGNATURES - -Transform env route indices into state route indices. -""" -function state_route_from_env_routes(env, routes) - nb_requests = length(env.customer_index) - undispatched_indices = (1:nb_requests)[.!env.request_is_dispatched] - global_to_local = zeros(Int, nb_requests) - for (local_i, global_i) in enumerate(undispatched_indices) - global_to_local[global_i] = local_i - end - return [global_to_local[route] for route in routes] -end - -""" -$TYPEDSIGNATURES - -Apply given `routes` as an action to `env`. - -Routes should be given with global indexation. -Use [`env_routes_from_state_routes`](@ref) if needed to convert the indices beforehand. -""" -function apply_decision!(env::DVSPEnv, routes::Vector{Vector{Int}}) - for route in routes - env.request_is_dispatched[route] .= true - end - duration = @view env.config.static_instance.duration[ - env.customer_index, env.customer_index - ] - return cost(routes, duration) -end - -""" -$TYPEDSIGNATURES - -Draw all epochs until the end of the environment, without any actions. -""" -function draw_all_epochs!(env::DVSPEnv; reset_env=true) - reset_env && reset!(env) - while !is_terminated(env) - next_epoch!(env) - end -end diff --git a/src/DynamicVehicleScheduling/DynamicVSP/environment/state.jl b/src/DynamicVehicleScheduling/DynamicVSP/environment/state.jl deleted file mode 100644 index dddb076..0000000 --- a/src/DynamicVehicleScheduling/DynamicVSP/environment/state.jl +++ /dev/null @@ -1,89 +0,0 @@ -""" -$TYPEDSIGNATURES - -State data structure for the Dynamic Vehicle Scheduling Problem. -""" -@kwdef struct VSPState{I} - "associated (static) vehicle scheduling instance" - instance::I = VSPInstance() - "for each location, 1 if the request must be dispatched, 0 otherwise. The depot is always 0." 
- is_must_dispatch::BitVector = falses(0) - "for each location, 1 if the request can be postponed, 0 otherwise. The depot is always 0." - is_postponable::BitVector = falses(0) -end - -""" -$TYPEDSIGNATURES - -Return the number of locations in `state` (customers + depot). -""" -nb_locations(state::VSPState) = nb_locations(state.instance) - -""" -$TYPEDSIGNATURES - -Return the number of customers in `state`. -""" -nb_customers(state::VSPState) = nb_customers(state.instance) - -""" -$TYPEDSIGNATURES - -Get the service time vector -""" -service_time(state::VSPState) = service_time(state.instance) - -""" -$TYPEDSIGNATURES - -Get the coordinates vector. -""" -coordinate(state::VSPState) = coordinate(state.instance) - -""" -$TYPEDSIGNATURES - -Get the duration matrix. -""" -duration(state::VSPState) = duration(state.instance) - -""" -$TYPEDSIGNATURES - -Get the start time vector. -""" -start_time(state::VSPState) = start_time(state.instance) - -""" -$TYPEDSIGNATURES - -Check if the given routes are feasible. -Routes should be given with global indexation. -Use [`env_routes_from_state_routes`](@ref) if needed to convert the indices beforehand. 
-""" -function is_feasible(state::VSPState, routes::Vector{Vector{Int}}; verbose::Bool=false) - (; is_must_dispatch, instance) = state - (; duration, start_time, service_time) = instance - is_dispatched = falses(length(is_must_dispatch)) - - # Check that routes follow time constraints - for route in routes - is_dispatched[route] .= true - current = 1 # start at the depot - current_time = start_time[current] - for next in route - current_time += duration[current, next] - if current_time > start_time[next] - verbose && - @warn "Route $route is infeasible: time constraint violated at location $next" - return false - end - current_time += service_time[next] - current = next - end - end - - # Check that all must dispatch requests are dispatched - return all(is_dispatched[is_must_dispatch]) - return true -end diff --git a/src/DynamicVehicleScheduling/DynamicVSP/learning/2d_features.jl b/src/DynamicVehicleScheduling/DynamicVSP/learning/2d_features.jl index 941468a..7226e9c 100644 --- a/src/DynamicVehicleScheduling/DynamicVSP/learning/2d_features.jl +++ b/src/DynamicVehicleScheduling/DynamicVSP/learning/2d_features.jl @@ -1,15 +1,13 @@ function get_features_meanTimeToRequests(env::DVSPEnv) quantiles = [0.5] - a = env.config.static_instance.duration[ - env.customer_index[.!env.request_is_dispatched], 2:end - ] + a = env.instance.static_instance.duration[env.state.location_indices, 2:end] quantileTimeToRequests = mapslices(x -> quantile(x, quantiles), a; dims=2) return quantileTimeToRequests end function compute_2D_features(env::DVSPEnv) state = env.state - timeDepotRequest = state.instance.duration[:, 1][state.is_postponable] + timeDepotRequest = state.state_instance.duration[:, 1][state.is_postponable] quantileTimeToRequests = get_features_meanTimeToRequests(env)[state.is_postponable] return hcat(timeDepotRequest, quantileTimeToRequests)' end diff --git a/src/DynamicVehicleScheduling/DynamicVSP/learning/dataset.jl 
b/src/DynamicVehicleScheduling/DynamicVSP/learning/dataset.jl index 8f80a44..da37b59 100644 --- a/src/DynamicVehicleScheduling/DynamicVSP/learning/dataset.jl +++ b/src/DynamicVehicleScheduling/DynamicVSP/learning/dataset.jl @@ -2,7 +2,7 @@ function load_VSP_dataset( datadir::String; model_builder=highs_model, use_2D_features=false, kwargs... ) instances_files = filtered_readdir(datadir) - X = Tuple{Matrix{Float32},VSPState{VSPInstance{Float64}}}[] + X = Tuple{Matrix{Float32},DVSPState{VSPInstance{Float64}}}[] Y = BitMatrix[] for (i, f) in enumerate(instances_files) @@ -26,7 +26,7 @@ function load_VSP_dataset( Y, VSPSolution( state_route_from_env_routes(env, routes); - max_index=nb_locations(state.instance), + max_index=location_count(state.instance), ).edge_matrix, ) # Update the environment diff --git a/src/DynamicVehicleScheduling/DynamicVSP/learning/features.jl b/src/DynamicVehicleScheduling/DynamicVSP/learning/features.jl index 348b816..0cb4160 100644 --- a/src/DynamicVehicleScheduling/DynamicVSP/learning/features.jl +++ b/src/DynamicVehicleScheduling/DynamicVSP/learning/features.jl @@ -7,7 +7,7 @@ function get_features_quantileTimeToRequests(env::DVSPEnv) return quantileTimeToRequests end -function compute_model_free_features(state::VSPState; env::DVSPEnv) +function compute_model_free_features(state::DVSPState; env::DVSPEnv) (; instance, is_postponable) = state startTimes = instance.start_time @@ -27,7 +27,7 @@ function compute_model_free_features(state::VSPState; env::DVSPEnv) return model_free_features end -function compute_model_aware_features(state::VSPState; env::DVSPEnv) +function compute_model_aware_features(state::DVSPState; env::DVSPEnv) quantileTimeToRequests = get_features_quantileTimeToRequests(env) model_aware_features = quantileTimeToRequests return model_aware_features[state.is_postponable, :] @@ -40,36 +40,36 @@ function compute_features(env::DVSPEnv) return hcat(model_free_features, model_aware_features)' end -# ? 
why is this needed -function model_free_features_critic(state::VSPState; env::DVSPEnv) - (; instance) = state - startTimes = instance.start_time - endTimes = instance.service_time .+ instance.start_time - timeDepotRequest = instance.duration[:, 1] - timeRequestDepot = instance.duration[1, :] - slack_next_epoch = startTimes .- env.config.epoch_duration - model_free_features = hcat( - startTimes, endTimes, timeDepotRequest, timeRequestDepot, slack_next_epoch - ) - return model_free_features -end +# # ? why is this needed +# function model_free_features_critic(state::DVSPState; env::DVSPEnv) +# (; instance) = state +# startTimes = instance.start_time +# endTimes = instance.service_time .+ instance.start_time +# timeDepotRequest = instance.duration[:, 1] +# timeRequestDepot = instance.duration[1, :] +# slack_next_epoch = startTimes .- env.config.epoch_duration +# model_free_features = hcat( +# startTimes, endTimes, timeDepotRequest, timeRequestDepot, slack_next_epoch +# ) +# return model_free_features +# end -# ? -function compute_critic_features(env::DVSPEnv) - state = env.state - model_free_features = model_free_features_critic(state; env) - model_aware_features = get_features_quantileTimeToRequests(env) - postpon = state.is_postponable - return hcat(model_free_features, model_aware_features, postpon)' -end +# # ? +# function compute_critic_features(env::DVSPEnv) +# state = env.state +# model_free_features = model_free_features_critic(state; env) +# model_aware_features = get_features_quantileTimeToRequests(env) +# postpon = state.is_postponable +# return hcat(model_free_features, model_aware_features, postpon)' +# end -# ? 
-function compute_critic_2D_features(env::DVSPEnv) - state = env.state - timeDepotRequest = state.instance.duration[:, 1] - quantileTimeToRequests = get_features_meanTimeToRequests(env) - postpon = state.is_postponable - # time_postpon = timeDepotRequest .* postpon - # quant_postpon = quantileTimeToRequests .* postpon - return hcat(timeDepotRequest, quantileTimeToRequests, postpon)' -end +# # ? +# function compute_critic_2D_features(env::DVSPEnv) +# state = env.state +# timeDepotRequest = state.instance.duration[:, 1] +# quantileTimeToRequests = get_features_meanTimeToRequests(env) +# postpon = state.is_postponable +# # time_postpon = timeDepotRequest .* postpon +# # quant_postpon = quantileTimeToRequests .* postpon +# return hcat(timeDepotRequest, quantileTimeToRequests, postpon)' +# end diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/abstract_vsp_policy.jl b/src/DynamicVehicleScheduling/DynamicVSP/policy/abstract_vsp_policy.jl index 63d8030..bd640fe 100644 --- a/src/DynamicVehicleScheduling/DynamicVSP/policy/abstract_vsp_policy.jl +++ b/src/DynamicVehicleScheduling/DynamicVSP/policy/abstract_vsp_policy.jl @@ -11,7 +11,11 @@ $TYPEDSIGNATURES Apply the policy to the environment. """ function run_policy!( - π::AbstractDynamicVSPPolicy, env::DVSPEnv; check_feasibility=true, kwargs... + π::AbstractDynamicVSPPolicy, + env::DVSPEnv, + scenario=env.scenario; + check_feasibility=true, + kwargs..., ) # reset environment, and initialize variables reset!(env) @@ -19,13 +23,12 @@ function run_policy!( epoch_routes = Vector{Vector{Int}}[] # epoch loop - while !is_terminated(env) - next_epoch!(env) + while !terminated(env) state_routes = π(env; kwargs...) 
- check_feasibility && @assert is_feasible(get_state(env), state_routes) - env_routes = env_routes_from_state_routes(env, state_routes) - push!(epoch_routes, env_routes) - local_cost = apply_decision!(env, env_routes) + check_feasibility && @assert is_feasible(observe(env), state_routes) + # env_routes = env_routes_from_state_routes(env, state_routes) + push!(epoch_routes, state_routes) + local_cost = act!(env, state_routes, scenario) total_cost += local_cost end diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/anticipative_policy.jl b/src/DynamicVehicleScheduling/DynamicVSP/policy/anticipative_policy.jl index b6751c7..d7f2381 100644 --- a/src/DynamicVehicleScheduling/DynamicVSP/policy/anticipative_policy.jl +++ b/src/DynamicVehicleScheduling/DynamicVSP/policy/anticipative_policy.jl @@ -10,9 +10,8 @@ $TYPEDSIGNATURES Apply the anticipative policy to the environment. """ -function run_policy!(::AnticipativeVSPPolicy, env::DVSPEnv; model_builder=highs_model) - routes_anticipative = anticipative_solver(env; model_builder) - duration = env.config.static_instance.duration[env.customer_index, env.customer_index] - anticipative_costs = [cost(routes, duration) for routes in routes_anticipative] - return sum(anticipative_costs), routes_anticipative +function run_policy!( + ::AnticipativeVSPPolicy, env::DVSPEnv, scenario=env.scenario; model_builder=highs_model +) + return anticipative_solver(env, scenario; model_builder, reset_env=true) end diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/greedy_policy.jl b/src/DynamicVehicleScheduling/DynamicVSP/policy/greedy_policy.jl index f6c1654..a15a3b9 100644 --- a/src/DynamicVehicleScheduling/DynamicVSP/policy/greedy_policy.jl +++ b/src/DynamicVehicleScheduling/DynamicVSP/policy/greedy_policy.jl @@ -7,28 +7,10 @@ Dispatch customers as soon as they appear. 
struct GreedyVSPPolicy <: AbstractDynamicVSPPolicy end function (π::GreedyVSPPolicy)(env::DVSPEnv; model_builder=highs_model) - nb_postponable_requests = sum(get_state(env).is_postponable) + state = observe(env) + (; is_postponable) = state + nb_postponable_requests = sum(is_postponable) θ = ones(nb_postponable_requests) * 1e9 - routes = prize_collecting_vsp(θ; instance=get_state(env), model_builder) + routes = prize_collecting_vsp(θ; instance=state, model_builder) return routes end - -# function run_policy!(π::GreedyVSPPolicy, env::DVSPEnv; check_feasibility=true, kwargs...) -# # reset environment, and initialize variables -# reset!(env) -# total_cost = 0 -# epoch_routes = Vector{Vector{Int}}[] - -# # epoch loop -# while !is_terminated(env) -# next_epoch!(env) -# state_routes = π(env; kwargs...) -# check_feasibility && @assert is_feasible(get_state(env), state_routes) -# env_routes = env_routes_from_state_routes(env, state_routes) -# push!(epoch_routes, env_routes) -# local_cost = apply_decision!(env, env_routes) -# total_cost += local_cost -# end - -# return total_cost, epoch_routes -# end diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/kleopatra_policy.jl b/src/DynamicVehicleScheduling/DynamicVSP/policy/kleopatra_policy.jl index 0a3708f..8a7e8d1 100644 --- a/src/DynamicVehicleScheduling/DynamicVSP/policy/kleopatra_policy.jl +++ b/src/DynamicVehicleScheduling/DynamicVSP/policy/kleopatra_policy.jl @@ -23,31 +23,10 @@ function KleopatraVSPPolicy(prize_predictor; has_2D_features=nothing) end function (π::KleopatraVSPPolicy)(env::DVSPEnv; model_builder=highs_model) + state = observe(env) (; prize_predictor, has_2D_features) = π x = has_2D_features ? 
compute_2D_features(env) : compute_features(env) θ = prize_predictor(x) - routes = prize_collecting_vsp(θ; instance=get_state(env), model_builder) + routes = prize_collecting_vsp(θ; instance=state, model_builder) return routes end - -# function run_policy!( -# π::KleopatraVSP, env::DVSPEnv; check_feasibility=true, model_builder=highs_model -# ) -# # reset environment, and initialize variables -# reset!(env) -# total_cost = 0 -# epoch_routes = Vector{Vector{Int}}[] - -# # epoch loop -# while !is_terminated(env) -# next_epoch!(env) -# state_routes = π(env; model_builder) -# check_feasibility && @assert is_feasible(get_state(env), state_routes) -# env_routes = env_routes_from_state_routes(env, state_routes) -# push!(epoch_routes, env_routes) -# local_cost = apply_decision!(env, env_routes) -# total_cost += local_cost -# end - -# return total_cost, epoch_routes -# end diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/lazy_policy.jl b/src/DynamicVehicleScheduling/DynamicVSP/policy/lazy_policy.jl index 5ce71ca..50b44d3 100644 --- a/src/DynamicVehicleScheduling/DynamicVSP/policy/lazy_policy.jl +++ b/src/DynamicVehicleScheduling/DynamicVSP/policy/lazy_policy.jl @@ -7,28 +7,9 @@ Dispatch customers only when necessary (i.e. must-dispatch). struct LazyVSPPolicy <: AbstractDynamicVSPPolicy end function (π::LazyVSPPolicy)(env::DVSPEnv; model_builder=highs_model) - nb_postponable_requests = sum(get_state(env).is_postponable) + state = observe(env) + nb_postponable_requests = sum(state.is_postponable) θ = ones(nb_postponable_requests) * -1e9 - routes = prize_collecting_vsp(θ; instance=get_state(env), model_builder) + routes = prize_collecting_vsp(θ; instance=state, model_builder) return routes end - -# function run_policy!(π::LazyVSPPolicy, env::DVSPEnv; check_feasibility=true, kwargs...) 
-# # reset environment, and initialize variables -# reset!(env) -# total_cost = 0 -# epoch_routes = Vector{Vector{Int}}[] - -# # epoch loop -# while !is_terminated(env) -# next_epoch!(env) -# state_routes = π(env; kwargs...) -# check_feasibility && @assert is_feasible(get_state(env), state_routes) -# env_routes = env_routes_from_state_routes(env, state_routes) -# push!(epoch_routes, env_routes) -# local_cost = apply_decision!(env, env_routes) -# total_cost += local_cost -# end - -# return total_cost, epoch_routes -# end diff --git a/src/DynamicVehicleScheduling/DynamicVSP/utils.jl b/src/DynamicVehicleScheduling/DynamicVSP/utils.jl deleted file mode 100644 index 1be5e4d..0000000 --- a/src/DynamicVehicleScheduling/DynamicVSP/utils.jl +++ /dev/null @@ -1,11 +0,0 @@ -""" -$TYPEDEF - -Basic point structure. -""" -struct Point{T} - x::T - y::T -end - -Base.show(io::IO, p::Point) = print(io, "($(p.x), $(p.y))") diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl index e1fddbc..d003f40 100644 --- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl +++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl @@ -3,6 +3,8 @@ module DynamicVehicleScheduling using ..Utils using Base: @kwdef +using CommonRLInterface: CommonRLInterface, AbstractEnv, reset!, terminated, observe, act! +using DataDeps: @datadep_str # using ChainRulesCore using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES using Graphs @@ -13,25 +15,26 @@ using JSON using JuMP using Plots: plot, plot!, scatter! 
using Printf: @printf -using Random: AbstractRNG, MersenneTwister, seed!, randperm +using Random: Random, AbstractRNG, MersenneTwister, seed!, randperm using Requires: @require using Statistics: mean, quantile include("utils.jl") -include("dynamic_config.jl") -include("abstract_policy.jl") -# Dynamic Vehicle Scheduling -include("DynamicVSP/utils.jl") +include("abstract_policy.jl") -include("DynamicVSP/vsp/instance.jl") -include("DynamicVSP/vsp/parsing.jl") -include("DynamicVSP/vsp/solution.jl") -include("DynamicVSP/vsp/plot.jl") +# static vsp stuff +include("static_vsp/instance.jl") +include("static_vsp/parsing.jl") +include("static_vsp/solution.jl") +include("static_vsp/plot.jl") -include("DynamicVSP/environment/state.jl") -include("DynamicVSP/environment/environment.jl") -include("DynamicVSP/environment/plot.jl") +# dynamic environment +include("environment/instance.jl") +include("environment/scenario.jl") +include("environment/state.jl") +include("environment/environment.jl") +include("environment/plot.jl") include("DynamicVSP/algorithms/prize_collecting_vsp.jl") include("DynamicVSP/algorithms/anticipative_solver.jl") @@ -46,28 +49,38 @@ include("DynamicVSP/policy/lazy_policy.jl") include("DynamicVSP/policy/anticipative_policy.jl") include("DynamicVSP/policy/kleopatra_policy.jl") -export highs_model, filtered_readdir +struct DVSPBenchmark <: AbstractDynamicBenchmark end + +function Utils.generate_sample(b::DVSPBenchmark, rng::AbstractRNG) + return Instance(read_vsp_instance(readdir(datadep"dvrptw"; join=true)[1])) +end + +export DVSPBenchmark, generate_sample, generate_scenario +export run_policy!, + GreedyVSPPolicy, LazyVSPPolicy, KleopatraVSPPolicy, AnticipativeVSPPolicy + +# export highs_model, filtered_readdir -export solve_hindsight_problem +# export solve_hindsight_problem -export AbstractDynamicPolicy, BasicDynamicPolicy +# export AbstractDynamicPolicy, BasicDynamicPolicy -export GreedyPolicy, LazyPolicy, RandomPolicy, Kleopatra +# export GreedyPolicy, 
LazyPolicy, RandomPolicy, Kleopatra -export run_policy +# export run_policy -export compute_features, - compute_2D_features, compute_critic_features, compute_critic_2D_features, load_dataset +# export compute_features, +# compute_2D_features, compute_critic_features, compute_critic_2D_features, load_dataset -export VSPInstance, - read_vsp_instance, start_time, env_routes_from_state_routes, state_route_from_env_routes -export DVSPEnv, prize_collecting_vsp -export anticipative_solver -export VSPSolution -export load_VSP_dataset -export GreedyVSPPolicy, - LazyVSPPolicy, AnticipativeVSPPolicy, run_policy!, KleopatraVSPPolicy -export plot_routes, plot_instance, plot_environment, plot_epoch -export get_state -export nb_epochs, get_epoch_indices +# export VSPInstance, +# read_vsp_instance, start_time, env_routes_from_state_routes, state_route_from_env_routes +# export DVSPEnv, prize_collecting_vsp +# export anticipative_solver +# export VSPSolution +# export load_VSP_dataset +# export GreedyVSPPolicy, +# LazyVSPPolicy, AnticipativeVSPPolicy, run_policy!, KleopatraVSPPolicy +# export plot_routes, plot_instance, plot_environment, plot_epoch +# export get_state +# export nb_epochs, get_epoch_indices end diff --git a/src/DynamicVehicleScheduling/dynamic_config.jl b/src/DynamicVehicleScheduling/dynamic_config.jl deleted file mode 100644 index 3d052bf..0000000 --- a/src/DynamicVehicleScheduling/dynamic_config.jl +++ /dev/null @@ -1,24 +0,0 @@ -""" -$TYPEDEF - -Config data structures for dynamic vehicle routing and scheduling problems. 
- -# Fields -$TYPEDFIELDS -""" -@kwdef struct DynamicConfig{I,S,T} - "static instance to sample arriving requests from" - static_instance::I - "max number of new requests per epoch (rejection sampling)" - max_requests_per_epoch::Int = 100 - "time distance between epoch start and routes start" - Δ_dispatch::T = 3600 - "duration of each epoch" - epoch_duration::T = 3600 - "first epoch index (time = epoch_duration x first_epoch)" - first_epoch::Int - "last epoch index" - last_epoch::Int - "seed for customer sampling" - seed::S -end diff --git a/src/DynamicVehicleScheduling/environment/environment.jl b/src/DynamicVehicleScheduling/environment/environment.jl new file mode 100644 index 0000000..8109c0e --- /dev/null +++ b/src/DynamicVehicleScheduling/environment/environment.jl @@ -0,0 +1,86 @@ +struct DVSPEnv{S<:DVSPState} <: AbstractEnv + "associated instance" + instance::Instance + "current state" + state::S + "scenario the environment will use when not given a specific one" + scenario::Scenario +end + +""" +$TYPEDSIGNATURES + +Constructor for [`DVSPEnv`](@ref). +""" +function DVSPEnv(instance::Instance; seed=nothing, rng=MersenneTwister(seed)) + scenario = generate_scenario(instance; rng, seed) + initial_state = DVSPState(instance; scenario[1]...) + return DVSPEnv(instance, initial_state, scenario) +end + +currrent_epoch(env::DVSPEnv) = current_epoch(env.state) +epoch_duration(env::DVSPEnv) = epoch_duration(env.instance) +last_epoch(env::DVSPEnv) = last_epoch(env.instance) +Δ_dispatch(env::DVSPEnv) = Δ_dispatch(env.instance) + +""" +$TYPEDSIGNATURES + +Get the current state of the environment. +""" +CommonRLInterface.observe(env::DVSPEnv) = env.state + +current_epoch(env::DVSPEnv) = current_epoch(env.state) + +""" +$TYPEDSIGNATURES + +Get the current time of the environment, i.e. the start time of the current_epoch. +""" +time(env::DVSPEnv) = (current_epoch(env) - 1) * epoch_duration(env) + +""" +$TYPEDSIGNATURES + +Get the planning start time of the environment, i.e. 
the time at which vehicles routes dispatched in current epoch can depart. +""" +planning_start_time(env::DVSPEnv) = time(env) + Δ_dispatch(env) +""" +$TYPEDSIGNATURES + +Check if the episode is terminated, i.e. if the current epoch is the last one. +""" +CommonRLInterface.terminated(env::DVSPEnv) = current_epoch(env) >= last_epoch(env) + +""" +draw new customers in scenario +""" +function draw_next_epoch!(env::DVSPEnv, scenario=env.scenario) + env.state.current_epoch += 1 + + return nothing +end + +""" +$TYPEDSIGNATURES + +Reset the environment to its initial state. +Also reset the seed if `reset_seed` is set to true. +""" +function CommonRLInterface.reset!(env::DVSPEnv, scenario=env.scenario) + reset_state!(env.state, env.instance; scenario[1]...) + return nothing +end + +""" +remove dispatched customers, advance time, and add new requests to the environment. +""" +function CommonRLInterface.act!(env::DVSPEnv, routes, scenario=env.scenario) + reward = -apply_routes!(env.state, routes) + env.state.current_epoch += 1 + if current_epoch(env) > last_epoch(env) + return nothing + end + add_new_customers!(env.state, env.instance; scenario[current_epoch(env)]...) + return reward +end diff --git a/src/DynamicVehicleScheduling/environment/instance.jl b/src/DynamicVehicleScheduling/environment/instance.jl new file mode 100644 index 0000000..b375077 --- /dev/null +++ b/src/DynamicVehicleScheduling/environment/instance.jl @@ -0,0 +1,52 @@ +""" +$TYPEDEF + +Instance data structure for the dynamic vehicle scheduling problem. 
+""" +@kwdef struct Instance{I<:StaticInstance,T} + "static instance to sample arriving requests from" + static_instance::I + "max number of new requests per epoch (rejection sampling)" + max_requests_per_epoch::Int = 10 + "time distance between epoch start and routes start" + Δ_dispatch::T = 1.0 + "duration of each epoch" + epoch_duration::T = 1.0 + "last epoch index" + last_epoch::Int + # "seed for customer sampling" + # seed::S +end + +function Instance( + static_instance::StaticInstance; + max_requests_per_epoch::Int=10, + Δ_dispatch::Float64=1.0, + epoch_duration::Float64=1.0, +) + last_epoch = trunc( + Int, + ( + maximum(static_instance.start_time) - minimum(static_instance.duration[1, :]) - + Δ_dispatch + ) / epoch_duration, + ) + return Instance(; + static_instance=static_instance, + max_requests_per_epoch=max_requests_per_epoch, + Δ_dispatch=Δ_dispatch, + epoch_duration=epoch_duration, + last_epoch=last_epoch, + ) +end + +Δ_dispatch(instance::Instance) = instance.Δ_dispatch +epoch_duration(instance::Instance) = instance.epoch_duration +last_epoch(instance::Instance) = instance.last_epoch +max_requests_per_epoch(instance::Instance) = instance.max_requests_per_epoch +# static_instance(instance::Instance) = instance.static_instance + +# duration(instance::Instance) = duration(instance.static_instance) +# service_time(instance::Instance) = service_time(instance.static_instance) +# coordinate(instance::Instance) = coordinate(instance.static_instance) +# start_time(instance::Instance) = start_time(instance.static_instance) diff --git a/src/DynamicVehicleScheduling/DynamicVSP/environment/plot.jl b/src/DynamicVehicleScheduling/environment/plot.jl similarity index 98% rename from src/DynamicVehicleScheduling/DynamicVSP/environment/plot.jl rename to src/DynamicVehicleScheduling/environment/plot.jl index 60b6d90..409ad79 100644 --- a/src/DynamicVehicleScheduling/DynamicVSP/environment/plot.jl +++ b/src/DynamicVehicleScheduling/environment/plot.jl @@ -58,7 +58,7 @@ 
$TYPEDSIGNATURES Plot the given `routes`` for a VSP `state`. """ -function plot_epoch(state::VSPState, routes; kwargs...) +function plot_epoch(state::DVSPState, routes; kwargs...) (; coordinate, start_time) = state.instance x_depot = coordinate[1].x y_depot = coordinate[1].y diff --git a/src/DynamicVehicleScheduling/environment/scenario.jl b/src/DynamicVehicleScheduling/environment/scenario.jl new file mode 100644 index 0000000..de5d858 --- /dev/null +++ b/src/DynamicVehicleScheduling/environment/scenario.jl @@ -0,0 +1,47 @@ + +struct Scenario + "indices of the new requests in each epoch" + indices::Vector{Vector{Int}} + "service times of the new requests in each epoch" + service_time::Vector{Vector{Float64}} + "start times of the new requests in each epoch" + start_time::Vector{Vector{Float64}} +end + +function Base.getindex(scenario::Scenario, idx::Integer) + return (; + indices=scenario.indices[idx], + service_time=scenario.service_time[idx], + start_time=scenario.start_time[idx], + ) +end + +function generate_scenario( + instance::Instance; seed=nothing, rng::AbstractRNG=MersenneTwister(seed) +) + (; Δ_dispatch, static_instance, last_epoch, epoch_duration, max_requests_per_epoch) = + instance + (; duration, start_time, service_time) = static_instance + N = customer_count(static_instance) + depot = 1 + + new_indices = Vector{Int}[] + new_service_time = Vector{Float64}[] + new_start_time = Vector{Float64}[] + + for epoch in 1:last_epoch + time = epoch_duration * (epoch - 1) + Δ_dispatch + + coordinate_indices = sample_indices(rng, max_requests_per_epoch, N) + start_time_indices = sample_indices(rng, max_requests_per_epoch, N) + service_time_indices = sample_indices(rng, max_requests_per_epoch, N) + + is_feasible = + time .+ duration[depot, coordinate_indices] .<= start_time[start_time_indices] + + push!(new_indices, coordinate_indices[is_feasible]) + push!(new_service_time, service_time[service_time_indices[is_feasible]]) + push!(new_start_time, 
start_time[start_time_indices[is_feasible]]) + end + return Scenario(new_indices, new_service_time, new_start_time) +end diff --git a/src/DynamicVehicleScheduling/environment/state.jl b/src/DynamicVehicleScheduling/environment/state.jl new file mode 100644 index 0000000..ebac101 --- /dev/null +++ b/src/DynamicVehicleScheduling/environment/state.jl @@ -0,0 +1,201 @@ +""" +$TYPEDSIGNATURES + +State data structure for the Dynamic Vehicle Scheduling Problem. +""" +@kwdef mutable struct DVSPState{I} + "current epoch number" + current_epoch::Int = 1 + "list of location indices from the upper instance (useful for adding new customers)" + location_indices::Vector{Int} = Int[] + "associated (static) vehicle scheduling instance" + state_instance::I = StaticInstance() + "for each location, 1 if the request must be dispatched, 0 otherwise. The depot is always 0." + is_must_dispatch::BitVector = falses(0) + "for each location, 1 if the request can be postponed, 0 otherwise. The depot is always 0." + is_postponable::BitVector = falses(0) +end + +function reset_state!( + state::DVSPState, instance::Instance; indices, service_time, start_time +) + (; epoch_duration, Δ_dispatch, static_instance) = instance + indices_with_depot = vcat(1, indices) + service_time_with_depot = vcat(0.0, service_time) + start_time_with_depot = vcat(0.0, start_time) + + coordinates = coordinate(static_instance)[indices_with_depot] + duration_matrix = duration(static_instance)[indices_with_depot, indices_with_depot] + + is_must_dispatch = falses(length(indices_with_depot)) + is_must_dispatch[2:end] .= + Δ_dispatch .+ epoch_duration .+ @view(duration_matrix[1, 2:end]) .> start_time + + is_postponable = falses(length(is_must_dispatch)) + is_postponable[2:end] .= .!is_must_dispatch[2:end] + + state.current_epoch = 1 + state.state_instance = StaticInstance(; + service_time=service_time_with_depot, + start_time=start_time_with_depot, + coordinate=coordinates, + duration=duration_matrix, + ) + 
state.is_must_dispatch = is_must_dispatch + state.is_postponable = is_postponable + state.location_indices = indices_with_depot + return nothing +end + +function DVSPState(instance::Instance; indices, service_time, start_time) + state = DVSPState() + reset_state!(state, instance; indices=indices, service_time=service_time, start_time) + return state +end + +current_epoch(state::DVSPState) = state.current_epoch + +""" +$TYPEDSIGNATURES + +Return the number of locations in `state` (customers + depot). +""" +location_count(state::DVSPState) = location_count(state.state_instance) + +""" +$TYPEDSIGNATURES + +Return the number of customers in `state`. +""" +customer_count(state::DVSPState) = customer_count(state.state_instance) + +""" +$TYPEDSIGNATURES + +Get the service time vector +""" +service_time(state::DVSPState) = service_time(state.state_instance) + +""" +$TYPEDSIGNATURES + +Get the coordinates vector. +""" +coordinate(state::DVSPState) = coordinate(state.state_instance) + +""" +$TYPEDSIGNATURES + +Get the duration matrix. +""" +duration(state::DVSPState) = duration(state.state_instance) + +""" +$TYPEDSIGNATURES + +Get the start time vector. +""" +start_time(state::DVSPState) = start_time(state.state_instance) + +""" +$TYPEDSIGNATURES + +Check if the given routes are feasible. +Routes should be given with global indexation. +Use [`env_routes_from_state_routes`](@ref) if needed to convert the indices beforehand. 
+""" +function is_feasible(state::DVSPState, routes::Vector{Vector{Int}}; verbose::Bool=false) + (; is_must_dispatch, state_instance) = state + (; duration, start_time, service_time) = state_instance + is_dispatched = falses(length(is_must_dispatch)) + + # Check that routes follow time constraints + for route in routes + is_dispatched[route] .= true + current = 1 # start at the depot + current_time = start_time[current] + for next in route + current_time += duration[current, next] + if current_time > start_time[next] + verbose && + @warn "Route $route is infeasible: time constraint violated at location $next" + return false + end + current_time += service_time[next] + current = next + end + end + + # Check that all must dispatch requests are dispatched + if all(is_dispatched[is_must_dispatch]) + return true + else + verbose && @warn "Not all must-dispatch requests are dispatched" + return false + end +end + +""" +remove dispatched customers, and update must-dispatch and postponable flags. 
+""" +function apply_routes!( + state::DVSPState, routes::Vector{Vector{Int}}; check_feasibility::Bool=true +) + check_feasibility && @assert is_feasible(state, routes; verbose=true) + (; is_must_dispatch, is_postponable, state_instance, location_indices) = state + c = cost(state, routes) + + # Remove dispatched customers + N = location_count(state_instance) + undispatched_indices = trues(N) + undispatched_indices[vcat(routes...)] .= false + state.state_instance = StaticInstance(; + coordinate=state_instance.coordinate[undispatched_indices], + service_time=state_instance.service_time[undispatched_indices], + start_time=state_instance.start_time[undispatched_indices], + duration=state_instance.duration[undispatched_indices, undispatched_indices], + ) + state.is_must_dispatch = is_must_dispatch[undispatched_indices] + state.is_postponable = is_postponable[undispatched_indices] + state.location_indices = location_indices[undispatched_indices] + return c +end + +function cost(state::DVSPState, routes::Vector{Vector{Int}}) + return cost(routes, duration(state.state_instance)) +end + +function add_new_customers!( + state::DVSPState, instance::Instance; indices, service_time, start_time +) + (; state_instance, is_must_dispatch, is_postponable, location_indices) = state + + updated_indices = vcat(location_indices, indices) + updated_service_time = vcat(state_instance.service_time, service_time) + updated_start_time = vcat(state_instance.start_time, start_time) + updated_coordinates = instance.static_instance.coordinate[updated_indices] + updated_duration = instance.static_instance.duration[updated_indices, updated_indices] + is_must_dispatch = falses(length(updated_indices)) + is_postponable = falses(length(updated_indices)) + + state.state_instance = StaticInstance(; + coordinate=updated_coordinates, + service_time=updated_service_time, + start_time=updated_start_time, + duration=updated_duration, + ) + + # Compute must-dispatch flags + epoch_duration = 
instance.epoch_duration + Δ_dispatch = instance.Δ_dispatch + planning_start_time = (state.current_epoch - 1) * epoch_duration + Δ_dispatch + is_must_dispatch[2:end] .= + planning_start_time .+ epoch_duration .+ @view(updated_duration[1, 2:end]) .> + updated_start_time[2:end] + is_postponable[2:end] .= .!is_must_dispatch[2:end] + + state.is_must_dispatch = is_must_dispatch + state.is_postponable = is_postponable + state.location_indices = updated_indices + return nothing +end diff --git a/src/DynamicVehicleScheduling/DynamicVSP/vsp/instance.jl b/src/DynamicVehicleScheduling/static_vsp/instance.jl similarity index 65% rename from src/DynamicVehicleScheduling/DynamicVSP/vsp/instance.jl rename to src/DynamicVehicleScheduling/static_vsp/instance.jl index 512a0fe..97091a0 100644 --- a/src/DynamicVehicleScheduling/DynamicVSP/vsp/instance.jl +++ b/src/DynamicVehicleScheduling/static_vsp/instance.jl @@ -6,7 +6,7 @@ Instance data structure for the (deterministic and static) Vehicle Scheduling Pr # Fields $TYPEDFIELDS """ -@kwdef struct VSPInstance{T} +@kwdef struct StaticInstance{T} "coordinates of the locations. The first one is always the depot." coordinate::Vector{Point{T}} = Point{Float64}[] "service time at each location" @@ -17,8 +17,8 @@ $TYPEDFIELDS duration::Matrix{T} = zeros(Float64, 0, 0) end -function Base.show(io::IO, instance::VSPInstance) - N = nb_customers(instance) +function Base.show(io::IO, instance::StaticInstance) + N = customer_count(instance) return print(io, "VSPInstance with $N customers") end @@ -27,39 +27,39 @@ $TYPEDSIGNATURES Return the number of locations in `instance` (customers + depot). """ -nb_locations(instance::VSPInstance) = length(instance.coordinate) +location_count(instance::StaticInstance) = length(instance.coordinate) """ $TYPEDSIGNATURES Return the number of customers in `instance` (excluding the depot). 
""" -nb_customers(instance::VSPInstance) = nb_locations(instance) - 1 +customer_count(instance::StaticInstance) = location_count(instance) - 1 """ $TYPEDSIGNATURES Get the service time vector. """ -service_time(instance::VSPInstance) = instance.service_time +service_time(instance::StaticInstance) = instance.service_time """ $TYPEDSIGNATURES Get the coordinates vector. """ -coordinate(instance::VSPInstance) = instance.coordinate +coordinate(instance::StaticInstance) = instance.coordinate """ $TYPEDSIGNATURES Get the duration matrix. """ -duration(instance::VSPInstance) = instance.duration +duration(instance::StaticInstance) = instance.duration """ $TYPEDSIGNATURES Get the start time vector. """ -start_time(instance::VSPInstance) = instance.start_time +start_time(instance::StaticInstance) = instance.start_time diff --git a/src/DynamicVehicleScheduling/DynamicVSP/vsp/parsing.jl b/src/DynamicVehicleScheduling/static_vsp/parsing.jl similarity index 97% rename from src/DynamicVehicleScheduling/DynamicVSP/vsp/parsing.jl rename to src/DynamicVehicleScheduling/static_vsp/parsing.jl index 21589fd..7bd7f92 100644 --- a/src/DynamicVehicleScheduling/DynamicVSP/vsp/parsing.jl +++ b/src/DynamicVehicleScheduling/static_vsp/parsing.jl @@ -91,5 +91,5 @@ function read_vsp_instance(filepath::String; rounded::Bool=false, normalization= start_time ./= normalization duration ./= normalization - return VSPInstance(; coordinate, service_time, start_time, duration) + return StaticInstance(; coordinate, service_time, start_time, duration) end diff --git a/src/DynamicVehicleScheduling/DynamicVSP/vsp/plot.jl b/src/DynamicVehicleScheduling/static_vsp/plot.jl similarity index 96% rename from src/DynamicVehicleScheduling/DynamicVSP/vsp/plot.jl rename to src/DynamicVehicleScheduling/static_vsp/plot.jl index a9f03de..515ab3d 100644 --- a/src/DynamicVehicleScheduling/DynamicVSP/vsp/plot.jl +++ b/src/DynamicVehicleScheduling/static_vsp/plot.jl @@ -4,7 +4,7 @@ $TYPEDSIGNATURES Plot the given static 
VSP `instance`. """ function plot_instance( - instance::VSPInstance; + instance::StaticInstance; customer_markersize=4, depot_markersize=7, alpha_depot=0.8, diff --git a/src/DynamicVehicleScheduling/DynamicVSP/vsp/solution.jl b/src/DynamicVehicleScheduling/static_vsp/solution.jl similarity index 100% rename from src/DynamicVehicleScheduling/DynamicVSP/vsp/solution.jl rename to src/DynamicVehicleScheduling/static_vsp/solution.jl diff --git a/src/DynamicVehicleScheduling/utils.jl b/src/DynamicVehicleScheduling/utils.jl index 1e17906..36eebd2 100644 --- a/src/DynamicVehicleScheduling/utils.jl +++ b/src/DynamicVehicleScheduling/utils.jl @@ -24,6 +24,18 @@ function cost(routes::Vector{Vector{Int}}, duration::AbstractMatrix) return total end +""" +$TYPEDEF + +Basic point structure. +""" +struct Point{T} + x::T + y::T +end + +Base.show(io::IO, p::Point) = print(io, "($(p.x), $(p.y))") + # """ # $TYPEDSIGNATURES diff --git a/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl b/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl index e148d5e..41801c5 100644 --- a/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl +++ b/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl @@ -145,10 +145,7 @@ end $TYPEDSIGNATURES """ function plot_instance( - ::StochasticVehicleSchedulingBenchmark, - sample::DataSample{<:Instance{City}}; - color_scheme=:lightrainbow, - kwargs..., + ::StochasticVehicleSchedulingBenchmark, sample::DataSample{<:Instance{City}}; kwargs... 
) (; tasks, district_width, width) = sample.instance.city ticks = 0:district_width:width @@ -197,7 +194,6 @@ function plot_instance( marker_z=task.end_time, colormap=:turbo, label=nothing, - # color=palette[max(floor(Int, task.end_time), 1)], ) annotate!(fig, (points[1]..., text("$(i_task)", 10))) end diff --git a/src/Utils/interface.jl b/src/Utils/interface.jl index e6ecb17..e443a15 100644 --- a/src/Utils/interface.jl +++ b/src/Utils/interface.jl @@ -15,28 +15,6 @@ The following methods are optional: """ abstract type AbstractBenchmark end -""" -$TYPEDEF - -Abstract type interface for stochastic benchmark problems. -This type should be used for benchmarks that involve single stage stochastic optimization problems. - -It follows the same interface as [`AbstractBenchmark`](@ref), with the addition of the following methods: -TODO -""" -abstract type AbstractStochasticBenchmark <: AbstractBenchmark end - -""" -$TYPEDEF - -Abstract type interface for dynamic benchmark problems. -This type should be used for benchmarks that involve multi-stage stochastic optimization problems. - -It follows the same interface as [`AbstractStochasticBenchmark`](@ref), with the addition of the following methods: -TODO -""" -abstract type AbstractDynamicBenchmark <: AbstractStochasticBenchmark end - """ generate_sample(::AbstractBenchmark, rng::AbstractRNG; kwargs...) -> DataSample @@ -199,3 +177,36 @@ function compute_gap( end, ) end + +""" +$TYPEDEF + +Abstract type interface for stochastic benchmark problems. +This type should be used for benchmarks that involve single stage stochastic optimization problems. + +It follows the same interface as [`AbstractBenchmark`](@ref), with the addition of the following methods: +- [`generate_anticipative_solver`](@ref) +""" +abstract type AbstractStochasticBenchmark <: AbstractBenchmark end + +# only works for exogenous noise +""" + generate_scenario(::AbstractStochasticBenchmark; kwargs...) 
+""" +function generate_scenario_generator end + +""" + generate_anticipative_solver(::AbstractStochasticBenchmark; kwargs...) +""" +function generate_anticipative_solver end + +""" +$TYPEDEF + +Abstract type interface for dynamic benchmark problems. +This type should be used for benchmarks that involve multi-stage stochastic optimization problems. + +It follows the same interface as [`AbstractStochasticBenchmark`](@ref), with the addition of the following methods: +TODO +""" +abstract type AbstractDynamicBenchmark <: AbstractStochasticBenchmark end diff --git a/src/Warcraft/Warcraft.jl b/src/Warcraft/Warcraft.jl index 669a828..c4dcbae 100644 --- a/src/Warcraft/Warcraft.jl +++ b/src/Warcraft/Warcraft.jl @@ -2,7 +2,7 @@ module Warcraft using ..Utils -using DataDeps +using DataDeps: @datadep_str using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES using Flux using Graphs From 149a2912d0c7545f47bf4c10e125c627c1aa24e6 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Fri, 4 Jul 2025 16:35:53 +0200 Subject: [PATCH 07/29] update --- src/DecisionFocusedLearningBenchmarks.jl | 3 +- .../DynamicVehicleScheduling.jl | 48 +++++++------------ .../environment/scenario.jl | 8 ++++ .../policy/abstract_vsp_policy.jl | 0 .../policy/anticipative_policy.jl | 0 .../{DynamicVSP => }/policy/greedy_policy.jl | 0 .../policy/kleopatra_policy.jl | 0 .../{DynamicVSP => }/policy/lazy_policy.jl | 0 src/Utils/Utils.jl | 7 ++- src/Utils/data_sample.jl | 7 ++- test/dynamic_vsp.jl | 8 ++++ 11 files changed, 46 insertions(+), 35 deletions(-) rename src/DynamicVehicleScheduling/{DynamicVSP => }/policy/abstract_vsp_policy.jl (100%) rename src/DynamicVehicleScheduling/{DynamicVSP => }/policy/anticipative_policy.jl (100%) rename src/DynamicVehicleScheduling/{DynamicVSP => }/policy/greedy_policy.jl (100%) rename src/DynamicVehicleScheduling/{DynamicVSP => }/policy/kleopatra_policy.jl (100%) rename src/DynamicVehicleScheduling/{DynamicVSP => }/policy/lazy_policy.jl (100%) create mode 100644 
test/dynamic_vsp.jl diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl index 252e5d4..97f9ea6 100644 --- a/src/DecisionFocusedLearningBenchmarks.jl +++ b/src/DecisionFocusedLearningBenchmarks.jl @@ -68,7 +68,8 @@ using .DynamicVehicleScheduling # Interface export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark, DataSample -export generate_sample, generate_dataset, generate_scenario +export generate_sample, + generate_dataset, generate_scenario_generator, generate_anticipative_solver export generate_statistical_model export generate_maximizer, maximizer_kwargs export objective_value diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl index d003f40..e380d8e 100644 --- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl +++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl @@ -31,9 +31,9 @@ include("static_vsp/plot.jl") # dynamic environment include("environment/instance.jl") -include("environment/scenario.jl") include("environment/state.jl") include("environment/environment.jl") +include("environment/scenario.jl") include("environment/plot.jl") include("DynamicVSP/algorithms/prize_collecting_vsp.jl") @@ -43,44 +43,30 @@ include("DynamicVSP/learning/features.jl") include("DynamicVSP/learning/2d_features.jl") include("DynamicVSP/learning/dataset.jl") -include("DynamicVSP/policy/abstract_vsp_policy.jl") -include("DynamicVSP/policy/greedy_policy.jl") -include("DynamicVSP/policy/lazy_policy.jl") -include("DynamicVSP/policy/anticipative_policy.jl") -include("DynamicVSP/policy/kleopatra_policy.jl") +include("policy/abstract_vsp_policy.jl") +include("policy/greedy_policy.jl") +include("policy/lazy_policy.jl") +include("policy/anticipative_policy.jl") +include("policy/kleopatra_policy.jl") struct DVSPBenchmark <: AbstractDynamicBenchmark end function Utils.generate_sample(b::DVSPBenchmark, rng::AbstractRNG) - 
return Instance(read_vsp_instance(readdir(datadep"dvrptw"; join=true)[1])) + return DataSample(; + instance=Instance(read_vsp_instance(readdir(datadep"dvrptw"; join=true)[1])) + ) +end + +function Utils.generate_scenario_generator(::DVSPBenchmark) + return generate_scenario +end + +function Utils.generate_anticipative_solver(::DVSPBenchmark; kwargs...) + return AnticipativeVSPPolicy(; kwargs...) end export DVSPBenchmark, generate_sample, generate_scenario export run_policy!, GreedyVSPPolicy, LazyVSPPolicy, KleopatraVSPPolicy, AnticipativeVSPPolicy -# export highs_model, filtered_readdir - -# export solve_hindsight_problem - -# export AbstractDynamicPolicy, BasicDynamicPolicy - -# export GreedyPolicy, LazyPolicy, RandomPolicy, Kleopatra - -# export run_policy - -# export compute_features, -# compute_2D_features, compute_critic_features, compute_critic_2D_features, load_dataset - -# export VSPInstance, -# read_vsp_instance, start_time, env_routes_from_state_routes, state_route_from_env_routes -# export DVSPEnv, prize_collecting_vsp -# export anticipative_solver -# export VSPSolution -# export load_VSP_dataset -# export GreedyVSPPolicy, -# LazyVSPPolicy, AnticipativeVSPPolicy, run_policy!, KleopatraVSPPolicy -# export plot_routes, plot_instance, plot_environment, plot_epoch -# export get_state -# export nb_epochs, get_epoch_indices end diff --git a/src/DynamicVehicleScheduling/environment/scenario.jl b/src/DynamicVehicleScheduling/environment/scenario.jl index de5d858..0e9e056 100644 --- a/src/DynamicVehicleScheduling/environment/scenario.jl +++ b/src/DynamicVehicleScheduling/environment/scenario.jl @@ -45,3 +45,11 @@ function generate_scenario( end return Scenario(new_indices, new_service_time, new_start_time) end + +function generate_scenario(sample::DataSample; kwargs...) + return generate_scenario(sample.instance; kwargs...) +end + +function generate_scenario(env::DVSPEnv; kwargs...) + return generate_scenario(env.instance; kwargs...) 
+end diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/abstract_vsp_policy.jl b/src/DynamicVehicleScheduling/policy/abstract_vsp_policy.jl similarity index 100% rename from src/DynamicVehicleScheduling/DynamicVSP/policy/abstract_vsp_policy.jl rename to src/DynamicVehicleScheduling/policy/abstract_vsp_policy.jl diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/anticipative_policy.jl b/src/DynamicVehicleScheduling/policy/anticipative_policy.jl similarity index 100% rename from src/DynamicVehicleScheduling/DynamicVSP/policy/anticipative_policy.jl rename to src/DynamicVehicleScheduling/policy/anticipative_policy.jl diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/greedy_policy.jl b/src/DynamicVehicleScheduling/policy/greedy_policy.jl similarity index 100% rename from src/DynamicVehicleScheduling/DynamicVSP/policy/greedy_policy.jl rename to src/DynamicVehicleScheduling/policy/greedy_policy.jl diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/kleopatra_policy.jl b/src/DynamicVehicleScheduling/policy/kleopatra_policy.jl similarity index 100% rename from src/DynamicVehicleScheduling/DynamicVSP/policy/kleopatra_policy.jl rename to src/DynamicVehicleScheduling/policy/kleopatra_policy.jl diff --git a/src/DynamicVehicleScheduling/DynamicVSP/policy/lazy_policy.jl b/src/DynamicVehicleScheduling/policy/lazy_policy.jl similarity index 100% rename from src/DynamicVehicleScheduling/DynamicVSP/policy/lazy_policy.jl rename to src/DynamicVehicleScheduling/policy/lazy_policy.jl diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl index 58bc161..67c3d94 100644 --- a/src/Utils/Utils.jl +++ b/src/Utils/Utils.jl @@ -20,7 +20,12 @@ include("model_builders.jl") export DataSample export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark -export generate_dataset, generate_statistical_model, generate_maximizer, generate_sample +export generate_dataset, + generate_statistical_model, + generate_maximizer, + generate_sample, + 
generate_scenario_generator, + generate_anticipative_solver export plot_data, compute_gap export maximizer_kwargs export grid_graph, get_path, path_to_matrix diff --git a/src/Utils/data_sample.jl b/src/Utils/data_sample.jl index e9a8a3c..fde1bf3 100644 --- a/src/Utils/data_sample.jl +++ b/src/Utils/data_sample.jl @@ -7,10 +7,13 @@ Data sample data structure. $TYPEDFIELDS """ @kwdef struct DataSample{ - I,F<:AbstractArray,S<:Union{AbstractArray,Nothing},C<:Union{AbstractArray,Nothing} + I, + F<:Union{AbstractArray,Nothing}, + S<:Union{AbstractArray,Nothing}, + C<:Union{AbstractArray,Nothing}, } "features" - x::F + x::F = nothing "target cost parameters (optional)" θ_true::C = nothing "target solution (optional)" diff --git a/test/dynamic_vsp.jl b/test/dynamic_vsp.jl new file mode 100644 index 0000000..49c9b77 --- /dev/null +++ b/test/dynamic_vsp.jl @@ -0,0 +1,8 @@ +# @testitem "DVSP - parsing" begin +# using DecisionFocusedLearningBenchmarks.DynamicVehicleScheduling: +# read_vsp_instance, location_count, customer_count +# path = joinpath(@__DIR__, "data", "vsp_instance.txt") +# instance = read_vsp_instance(path) +# @test location_count(instance) == 6 +# @test customer_count(instance) == 5 +# end From 7f9d322c226d8db8dd979e932693e587f6e8d7b0 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Mon, 7 Jul 2025 11:15:22 +0200 Subject: [PATCH 08/29] bugfix --- src/DecisionFocusedLearningBenchmarks.jl | 6 ++++-- .../DynamicVehicleScheduling.jl | 8 ++++++-- .../environment/environment.jl | 4 ++++ .../environment/scenario.jl | 4 ---- src/Utils/Utils.jl | 4 +++- src/Utils/interface.jl | 20 +++++++++++++++++++ 6 files changed, 37 insertions(+), 9 deletions(-) diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl index 97f9ea6..dfd0a42 100644 --- a/src/DecisionFocusedLearningBenchmarks.jl +++ b/src/DecisionFocusedLearningBenchmarks.jl @@ -68,10 +68,12 @@ using .DynamicVehicleScheduling # Interface export AbstractBenchmark, 
AbstractStochasticBenchmark, AbstractDynamicBenchmark, DataSample -export generate_sample, - generate_dataset, generate_scenario_generator, generate_anticipative_solver + +export generate_sample, generate_dataset, generate_environments +export generate_scenario_generator, generate_anticipative_solver export generate_statistical_model export generate_maximizer, maximizer_kwargs + export objective_value export plot_data, plot_instance, plot_solution export compute_gap diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl index e380d8e..f8d2df6 100644 --- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl +++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl @@ -32,8 +32,8 @@ include("static_vsp/plot.jl") # dynamic environment include("environment/instance.jl") include("environment/state.jl") -include("environment/environment.jl") include("environment/scenario.jl") +include("environment/environment.jl") include("environment/plot.jl") include("DynamicVSP/algorithms/prize_collecting_vsp.jl") @@ -65,7 +65,11 @@ function Utils.generate_anticipative_solver(::DVSPBenchmark; kwargs...) return AnticipativeVSPPolicy(; kwargs...) end -export DVSPBenchmark, generate_sample, generate_scenario +function Utils.generate_environment(::DVSPBenchmark, instance::Instance; kwargs...) + return DVSPEnv(instance; kwargs...) 
+end + +export DVSPBenchmark, generate_environment # , generate_sample, generate_anticipative_solver export run_policy!, GreedyVSPPolicy, LazyVSPPolicy, KleopatraVSPPolicy, AnticipativeVSPPolicy diff --git a/src/DynamicVehicleScheduling/environment/environment.jl b/src/DynamicVehicleScheduling/environment/environment.jl index 8109c0e..a09db9a 100644 --- a/src/DynamicVehicleScheduling/environment/environment.jl +++ b/src/DynamicVehicleScheduling/environment/environment.jl @@ -84,3 +84,7 @@ function CommonRLInterface.act!(env::DVSPEnv, routes, scenario=env.scenario) add_new_customers!(env.state, env.instance; scenario[current_epoch(env)]...) return reward end + +function generate_scenario(env::DVSPEnv; kwargs...) + return generate_scenario(env.instance; kwargs...) +end diff --git a/src/DynamicVehicleScheduling/environment/scenario.jl b/src/DynamicVehicleScheduling/environment/scenario.jl index 0e9e056..cee4fe7 100644 --- a/src/DynamicVehicleScheduling/environment/scenario.jl +++ b/src/DynamicVehicleScheduling/environment/scenario.jl @@ -49,7 +49,3 @@ end function generate_scenario(sample::DataSample; kwargs...) return generate_scenario(sample.instance; kwargs...) end - -function generate_scenario(env::DVSPEnv; kwargs...) - return generate_scenario(env.instance; kwargs...) 
-end diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl index 67c3d94..7a1e804 100644 --- a/src/Utils/Utils.jl +++ b/src/Utils/Utils.jl @@ -25,7 +25,9 @@ export generate_dataset, generate_maximizer, generate_sample, generate_scenario_generator, - generate_anticipative_solver + generate_anticipative_solver, + generate_environment, + generate_environments export plot_data, compute_gap export maximizer_kwargs export grid_graph, get_path, path_to_matrix diff --git a/src/Utils/interface.jl b/src/Utils/interface.jl index e443a15..3b70f9e 100644 --- a/src/Utils/interface.jl +++ b/src/Utils/interface.jl @@ -210,3 +210,23 @@ It follows the same interface as [`AbstractStochasticBenchmark`](@ref), with the TODO """ abstract type AbstractDynamicBenchmark <: AbstractStochasticBenchmark end + +""" + generate_environment(::AbstractDynamicBenchmark, instance; kwargs...) + +Initialize an environment for the given dynamic benchmark instance. +""" +function generate_environment end + +""" +$TYPEDSIGNATURES + +Generate a vector of environments for the given dynamic benchmark and dataset. +""" +function generate_environments( + bench::AbstractDynamicBenchmark, dataset::Vector{<:DataSample}, kwargs... +) + return map(dataset) do sample + generate_environment(bench, sample.instance; kwargs...) 
+ end +end From 9fe5e86f3d124f0c6b5f1fe88d99c65a35e5dfaa Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Mon, 7 Jul 2025 17:08:24 +0200 Subject: [PATCH 09/29] now anticipative solver directly creates an epoch dataset --- .../DynamicVSP/learning/2d_features.jl | 13 -- .../DynamicVehicleScheduling.jl | 18 ++- .../algorithms/anticipative_solver.jl | 74 ++++++++- .../algorithms/prize_collecting_vsp.jl | 150 +++++++++--------- .../environment/environment.jl | 14 +- .../learning/2d_features.jl | 16 ++ .../{DynamicVSP => }/learning/dataset.jl | 0 .../{DynamicVSP => }/learning/features.jl | 0 8 files changed, 176 insertions(+), 109 deletions(-) delete mode 100644 src/DynamicVehicleScheduling/DynamicVSP/learning/2d_features.jl rename src/DynamicVehicleScheduling/{DynamicVSP => }/algorithms/anticipative_solver.jl (59%) rename src/DynamicVehicleScheduling/{DynamicVSP => }/algorithms/prize_collecting_vsp.jl (59%) create mode 100644 src/DynamicVehicleScheduling/learning/2d_features.jl rename src/DynamicVehicleScheduling/{DynamicVSP => }/learning/dataset.jl (100%) rename src/DynamicVehicleScheduling/{DynamicVSP => }/learning/features.jl (100%) diff --git a/src/DynamicVehicleScheduling/DynamicVSP/learning/2d_features.jl b/src/DynamicVehicleScheduling/DynamicVSP/learning/2d_features.jl deleted file mode 100644 index 7226e9c..0000000 --- a/src/DynamicVehicleScheduling/DynamicVSP/learning/2d_features.jl +++ /dev/null @@ -1,13 +0,0 @@ -function get_features_meanTimeToRequests(env::DVSPEnv) - quantiles = [0.5] - a = env.instance.static_instance.duration[env.state.location_indices, 2:end] - quantileTimeToRequests = mapslices(x -> quantile(x, quantiles), a; dims=2) - return quantileTimeToRequests -end - -function compute_2D_features(env::DVSPEnv) - state = env.state - timeDepotRequest = state.state_instance.duration[:, 1][state.is_postponable] - quantileTimeToRequests = get_features_meanTimeToRequests(env)[state.is_postponable] - return hcat(timeDepotRequest, quantileTimeToRequests)' -end 
diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl index f8d2df6..0b5649a 100644 --- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl +++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl @@ -36,12 +36,12 @@ include("environment/scenario.jl") include("environment/environment.jl") include("environment/plot.jl") -include("DynamicVSP/algorithms/prize_collecting_vsp.jl") -include("DynamicVSP/algorithms/anticipative_solver.jl") +include("algorithms/prize_collecting_vsp.jl") +include("algorithms/anticipative_solver.jl") -include("DynamicVSP/learning/features.jl") -include("DynamicVSP/learning/2d_features.jl") -include("DynamicVSP/learning/dataset.jl") +include("learning/features.jl") +include("learning/2d_features.jl") +include("learning/dataset.jl") include("policy/abstract_vsp_policy.jl") include("policy/greedy_policy.jl") @@ -62,14 +62,18 @@ function Utils.generate_scenario_generator(::DVSPBenchmark) end function Utils.generate_anticipative_solver(::DVSPBenchmark; kwargs...) - return AnticipativeVSPPolicy(; kwargs...) + return anticipative_solver end function Utils.generate_environment(::DVSPBenchmark, instance::Instance; kwargs...) return DVSPEnv(instance; kwargs...) 
end -export DVSPBenchmark, generate_environment # , generate_sample, generate_anticipative_solver +function Utils.generate_maximizer(::DVSPBenchmark) + return prize_collecting_vsp +end + +export DVSPBenchmark #, generate_environment # , generate_sample, generate_anticipative_solver export run_policy!, GreedyVSPPolicy, LazyVSPPolicy, KleopatraVSPPolicy, AnticipativeVSPPolicy diff --git a/src/DynamicVehicleScheduling/DynamicVSP/algorithms/anticipative_solver.jl b/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl similarity index 59% rename from src/DynamicVehicleScheduling/DynamicVSP/algorithms/anticipative_solver.jl rename to src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl index ef897e5..4ff78e2 100644 --- a/src/DynamicVehicleScheduling/DynamicVSP/algorithms/anticipative_solver.jl +++ b/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl @@ -116,6 +116,76 @@ function anticipative_solver( optimize!(model) - return JuMP.objective_value(model), - retrieve_routes_anticipative(value.(y), env, customer_index) + obj = JuMP.objective_value(model) + epoch_routes = retrieve_routes_anticipative(value.(y), env, customer_index) + + epoch_indices = Vector{Int}[] + N = 1 + indices = [1] + for epoch in 1:last_epoch + M = length(scenario.indices[epoch]) + indices = vcat(indices, (N + 1):(N + M)) + push!(epoch_indices, copy(indices)) + N = N + M + epoch_routes[epoch] + dispatched = vcat(epoch_routes[epoch]...) + indices = setdiff(indices, dispatched) + end + + indices = vcat(1, scenario.indices...) + start_time = vcat(0.0, scenario.start_time...) + service_time = vcat(0.0, scenario.service_time...) 
+ + dataset = map(1:last_epoch) do epoch + routes = epoch_routes[epoch] + epoch_customers = epoch_indices[epoch] + # y_true = [ + # map(idx -> findfirst(==(idx), epoch_customers), route) for route in routes + # ] + + y_true = + VSPSolution( + Vector{Int}[ + map(idx -> findfirst(==(idx), epoch_customers), route) for + route in routes + ]; + max_index=length(epoch_customers), + ).edge_matrix + + location_indices = indices[epoch_customers] + new_coordinates = env.instance.static_instance.coordinate[location_indices] + new_start_time = start_time[epoch_customers] + new_service_time = service_time[epoch_customers] + new_duration = env.instance.static_instance.duration[ + location_indices, location_indices + ] + static_instance = StaticInstance( + new_coordinates, new_service_time, new_start_time, new_duration + ) + + is_must_dispatch = falses(length(location_indices)) + is_postponable = falses(length(location_indices)) + + epoch_duration = env.instance.epoch_duration + Δ_dispatch = env.instance.Δ_dispatch + planning_start_time = (epoch - 1) * epoch_duration + Δ_dispatch + is_must_dispatch[2:end] .= + planning_start_time .+ epoch_duration .+ @view(new_duration[1, 2:end]) .> + new_start_time[2:end] + is_postponable[2:end] .= .!is_must_dispatch[2:end] + + state = DVSPState(; + state_instance=static_instance, + is_must_dispatch, + is_postponable, + location_indices, + current_epoch=epoch, + ) + + x = compute_2D_features(state, env.instance) + + return DataSample(; instance=state, y_true, x) + end + + return obj, dataset end diff --git a/src/DynamicVehicleScheduling/DynamicVSP/algorithms/prize_collecting_vsp.jl b/src/DynamicVehicleScheduling/algorithms/prize_collecting_vsp.jl similarity index 59% rename from src/DynamicVehicleScheduling/DynamicVSP/algorithms/prize_collecting_vsp.jl rename to src/DynamicVehicleScheduling/algorithms/prize_collecting_vsp.jl index 75af6a4..14c51f9 100644 --- a/src/DynamicVehicleScheduling/DynamicVSP/algorithms/prize_collecting_vsp.jl +++ 
b/src/DynamicVehicleScheduling/algorithms/prize_collecting_vsp.jl @@ -127,83 +127,83 @@ function prize_collecting_vsp( return retrieve_routes(value.(y), graph) end -# ? -function prize_collecting_vsp_Q( - θ::AbstractVector, - vals::AbstractVector; - instance::DVSPState, - model_builder=highs_model, - kwargs..., -) - (; duration) = instance.instance - graph = create_graph(instance) - model = model_builder() - set_silent(model) - nb_nodes = nv(graph) - job_indices = 2:(nb_nodes) - @variable(model, y[i=1:nb_nodes, j=1:nb_nodes; has_edge(graph, i, j)] >= 0) - θ_ext = fill(0.0, location_count(instance.instance)) # no prize for must dispatch requests, only hard constraints - θ_ext[instance.is_postponable] .= θ - # v_ext = fill(0.0, nb_locations(instance.instance)) # no prize for must dispatch requests, only hard constraints - # v_ext[instance.is_postponable] .= vals - @objective( - model, - Max, - sum( - (θ_ext[dst(edge)] + vals[dst(edge)] - duration[src(edge), dst(edge)]) * - y[src(edge), dst(edge)] for edge in edges(graph) - ) - ) - @constraint( - model, - flow[i in 2:nb_nodes], - sum(y[j, i] for j in inneighbors(graph, i)) == - sum(y[i, j] for j in outneighbors(graph, i)) - ) - @constraint( - model, demand[i in job_indices], sum(y[j, i] for j in inneighbors(graph, i)) <= 1 - ) - # must dispatch constraints - @constraint( - model, - demand_must_dispatch[i in job_indices; instance.is_must_dispatch[i]], - sum(y[j, i] for j in inneighbors(graph, i)) == 1 - ) - optimize!(model) - return retrieve_routes(value.(y), graph) -end +# # ? 
+# function prize_collecting_vsp_Q( +# θ::AbstractVector, +# vals::AbstractVector; +# instance::DVSPState, +# model_builder=highs_model, +# kwargs..., +# ) +# (; duration) = instance.instance +# graph = create_graph(instance) +# model = model_builder() +# set_silent(model) +# nb_nodes = nv(graph) +# job_indices = 2:(nb_nodes) +# @variable(model, y[i=1:nb_nodes, j=1:nb_nodes; has_edge(graph, i, j)] >= 0) +# θ_ext = fill(0.0, location_count(instance.instance)) # no prize for must dispatch requests, only hard constraints +# θ_ext[instance.is_postponable] .= θ +# # v_ext = fill(0.0, nb_locations(instance.instance)) # no prize for must dispatch requests, only hard constraints +# # v_ext[instance.is_postponable] .= vals +# @objective( +# model, +# Max, +# sum( +# (θ_ext[dst(edge)] + vals[dst(edge)] - duration[src(edge), dst(edge)]) * +# y[src(edge), dst(edge)] for edge in edges(graph) +# ) +# ) +# @constraint( +# model, +# flow[i in 2:nb_nodes], +# sum(y[j, i] for j in inneighbors(graph, i)) == +# sum(y[i, j] for j in outneighbors(graph, i)) +# ) +# @constraint( +# model, demand[i in job_indices], sum(y[j, i] for j in inneighbors(graph, i)) <= 1 +# ) +# # must dispatch constraints +# @constraint( +# model, +# demand_must_dispatch[i in job_indices; instance.is_must_dispatch[i]], +# sum(y[j, i] for j in inneighbors(graph, i)) == 1 +# ) +# optimize!(model) +# return retrieve_routes(value.(y), graph) +# end -function my_objective_value(θ, routes; instance) - (; duration) = instance.instance - total = 0.0 - θ_ext = fill(0.0, location_count(instance)) - θ_ext[instance.is_postponable] .= θ - for route in routes - for (u, v) in partition(vcat(1, route), 2, 1) - total += θ_ext[v] - duration[u, v] - end - end - return -total -end +# function my_objective_value(θ, routes; instance) +# (; duration) = instance.instance +# total = 0.0 +# θ_ext = fill(0.0, location_count(instance)) +# θ_ext[instance.is_postponable] .= θ +# for route in routes +# for (u, v) in partition(vcat(1, route), 
2, 1) +# total += θ_ext[v] - duration[u, v] +# end +# end +# return -total +# end -function _objective_value(θ, routes; instance) - (; duration) = instance.instance - total = 0.0 - θ_ext = fill(0.0, location_count(instance)) - θ_ext[instance.is_postponable] .= θ - mapping = cumsum(instance.is_postponable) - g = falses(length(θ)) - for route in routes - for (u, v) in partition(vcat(1, route), 2, 1) - total -= duration[u, v] - if instance.is_postponable[v] - total += θ_ext[v] - g[mapping[v]] = 1 - end - end - end - return -total, g -end +# function _objective_value(θ, routes; instance) +# (; duration) = instance.instance +# total = 0.0 +# θ_ext = fill(0.0, location_count(instance)) +# θ_ext[instance.is_postponable] .= θ +# mapping = cumsum(instance.is_postponable) +# g = falses(length(θ)) +# for route in routes +# for (u, v) in partition(vcat(1, route), 2, 1) +# total -= duration[u, v] +# if instance.is_postponable[v] +# total += θ_ext[v] +# g[mapping[v]] = 1 +# end +# end +# end +# return -total, g +# end # function ChainRulesCore.rrule(::typeof(my_objective_value), θ, routes; instance) # total, g = _objective_value(θ, routes; instance) diff --git a/src/DynamicVehicleScheduling/environment/environment.jl b/src/DynamicVehicleScheduling/environment/environment.jl index a09db9a..ffab69b 100644 --- a/src/DynamicVehicleScheduling/environment/environment.jl +++ b/src/DynamicVehicleScheduling/environment/environment.jl @@ -52,15 +52,6 @@ Check if the episode is terminated, i.e. if the current epoch is the last one. 
""" CommonRLInterface.terminated(env::DVSPEnv) = current_epoch(env) >= last_epoch(env) -""" -draw new customers in scenario -""" -function draw_next_epoch!(env::DVSPEnv, scenario=env.scenario) - env.state.current_epoch += 1 - - return nothing -end - """ $TYPEDSIGNATURES @@ -78,10 +69,9 @@ remove dispatched customers, advance time, and add new requests to the environme function CommonRLInterface.act!(env::DVSPEnv, routes, scenario=env.scenario) reward = -apply_routes!(env.state, routes) env.state.current_epoch += 1 - if current_epoch(env) > last_epoch(env) - return nothing + if current_epoch(env) <= last_epoch(env) + add_new_customers!(env.state, env.instance; scenario[current_epoch(env)]...) end - add_new_customers!(env.state, env.instance; scenario[current_epoch(env)]...) return reward end diff --git a/src/DynamicVehicleScheduling/learning/2d_features.jl b/src/DynamicVehicleScheduling/learning/2d_features.jl new file mode 100644 index 0000000..6e23810 --- /dev/null +++ b/src/DynamicVehicleScheduling/learning/2d_features.jl @@ -0,0 +1,16 @@ +function get_features_meanTimeToRequests(state::DVSPState, instance::Instance) + quantiles = [0.5] + a = instance.static_instance.duration[state.location_indices, 2:end] + quantileTimeToRequests = mapslices(x -> quantile(x, quantiles), a; dims=2) + return quantileTimeToRequests +end + +function compute_2D_features(state::DVSPState, instance::Instance) + timeDepotRequest = state.state_instance.duration[:, 1][state.is_postponable] + quantileTimeToRequests = get_features_meanTimeToRequests(state, instance)[state.is_postponable] + return hcat(timeDepotRequest, quantileTimeToRequests)' +end + +function compute_2D_features(env::DVSPEnv) + return compute_2D_features(env.state, env.instance) +end diff --git a/src/DynamicVehicleScheduling/DynamicVSP/learning/dataset.jl b/src/DynamicVehicleScheduling/learning/dataset.jl similarity index 100% rename from src/DynamicVehicleScheduling/DynamicVSP/learning/dataset.jl rename to 
src/DynamicVehicleScheduling/learning/dataset.jl diff --git a/src/DynamicVehicleScheduling/DynamicVSP/learning/features.jl b/src/DynamicVehicleScheduling/learning/features.jl similarity index 100% rename from src/DynamicVehicleScheduling/DynamicVSP/learning/features.jl rename to src/DynamicVehicleScheduling/learning/features.jl From 67a0fa9ecc70243d64fab6bbf0bdd08e4006b44e Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Mon, 7 Jul 2025 18:00:39 +0200 Subject: [PATCH 10/29] fix tests and cleanup --- Project.toml | 2 + docs/src/warcraft.md | 155 +++++++++++ .../DynamicVehicleScheduling.jl | 39 ++- .../abstract_policy.jl | 5 - .../environment/environment.jl | 5 +- .../environment/instance.jl | 8 - .../environment/plot.jl | 242 +++++++++--------- .../learning/dataset.jl | 37 --- src/DynamicVehicleScheduling/maximizer.jl | 25 ++ .../policy/abstract_vsp_policy.jl | 6 + src/DynamicVehicleScheduling/utils.jl | 9 - 11 files changed, 340 insertions(+), 193 deletions(-) create mode 100644 docs/src/warcraft.md delete mode 100644 src/DynamicVehicleScheduling/abstract_policy.jl delete mode 100644 src/DynamicVehicleScheduling/learning/dataset.jl create mode 100644 src/DynamicVehicleScheduling/maximizer.jl diff --git a/Project.toml b/Project.toml index b9695a6..03e2d27 100644 --- a/Project.toml +++ b/Project.toml @@ -13,6 +13,7 @@ Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6" HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b" Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0" +InferOpt = "4846b161-c94e-4150-8dac-c7ae193c601f" Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9" IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" @@ -40,6 +41,7 @@ Flux = "0.14, 0.15, 0.16" Graphs = "1.11" HiGHS = "1.9" Images = "0.26.1" +InferOpt = "0.7.0" Ipopt = "1.6" IterTools = "1.10.0" JSON = "0.21.4" diff --git a/docs/src/warcraft.md b/docs/src/warcraft.md new file mode 100644 index 0000000..c3400e7 --- /dev/null +++ 
b/docs/src/warcraft.md @@ -0,0 +1,155 @@ +```@meta +EditURL = "tutorials/warcraft.jl" +``` + +# Path-finding on image maps + +In this tutorial, we showcase DecisionFocusedLearningBenchmarks.jl capabilities on one of its main benchmarks: the Warcraft benchmark. +This benchmark problem is a simple path-finding problem where the goal is to find the shortest path between the top left and bottom right corners of a given image map. +The map is represented as a 2D image representing a 12x12 grid, each cell having an unknown travel cost depending on the terrain type. + +First, let's load the package and create a benchmark object as follows: + +````@example warcraft +using DecisionFocusedLearningBenchmarks +b = WarcraftBenchmark() +```` + +## Dataset generation + +These benchmark objects behave as generators that can generate various needed elements in order to build an algorithm to tackle the problem. +First of all, all benchmarks are capable of generating datasets as needed, using the [`generate_dataset`](@ref) method. +This method takes as input the benchmark object for which the dataset is to be generated, and a second argument specifying the number of samples to generate: + +````@example warcraft +dataset = generate_dataset(b, 50); +nothing #hide +```` + +We obtain a vector of [`DataSample`](@ref) objects, containing all needed data for the problem. +Subdatasets can be created through regular slicing: + +````@example warcraft +train_dataset, test_dataset = dataset[1:45], dataset[46:50] +```` + +And getting an individual sample will return a [`DataSample`](@ref) with four fields: `x`, `instance`, `θ_true`, and `y_true`: + +````@example warcraft +sample = test_dataset[1] +```` + +`x` corresponds to the input features, i.e. the input image (3D array) in the Warcraft benchmark case: + +````@example warcraft +x = sample.x +```` + +`θ_true` corresponds to the true unknown terrain weights. 
We use the opposite of the true weights in order to formulate the optimization problem as a maximization problem: + +````@example warcraft +θ_true = sample.θ_true +```` + +`y_true` corresponds to the optimal shortest path, encoded as a binary matrix: + +````@example warcraft +y_true = sample.y_true +```` + +`instance` is not used in this benchmark, therefore set to nothing: + +````@example warcraft +isnothing(sample.instance) +```` + +For some benchmarks, we provide the following plotting method [`plot_data`](@ref) to visualize the data: + +````@example warcraft +plot_data(b, sample) +```` + +We can see here the terrain image, the true terrain weights, and the true shortest path avoiding the high cost cells. + +## Building a pipeline + +DecisionFocusedLearningBenchmarks also provides methods to build a hybrid machine learning and combinatorial optimization pipeline for the benchmark. +First, the [`generate_statistical_model`](@ref) method generates a machine learning predictor to predict cell weights from the input image: + +````@example warcraft +model = generate_statistical_model(b) +```` + +In the case of the Warcraft benchmark, the model is a convolutional neural network built using the Flux.jl package. + +````@example warcraft +θ = model(x) +```` + +Note that the model is not trained yet, and its parameters are randomly initialized. + +Finally, the [`generate_maximizer`](@ref) method can be used to generate a combinatorial optimization algorithm that takes the predicted cell weights as input and returns the corresponding shortest path: + +````@example warcraft +maximizer = generate_maximizer(b; dijkstra=true) +```` + +In the case of the Warcraft benchmark, the method has an additional keyword argument to choose the algorithm to use: Dijkstra's algorithm or the Bellman-Ford algorithm. 
+ +````@example warcraft +y = maximizer(θ) +```` + +As we can see, currently the pipeline predicts random noise as cell weights, and therefore the maximizer returns a straight line path. + +````@example warcraft +plot_data(b, DataSample(; x, θ_true=θ, y_true=y)) +```` + +We can evaluate the current pipeline performance using the optimality gap metric: + +````@example warcraft +starting_gap = compute_gap(b, test_dataset, model, maximizer) +```` + +## Using a learning algorithm + +We can now train the model using the InferOpt.jl package: + +````@example warcraft +using InferOpt +using Flux +using Plots + +perturbed_maximizer = PerturbedMultiplicative(maximizer; ε=0.2, nb_samples=100) +loss = FenchelYoungLoss(perturbed_maximizer) + +starting_gap = compute_gap(b, test_dataset, model, maximizer) + +opt_state = Flux.setup(Adam(1e-3), model) +loss_history = Float64[] +for epoch in 1:50 + val, grads = Flux.withgradient(model) do m + sum(loss(m(x), y_true) for (; x, y_true) in train_dataset) / length(train_dataset) + end + Flux.update!(opt_state, model, grads[1]) + push!(loss_history, val) +end + +plot(loss_history; xlabel="Epoch", ylabel="Loss", title="Training loss") +```` + +````@example warcraft +final_gap = compute_gap(b, test_dataset, model, maximizer) +```` + +````@example warcraft +θ = model(x) +y = maximizer(θ) +plot_data(b, DataSample(; x, θ_true=θ, y_true=y)) +```` + +--- + +*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).* + diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl index 0b5649a..34476b7 100644 --- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl +++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl @@ -5,11 +5,10 @@ using ..Utils using Base: @kwdef using CommonRLInterface: CommonRLInterface, AbstractEnv, reset!, terminated, observe, act! 
using DataDeps: @datadep_str -# using ChainRulesCore using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES using Graphs using HiGHS -# using InferOpt +using InferOpt: LinearMaximizer using IterTools: partition using JSON using JuMP @@ -21,8 +20,6 @@ using Statistics: mean, quantile include("utils.jl") -include("abstract_policy.jl") - # static vsp stuff include("static_vsp/instance.jl") include("static_vsp/parsing.jl") @@ -41,7 +38,6 @@ include("algorithms/anticipative_solver.jl") include("learning/features.jl") include("learning/2d_features.jl") -include("learning/dataset.jl") include("policy/abstract_vsp_policy.jl") include("policy/greedy_policy.jl") @@ -49,12 +45,33 @@ include("policy/lazy_policy.jl") include("policy/anticipative_policy.jl") include("policy/kleopatra_policy.jl") -struct DVSPBenchmark <: AbstractDynamicBenchmark end +include("maximizer.jl") + +""" +$TYPEDEF + +Abstract type for dynamic vehicle scheduling benchmarks. +""" +@kwdef struct DVSPBenchmark <: AbstractDynamicBenchmark + max_requests_per_epoch::Int = 10 + Δ_dispatch::Float64 = 1.0 + epoch_duration::Float64 = 1.0 +end -function Utils.generate_sample(b::DVSPBenchmark, rng::AbstractRNG) - return DataSample(; - instance=Instance(read_vsp_instance(readdir(datadep"dvrptw"; join=true)[1])) - ) +function Utils.generate_dataset(b::DVSPBenchmark, dataset_size::Int=1) + (; max_requests_per_epoch, Δ_dispatch, epoch_duration) = b + files = readdir(datadep"dvrptw"; join=true) + dataset_size = min(dataset_size, length(files)) + return [ + DataSample(; + instance=Instance( + read_vsp_instance(files[i]); + max_requests_per_epoch, + Δ_dispatch, + epoch_duration, + ), + ) for i in 1:dataset_size + ] end function Utils.generate_scenario_generator(::DVSPBenchmark) @@ -70,7 +87,7 @@ function Utils.generate_environment(::DVSPBenchmark, instance::Instance; kwargs. 
end function Utils.generate_maximizer(::DVSPBenchmark) - return prize_collecting_vsp + return LinearMaximizer(oracle; g, h) end export DVSPBenchmark #, generate_environment # , generate_sample, generate_anticipative_solver diff --git a/src/DynamicVehicleScheduling/abstract_policy.jl b/src/DynamicVehicleScheduling/abstract_policy.jl deleted file mode 100644 index 951efd0..0000000 --- a/src/DynamicVehicleScheduling/abstract_policy.jl +++ /dev/null @@ -1,5 +0,0 @@ -abstract type AbstractDynamicPolicy end - -function (π::AbstractDynamicPolicy)(env; kwargs...) - throw("Not implemented") -end diff --git a/src/DynamicVehicleScheduling/environment/environment.jl b/src/DynamicVehicleScheduling/environment/environment.jl index ffab69b..0bbc4ae 100644 --- a/src/DynamicVehicleScheduling/environment/environment.jl +++ b/src/DynamicVehicleScheduling/environment/environment.jl @@ -45,12 +45,13 @@ $TYPEDSIGNATURES Get the planning start time of the environment, i.e. the time at which vehicles routes dispatched in current epoch can depart. """ planning_start_time(env::DVSPEnv) = time(env) + Δ_dispatch(env) + """ $TYPEDSIGNATURES Check if the episode is terminated, i.e. if the current epoch is the last one. """ -CommonRLInterface.terminated(env::DVSPEnv) = current_epoch(env) >= last_epoch(env) +CommonRLInterface.terminated(env::DVSPEnv) = current_epoch(env) > last_epoch(env) """ $TYPEDSIGNATURES @@ -69,7 +70,7 @@ remove dispatched customers, advance time, and add new requests to the environme function CommonRLInterface.act!(env::DVSPEnv, routes, scenario=env.scenario) reward = -apply_routes!(env.state, routes) env.state.current_epoch += 1 - if current_epoch(env) <= last_epoch(env) + if !CommonRLInterface.terminated(env) add_new_customers!(env.state, env.instance; scenario[current_epoch(env)]...) 
end return reward diff --git a/src/DynamicVehicleScheduling/environment/instance.jl b/src/DynamicVehicleScheduling/environment/instance.jl index b375077..17d5e9d 100644 --- a/src/DynamicVehicleScheduling/environment/instance.jl +++ b/src/DynamicVehicleScheduling/environment/instance.jl @@ -14,8 +14,6 @@ Instance data structure for the dynamic vehicle scheduling problem. epoch_duration::T = 1.0 "last epoch index" last_epoch::Int - # "seed for customer sampling" - # seed::S end function Instance( @@ -44,9 +42,3 @@ end epoch_duration(instance::Instance) = instance.epoch_duration last_epoch(instance::Instance) = instance.last_epoch max_requests_per_epoch(instance::Instance) = instance.max_requests_per_epoch -# static_instance(instance::Instance) = instance.static_instance - -# duration(instance::Instance) = duration(instance.static_instance) -# service_time(instance::Instance) = service_time(instance.static_instance) -# coordinate(instance::Instance) = coordinate(instance.static_instance) -# start_time(instance::Instance) = start_time(instance.static_instance) diff --git a/src/DynamicVehicleScheduling/environment/plot.jl b/src/DynamicVehicleScheduling/environment/plot.jl index 409ad79..53831e8 100644 --- a/src/DynamicVehicleScheduling/environment/plot.jl +++ b/src/DynamicVehicleScheduling/environment/plot.jl @@ -1,134 +1,134 @@ -""" -$TYPEDSIGNATURES +# """ +# $TYPEDSIGNATURES -Plot the environment of a DVSPEnv, restricted to the given `epoch_indices` (all epoch if not given). -""" -function plot_environment( - env::DVSPEnv; - customer_markersize=4, - depot_markersize=7, - alpha_depot=0.8, - depot_color=:lightgreen, - epoch_indices=nothing, - kwargs..., -) - draw_all_epochs!(env) +# Plot the environment of a DVSPEnv, restricted to the given `epoch_indices` (all epoch if not given). 
+# """ +# function plot_environment( +# env::DVSPEnv; +# customer_markersize=4, +# depot_markersize=7, +# alpha_depot=0.8, +# depot_color=:lightgreen, +# epoch_indices=nothing, +# kwargs..., +# ) +# draw_all_epochs!(env) - epoch_appearance = env.request_epoch - coordinates = coordinate(get_state(env)) +# epoch_appearance = env.request_epoch +# coordinates = coordinate(get_state(env)) - epoch_indices = isnothing(epoch_indices) ? get_epoch_indices(env) : epoch_indices +# epoch_indices = isnothing(epoch_indices) ? get_epoch_indices(env) : epoch_indices - xlims = (minimum(c.x for c in coordinates), maximum(c.x for c in coordinates)) - ylims = (minimum(c.y for c in coordinates), maximum(c.y for c in coordinates)) +# xlims = (minimum(c.x for c in coordinates), maximum(c.x for c in coordinates)) +# ylims = (minimum(c.y for c in coordinates), maximum(c.y for c in coordinates)) - fig = plot(; - legend=:topleft, - xlabel="x coordinate", - ylabel="y coordinate", - xlims, - ylims, - kwargs..., - ) +# fig = plot(; +# legend=:topleft, +# xlabel="x coordinate", +# ylabel="y coordinate", +# xlims, +# ylims, +# kwargs..., +# ) - for epoch in epoch_indices - requests = findall(epoch_appearance .== epoch) - x = [coordinates[request].x for request in requests] - y = [coordinates[request].y for request in requests] - scatter!( - fig, x, y; label="Epoch $epoch", marker=:circle, markersize=customer_markersize - ) - end - scatter!( - fig, - [coordinates[1].x], - [coordinates[1].y]; - label="Depot", - markercolor=depot_color, - marker=:rect, - markersize=depot_markersize, - alpha=alpha_depot, - ) +# for epoch in epoch_indices +# requests = findall(epoch_appearance .== epoch) +# x = [coordinates[request].x for request in requests] +# y = [coordinates[request].y for request in requests] +# scatter!( +# fig, x, y; label="Epoch $epoch", marker=:circle, markersize=customer_markersize +# ) +# end +# scatter!( +# fig, +# [coordinates[1].x], +# [coordinates[1].y]; +# label="Depot", +# 
markercolor=depot_color, +# marker=:rect, +# markersize=depot_markersize, +# alpha=alpha_depot, +# ) - return fig -end +# return fig +# end -""" -$TYPEDSIGNATURES +# """ +# $TYPEDSIGNATURES -Plot the given `routes`` for a VSP `state`. -""" -function plot_epoch(state::DVSPState, routes; kwargs...) - (; coordinate, start_time) = state.instance - x_depot = coordinate[1].x - y_depot = coordinate[1].y - X = [p.x for p in coordinate] - Y = [p.y for p in coordinate] - markersize = 5 - fig = plot(; - legend=:topleft, xlabel="x", ylabel="y", clim=(0.0, maximum(start_time)), kwargs... - ) - for route in routes - x_points = vcat(x_depot, X[route], x_depot) - y_points = vcat(y_depot, Y[route], y_depot) - plot!(fig, x_points, y_points; label=nothing) - end - scatter!( - fig, - [x_depot], - [y_depot]; - label="depot", - markercolor=:lightgreen, - markersize, - marker=:rect, - ) - if sum(state.is_postponable) > 0 - scatter!( - fig, - X[state.is_postponable], - Y[state.is_postponable]; - label="Postponable customers", - marker_z=start_time[state.is_postponable], - markersize, - colormap=:turbo, - marker=:utriangle, - ) - end - if sum(state.is_must_dispatch) > 0 - scatter!( - fig, - X[state.is_must_dispatch], - Y[state.is_must_dispatch]; - label="Must-dispatch customers", - marker_z=start_time[state.is_must_dispatch], - markersize, - colormap=:turbo, - marker=:star5, - ) - end - return fig -end +# Plot the given `routes`` for a VSP `state`. +# """ +# function plot_epoch(state::DVSPState, routes; kwargs...) +# (; coordinate, start_time) = state.instance +# x_depot = coordinate[1].x +# y_depot = coordinate[1].y +# X = [p.x for p in coordinate] +# Y = [p.y for p in coordinate] +# markersize = 5 +# fig = plot(; +# legend=:topleft, xlabel="x", ylabel="y", clim=(0.0, maximum(start_time)), kwargs... 
+# ) +# for route in routes +# x_points = vcat(x_depot, X[route], x_depot) +# y_points = vcat(y_depot, Y[route], y_depot) +# plot!(fig, x_points, y_points; label=nothing) +# end +# scatter!( +# fig, +# [x_depot], +# [y_depot]; +# label="depot", +# markercolor=:lightgreen, +# markersize, +# marker=:rect, +# ) +# if sum(state.is_postponable) > 0 +# scatter!( +# fig, +# X[state.is_postponable], +# Y[state.is_postponable]; +# label="Postponable customers", +# marker_z=start_time[state.is_postponable], +# markersize, +# colormap=:turbo, +# marker=:utriangle, +# ) +# end +# if sum(state.is_must_dispatch) > 0 +# scatter!( +# fig, +# X[state.is_must_dispatch], +# Y[state.is_must_dispatch]; +# label="Must-dispatch customers", +# marker_z=start_time[state.is_must_dispatch], +# markersize, +# colormap=:turbo, +# marker=:star5, +# ) +# end +# return fig +# end -""" -$TYPEDSIGNATURES +# """ +# $TYPEDSIGNATURES -Create a plot of routes for each epoch. -""" -function plot_routes(env::DVSPEnv, routes; epoch_indices=nothing, kwargs...) - reset!(env) - epoch_indices = isnothing(epoch_indices) ? get_epoch_indices(env) : epoch_indices +# Create a plot of routes for each epoch. +# """ +# function plot_routes(env::DVSPEnv, routes; epoch_indices=nothing, kwargs...) +# reset!(env) +# epoch_indices = isnothing(epoch_indices) ? get_epoch_indices(env) : epoch_indices - coordinates = env.config.static_instance.coordinate - xlims = (minimum(c.x for c in coordinates), maximum(c.x for c in coordinates)) - ylims = (minimum(c.y for c in coordinates), maximum(c.y for c in coordinates)) +# coordinates = env.config.static_instance.coordinate +# xlims = (minimum(c.x for c in coordinates), maximum(c.x for c in coordinates)) +# ylims = (minimum(c.y for c in coordinates), maximum(c.y for c in coordinates)) - figs = map(epoch_indices) do epoch - s = next_epoch!(env) - fig = plot_epoch( - s, state_route_from_env_routes(env, routes[epoch]); xlims, ylims, kwargs... 
- ) - apply_decision!(env, routes[epoch]) - return fig - end - return figs -end +# figs = map(epoch_indices) do epoch +# s = next_epoch!(env) +# fig = plot_epoch( +# s, state_route_from_env_routes(env, routes[epoch]); xlims, ylims, kwargs... +# ) +# apply_decision!(env, routes[epoch]) +# return fig +# end +# return figs +# end diff --git a/src/DynamicVehicleScheduling/learning/dataset.jl b/src/DynamicVehicleScheduling/learning/dataset.jl deleted file mode 100644 index da37b59..0000000 --- a/src/DynamicVehicleScheduling/learning/dataset.jl +++ /dev/null @@ -1,37 +0,0 @@ -function load_VSP_dataset( - datadir::String; model_builder=highs_model, use_2D_features=false, kwargs... -) - instances_files = filtered_readdir(datadir) - X = Tuple{Matrix{Float32},DVSPState{VSPInstance{Float64}}}[] - Y = BitMatrix[] - - for (i, f) in enumerate(instances_files) - static_instance = read_vsp_instance((joinpath(datadir, f))) - env = DVSPEnv(static_instance; seed=i, kwargs...) - - # Compute the anticipative policy - routes_anticipative = anticipative_solver(env; model_builder) - reset!(env) - for e in eachindex(routes_anticipative) - next_epoch!(env) - # Store the state - state = env.state - features = Matrix( - use_2D_features ? compute_2D_features(env) : compute_features(env) - ) - push!(X, (features, state)) - routes = routes_anticipative[e] - # Store the solution - push!( - Y, - VSPSolution( - state_route_from_env_routes(env, routes); - max_index=location_count(state.instance), - ).edge_matrix, - ) - # Update the environment - apply_decision!(env, routes) - end - end - return X, Y -end diff --git a/src/DynamicVehicleScheduling/maximizer.jl b/src/DynamicVehicleScheduling/maximizer.jl new file mode 100644 index 0000000..eecbf57 --- /dev/null +++ b/src/DynamicVehicleScheduling/maximizer.jl @@ -0,0 +1,25 @@ +function oracle(θ; instance::DVSPState, kwargs...) + routes = prize_collecting_vsp(θ; instance=instance, kwargs...) 
+ return VSPSolution( + routes; max_index=location_count(instance.state_instance) + ).edge_matrix +end + +function g(y; instance, kwargs...) + return vec(sum(y[:, instance.is_postponable]; dims=1)) +end + +function h(y, duration) + value = 0.0 + N = size(duration, 1) + for i in 1:N + for j in 1:N + value -= y[i, j] * duration[i, j] + end + end + return value +end + +function h(y; instance, kwargs...) + return h(y, instance.state_instance.duration) +end diff --git a/src/DynamicVehicleScheduling/policy/abstract_vsp_policy.jl b/src/DynamicVehicleScheduling/policy/abstract_vsp_policy.jl index bd640fe..0a1f755 100644 --- a/src/DynamicVehicleScheduling/policy/abstract_vsp_policy.jl +++ b/src/DynamicVehicleScheduling/policy/abstract_vsp_policy.jl @@ -1,3 +1,9 @@ +abstract type AbstractDynamicPolicy end + +function (π::AbstractDynamicPolicy)(env; kwargs...) + throw("Not implemented") +end + """ $TYPEDEF diff --git a/src/DynamicVehicleScheduling/utils.jl b/src/DynamicVehicleScheduling/utils.jl index 36eebd2..bd1dfe8 100644 --- a/src/DynamicVehicleScheduling/utils.jl +++ b/src/DynamicVehicleScheduling/utils.jl @@ -35,12 +35,3 @@ struct Point{T} end Base.show(io::IO, p::Point) = print(io, "($(p.x), $(p.y))") - -# """ -# $TYPEDSIGNATURES - -# Readdir all files in `data` with extension `file_extension`. -# """ -# function filtered_readdir(data, file_extension=".txt"; kwargs...) 
-# return filter(x -> endswith(x, file_extension), readdir(data; kwargs...)) -# end From 20e8a4ffaedb5439d57a4ef3c4336375bda7876b Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Mon, 7 Jul 2025 18:27:19 +0200 Subject: [PATCH 11/29] fix features --- .../algorithms/anticipative_solver.jl | 6 +- .../learning/features.jl | 67 +++++-------------- 2 files changed, 19 insertions(+), 54 deletions(-) diff --git a/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl b/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl index 4ff78e2..d0404b2 100644 --- a/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl +++ b/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl @@ -139,9 +139,6 @@ function anticipative_solver( dataset = map(1:last_epoch) do epoch routes = epoch_routes[epoch] epoch_customers = epoch_indices[epoch] - # y_true = [ - # map(idx -> findfirst(==(idx), epoch_customers), route) for route in routes - # ] y_true = VSPSolution( @@ -182,7 +179,8 @@ function anticipative_solver( current_epoch=epoch, ) - x = compute_2D_features(state, env.instance) + # x = compute_2D_features(state, env.instance) + x = compute_features(state, env.instance) return DataSample(; instance=state, y_true, x) end diff --git a/src/DynamicVehicleScheduling/learning/features.jl b/src/DynamicVehicleScheduling/learning/features.jl index 0cb4160..c9470c3 100644 --- a/src/DynamicVehicleScheduling/learning/features.jl +++ b/src/DynamicVehicleScheduling/learning/features.jl @@ -1,21 +1,19 @@ -function get_features_quantileTimeToRequests(env::DVSPEnv) +function get_features_quantileTimeToRequests(state::DVSPState, instance::Instance) quantiles = [i * 0.1 for i in 1:9] - a = env.config.static_instance.duration[ - env.customer_index[.!env.request_is_dispatched], 2:end - ] + a = instance.static_instance.duration[state.location_indices, 2:end] quantileTimeToRequests = mapslices(x -> quantile(x, quantiles), a; dims=2) return quantileTimeToRequests end -function 
compute_model_free_features(state::DVSPState; env::DVSPEnv) - (; instance, is_postponable) = state +function compute_model_free_features(state::DVSPState, instance::Instance) + (; state_instance, is_postponable) = state - startTimes = instance.start_time - endTimes = startTimes .+ instance.service_time - timeDepotRequest = instance.duration[:, 1] - timeRequestDepot = instance.duration[1, :] + startTimes = state_instance.start_time + endTimes = startTimes .+ state_instance.service_time + timeDepotRequest = state_instance.duration[:, 1] + timeRequestDepot = state_instance.duration[1, :] - slack_next_epoch = startTimes .- env.config.epoch_duration + slack_next_epoch = startTimes .- instance.epoch_duration model_free_features = hcat( startTimes[is_postponable], # 1 @@ -27,49 +25,18 @@ function compute_model_free_features(state::DVSPState; env::DVSPEnv) return model_free_features end -function compute_model_aware_features(state::DVSPState; env::DVSPEnv) - quantileTimeToRequests = get_features_quantileTimeToRequests(env) +function compute_model_aware_features(state::DVSPState, instance::Instance) + quantileTimeToRequests = get_features_quantileTimeToRequests(state, instance) model_aware_features = quantileTimeToRequests return model_aware_features[state.is_postponable, :] end -function compute_features(env::DVSPEnv) - state = env.state - model_free_features = compute_model_free_features(state; env) - model_aware_features = compute_model_aware_features(state; env) +function compute_features(state::DVSPState, instance::Instance) + model_free_features = compute_model_free_features(state, instance) + model_aware_features = compute_model_aware_features(state, instance) return hcat(model_free_features, model_aware_features)' end -# # ? 
why is this needed -# function model_free_features_critic(state::DVSPState; env::DVSPEnv) -# (; instance) = state -# startTimes = instance.start_time -# endTimes = instance.service_time .+ instance.start_time -# timeDepotRequest = instance.duration[:, 1] -# timeRequestDepot = instance.duration[1, :] -# slack_next_epoch = startTimes .- env.config.epoch_duration -# model_free_features = hcat( -# startTimes, endTimes, timeDepotRequest, timeRequestDepot, slack_next_epoch -# ) -# return model_free_features -# end - -# # ? -# function compute_critic_features(env::DVSPEnv) -# state = env.state -# model_free_features = model_free_features_critic(state; env) -# model_aware_features = get_features_quantileTimeToRequests(env) -# postpon = state.is_postponable -# return hcat(model_free_features, model_aware_features, postpon)' -# end - -# # ? -# function compute_critic_2D_features(env::DVSPEnv) -# state = env.state -# timeDepotRequest = state.instance.duration[:, 1] -# quantileTimeToRequests = get_features_meanTimeToRequests(env) -# postpon = state.is_postponable -# # time_postpon = timeDepotRequest .* postpon -# # quant_postpon = quantileTimeToRequests .* postpon -# return hcat(timeDepotRequest, quantileTimeToRequests, postpon)' -# end +function compute_features(env::DVSPEnv) + return compute_features(env.state, env.instance) +end From b30fe3316c356967dbfe92c855d3156712930c53 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Thu, 10 Jul 2025 18:07:27 +0200 Subject: [PATCH 12/29] update --- src/DynamicAssortment/DynamicAssortment.jl | 0 .../DynamicVehicleScheduling.jl | 24 ++++++++++++------- .../algorithms/anticipative_solver.jl | 24 +++++++++++++++++-- .../environment/plot.jl | 4 ++++ src/Utils/interface.jl | 4 +++- 5 files changed, 45 insertions(+), 11 deletions(-) create mode 100644 src/DynamicAssortment/DynamicAssortment.jl diff --git a/src/DynamicAssortment/DynamicAssortment.jl b/src/DynamicAssortment/DynamicAssortment.jl new file mode 100644 index 0000000..e69de29 diff --git 
a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl index 34476b7..d57644b 100644 --- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl +++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl @@ -51,11 +51,19 @@ include("maximizer.jl") $TYPEDEF Abstract type for dynamic vehicle scheduling benchmarks. + +# Fields +$TYPEDFIELDS """ @kwdef struct DVSPBenchmark <: AbstractDynamicBenchmark + "todo" max_requests_per_epoch::Int = 10 + "todo" Δ_dispatch::Float64 = 1.0 + "todo" epoch_duration::Float64 = 1.0 + "todo" + two_dimensional_features::Bool = false end function Utils.generate_dataset(b::DVSPBenchmark, dataset_size::Int=1) @@ -74,14 +82,6 @@ function Utils.generate_dataset(b::DVSPBenchmark, dataset_size::Int=1) ] end -function Utils.generate_scenario_generator(::DVSPBenchmark) - return generate_scenario -end - -function Utils.generate_anticipative_solver(::DVSPBenchmark; kwargs...) - return anticipative_solver -end - function Utils.generate_environment(::DVSPBenchmark, instance::Instance; kwargs...) return DVSPEnv(instance; kwargs...) end @@ -90,6 +90,14 @@ function Utils.generate_maximizer(::DVSPBenchmark) return LinearMaximizer(oracle; g, h) end +function Utils.generate_scenario_generator(::DVSPBenchmark) + return generate_scenario +end + +function Utils.generate_anticipative_solver(b::DVSPBenchmark; kwargs...) 
+ return AnticipativeSolver(b.two_dimensional_features) +end + export DVSPBenchmark #, generate_environment # , generate_sample, generate_anticipative_solver export run_policy!, GreedyVSPPolicy, LazyVSPPolicy, KleopatraVSPPolicy, AnticipativeVSPPolicy diff --git a/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl b/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl index d0404b2..dd52d19 100644 --- a/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl +++ b/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl @@ -41,7 +41,11 @@ Solve the anticipative VSP problem for environment `env`. For this, it uses the current environment history, so make sure that the environment is terminated before calling this method. """ function anticipative_solver( - env::DVSPEnv, scenario=env.scenario; model_builder=highs_model, reset_env=false + env::DVSPEnv, + scenario=env.scenario; + model_builder=highs_model, + reset_env=false, + two_dimensional_features=false, ) reset_env && reset!(env) request_epoch = [0] @@ -180,10 +184,26 @@ function anticipative_solver( ) # x = compute_2D_features(state, env.instance) - x = compute_features(state, env.instance) + x = if two_dimensional_features + compute_2D_features(state, env.instance) + else + compute_features(state, env.instance) + end return DataSample(; instance=state, y_true, x) end return obj, dataset end + +@kwdef struct AnticipativeSolver + is_2D::Bool = false +end + +function (solver::AnticipativeSolver)(env::DVSPEnv, scenario=env.scenario; reset_env=false) + if solver.is_2D + return anticipative_solver(env, scenario; model_builder=highs_model_2d, reset_env) + else + return anticipative_solver(env, scenario; model_builder=highs_model, reset_env) + end +end diff --git a/src/DynamicVehicleScheduling/environment/plot.jl b/src/DynamicVehicleScheduling/environment/plot.jl index 53831e8..adb0fa6 100644 --- a/src/DynamicVehicleScheduling/environment/plot.jl +++ 
b/src/DynamicVehicleScheduling/environment/plot.jl @@ -1,3 +1,7 @@ +function plot_instance(env::DVSPEnv; kwargs...) + return plot_instance(env.instance.static_instance; kwargs...) +end + # """ # $TYPEDSIGNATURES diff --git a/src/Utils/interface.jl b/src/Utils/interface.jl index 3b70f9e..ce9b6ed 100644 --- a/src/Utils/interface.jl +++ b/src/Utils/interface.jl @@ -189,9 +189,11 @@ It follows the same interface as [`AbstractBenchmark`](@ref), with the addition """ abstract type AbstractStochasticBenchmark <: AbstractBenchmark end +function generate_scenario end + # only works for exogenous noise """ - generate_scenario(::AbstractStochasticBenchmark; kwargs...) + generate_scenario_generator(::AbstractStochasticBenchmark; kwargs...) """ function generate_scenario_generator end From 0c7e20add763cfd0f259729dc091d3be3558aed7 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Fri, 11 Jul 2025 17:30:38 +0200 Subject: [PATCH 13/29] first version of DynamicAssortmentBenchmark --- Project.toml | 2 + src/DecisionFocusedLearningBenchmarks.jl | 3 + src/DynamicAssortment/DynamicAssortment.jl | 36 +++ src/DynamicAssortment/environment.jl | 271 +++++++++++++++++++++ 4 files changed, 312 insertions(+) create mode 100644 src/DynamicAssortment/environment.jl diff --git a/Project.toml b/Project.toml index 03e2d27..68a66e4 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,7 @@ authors = ["Members of JuliaDecisionFocusedLearning"] version = "0.2.2" [deps] +Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" CommonRLInterface = "d842c3ba-07a1-494f-bbec-f5741b0a3e98" ConstrainedShortestPaths = "b3798467-87dc-4d99-943d-35a1bd39e395" DataDeps = "124859b0-ceae-595e-8997-d05f6a7a8dfe" @@ -32,6 +33,7 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] +Combinatorics = "1.0.3" CommonRLInterface = "0.3.3" ConstrainedShortestPaths = "0.6.0" DataDeps = "0.7" diff --git a/src/DecisionFocusedLearningBenchmarks.jl 
b/src/DecisionFocusedLearningBenchmarks.jl index dfd0a42..4199b49 100644 --- a/src/DecisionFocusedLearningBenchmarks.jl +++ b/src/DecisionFocusedLearningBenchmarks.jl @@ -55,6 +55,7 @@ include("FixedSizeShortestPath/FixedSizeShortestPath.jl") include("PortfolioOptimization/PortfolioOptimization.jl") include("StochasticVehicleScheduling/StochasticVehicleScheduling.jl") include("DynamicVehicleScheduling/DynamicVehicleScheduling.jl") +include("DynamicAssortment/DynamicAssortment.jl") using .Utils using .Argmax @@ -65,6 +66,7 @@ using .FixedSizeShortestPath using .PortfolioOptimization using .StochasticVehicleScheduling using .DynamicVehicleScheduling +using .DynamicAssortment # Interface export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark, DataSample @@ -87,5 +89,6 @@ export FixedSizeShortestPathBenchmark export PortfolioOptimizationBenchmark export StochasticVehicleSchedulingBenchmark export DVSPBenchmark +export DynamicAssortmentBenchmark end # module DecisionFocusedLearningBenchmarks diff --git a/src/DynamicAssortment/DynamicAssortment.jl b/src/DynamicAssortment/DynamicAssortment.jl index e69de29..fd6c80e 100644 --- a/src/DynamicAssortment/DynamicAssortment.jl +++ b/src/DynamicAssortment/DynamicAssortment.jl @@ -0,0 +1,36 @@ +module DynamicAssortment + +using ..Utils + +using CommonRLInterface: CommonRLInterface, AbstractEnv +using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES +using Distributions: Uniform, Categorical +using LinearAlgebra: dot +using Random: Random, AbstractRNG, MersenneTwister +using Statistics: mean + +using Flux: Chain, Dense +# using Flux.Optimise +# using InferOpt +# using Random +# using JLD2 +# using Plots +# using Distributions +# using LinearAlgebra +using Combinatorics: combinations + +include("environment.jl") + +struct DynamicAssortmentBenchmark <: AbstractDynamicBenchmark end + +function Utils.generate_sample(::DynamicAssortmentBenchmark) + return DataSample(; instance=Instance()) +end + 
+function Utils.generate_maximizer(::DynamicAssortmentBenchmark) + return DAP_optimization +end + +export DynamicAssortmentBenchmark + +end diff --git a/src/DynamicAssortment/environment.jl b/src/DynamicAssortment/environment.jl new file mode 100644 index 0000000..9272a8f --- /dev/null +++ b/src/DynamicAssortment/environment.jl @@ -0,0 +1,271 @@ +""" +$TYPEDEF + +Feature 1:d Random static feature +Feature 3: Hype +Feature 4: Satisfaction +Feature 5: Price + +# Fields +$TYPEDFIELDS +""" +@kwdef struct Instance{M} + "customer choice model" + customer_choice_model::M = Chain(Dense([0.3 0.5 0.6 -0.4 -0.8 0.0]), vec) + "number of items" + N::Int = 20 + "dimension of feature vectors (in addition to hype, satisfaction, and price)" + d::Int = 2 + "assortment size constraint" + K::Int = 4 + "number of steps per episode" + max_steps::Int = 80 + "flags if the environment is endogenous" + endogenous::Bool = true +end + +@kwdef mutable struct Environment{R<:AbstractRNG} <: AbstractEnv + "associated instance" + instance::Instance + "current step" + step::Int + "purchase history" + purchase_hist::Vector{Int} + "rng" + rng::R + "seed for RNG" + seed::Int + "customer utility for each item" + utility::Vector{Float64} + "prices for each item" + prices::Vector{Float64} + "current full features" + features::Matrix{Float64} + "starting satisfaction + hype features" + start_features::Matrix{Float64} + "satisfaction + hype feature change from the last step" + d_features::Matrix{Float64} +end + +function Environment( + instance::Instance; seed::Int=0, rng::AbstractRNG=MersenneTwister(seed) +) + return Environment(; + instance=instance, + step=1, + purchase_hist=Int[], + rng=rng, + seed=seed, + utility=zeros(instance.N), + prices=zeros(instance.N + 1), + features=zeros(instance.d + 4, instance.N), + start_features=zeros(2, instance.N), + d_features=zeros(2, instance.N), + ) +end + +## Basic operations of environment + +# Reset the environment +function 
CommonRLInterface.reset!(env::Environment; reset_seed=false, seed=env.seed) + env.seed = seed + if reset_seed + Random.seed!(env.rng, env.seed) + end + (; d, N, customer_choice_model) = env.instance + features = rand(env.rng, Uniform(1.0, 10.0), (d + 3, N)) + env.prices = vcat(features[end, :], 0.0) + features = vcat(features, ones(1, N)) + env.d_features .= 0.0 + env.step = 1 + env.utility .= customer_choice_model(features) + env.features .= features + env.start_features .= features[(d + 1):(d + 2), :] + env.purchase_hist = Int[] + return nothing +end + +# Update the hype vector +function hype_update!(env::Environment) + hype_vector = ones(env.instance.N) + env.purchase_hist[end] != 0 ? hype_vector[env.purchase_hist[end]] += 0.02 : nothing + if length(env.purchase_hist) >= 2 + if env.purchase_hist[end - 1] != 0 + hype_vector[env.purchase_hist[end - 1]] -= 0.005 + else + nothing + end + if length(env.purchase_hist) >= 3 + if env.purchase_hist[end - 2] != 0 + hype_vector[env.purchase_hist[end - 2]] -= 0.005 + else + nothing + end + if length(env.purchase_hist) >= 4 + if env.purchase_hist[end - 3] != 0 + hype_vector[env.purchase_hist[end - 3]] -= 0.005 + else + nothing + end + if length(env.purchase_hist) >= 5 + if env.purchase_hist[end - 4] != 0 + hype_vector[env.purchase_hist[end - 4]] -= 0.005 + else + nothing + end + end + end + end + end + return hype_vector +end + +# Step function +function step!(env::Environment, item) + old_features = copy(env.features) + push!(env.purchase_hist, item) + if env.instance.endogenous + hype_vector = hype_update!(env) + env.features[3, :] .*= hype_vector + item != 0 ? env.features[4, item] *= 1.01 : nothing + env.features[6, :] .+= 9 / env.instance.max_steps # ?? + end + env.d_features = env.features[3:4, :] - old_features[3:4, :] # ! 
hardcoded everywhere :( + env.step += 1 + return nothing +end + +# Choice probabilities +function choice_probabilities(env::Environment, S) + θ = env.utility + exp_values = [exp(θ[i]) * S[i] for i in 1:(env.instance.N)] + denominator = 1 + sum(exp_values) + probs = [exp_values[i] / denominator for i in 1:(env.instance.N)] + push!(probs, 1 / denominator) # Probability of no purchase + return probs +end + +# Purchase decision +function purchase!(env::Environment, S) + r = env.prices + probs = choice_probabilities(env, S) + item = rand(env.rng, Categorical(probs)) + item == env.instance.N + 1 ? item = 0 : item # TODO: cleanup this, not really needed and confusing + item != 0 ? revenue = r[item] : revenue = 0.0 + return item, revenue +end + +# enumerate all possible assortments of size K and return the best one +# ? can't we do better than that, probably +function expert_solution(env::Environment) + r = env.prices + local best_S + best_revenue = 0.0 + for S in combinations(1:(env.instance.N), env.instance.K) + S_vec = zeros(env.instance.N) + S_vec[S] .= 1.0 + probs = choice_probabilities(env, S_vec) + expected_revenue = dot(probs, r) + if expected_revenue > best_revenue + best_S, best_revenue = S_vec, expected_revenue + end + end + return best_S +end + +# DAP CO-layer +function DAP_optimization(θ; instance::Instance) + solution = partialsortperm(θ, 1:(instance.K); rev=true) # It never makes sense not to show k items + S = zeros(instance.N) + S[solution] .= 1 + return S +end + +## Solution functions + +# Anticipative (fixed) +function expert_policy(env::Environment, episodes; first_seed=1, use_oracle=false) + dataset = [] + rev_global = Float64[] + for i in 1:episodes + rev_episode = 0.0 + CommonRLInterface.reset!(env; seed=first_seed - 1 + i, reset_seed=true) + done = false + training_instances = [] + while !done + S = expert_solution(env) + + delta_features = env.features[3:4, :] .- env.start_features # ! 
hardcoded + feature_vector = vcat(env.features, env.d_features, delta_features) + push!(training_instances, (features=feature_vector, S_t=S)) + + item, revenue = purchase!(env, S) + rev_episode += revenue + step!(env, item) + + env.step > env.instance.max_steps ? done = true : done = false + end + push!(rev_global, rev_episode) + push!(dataset, training_instances) + end + return mean(rev_global), rev_global, dataset +end + +# Greedy heuristic +function model_greedy(features) + model = Chain(Dense([0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0]), vec) + return model(features) +end + +# Random heuristic +function model_random(features) + rand_seed = Int(round(sum(features))) + return rand(MersenneTwister(rand_seed), Uniform(0.0, 1.0), size(features)[2]) +end + +# Episode generation +function generate_episode(env::Environment, model, customer_model, sigma, random_seed) + buffer = [] + start_features, d_features = reset!(env; seed=random_seed) + features = copy(start_features) + done = false + while !done + delta_features = features[3:4, :] .- start_features[3:4, :] + r = features[5, :] + feature_vector = vcat(features, d_features, delta_features) + θ = model(feature_vector) + η = rand(MersenneTwister(random_seed * env.step), p(θ, sigma), 1)[:, 1] + S = DAP_optimization(η; instance=env.instance) + θ_0 = customer_model(features) + item, revenue = purchase!(env, S) + features, d_features = step!(env, features, item) + feat_next = vcat(features, d_features, features[3:4, :] .- start_features[3:4, :]) + push!( + buffer, + ( + t=env.step - 1, + feat_t=feature_vector, + theta=θ, + eta=η, + S_t=S, + a_T=item, + rev_t=revenue, + ret_t=0.0, + feat_next=feat_next, + ), + ) + count(!iszero, inventory) < env.instance.K ? break : nothing + env.step > env.instance.max_steps ? 
done = true : done = false + end + for i in (length(buffer) - 1):-1:1 + if i == length(buffer) - 1 + ret = buffer[i].rev_t + else + ret = buffer[i].rev_t + 0.99 * buffer[i + 1].ret_t + end + traj = buffer[i] + traj_updated = (; traj..., ret_t=ret) + buffer[i] = traj_updated + end + return buffer +end From e8057cf35af6cf027de04ade458897e80ed633f6 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Tue, 15 Jul 2025 08:02:04 +0200 Subject: [PATCH 14/29] wip --- src/DynamicAssortment/environment.jl | 111 ++++++++++++++------------- 1 file changed, 56 insertions(+), 55 deletions(-) diff --git a/src/DynamicAssortment/environment.jl b/src/DynamicAssortment/environment.jl index 9272a8f..26e900c 100644 --- a/src/DynamicAssortment/environment.jl +++ b/src/DynamicAssortment/environment.jl @@ -11,7 +11,7 @@ $TYPEDFIELDS """ @kwdef struct Instance{M} "customer choice model" - customer_choice_model::M = Chain(Dense([0.3 0.5 0.6 -0.4 -0.8 0.0]), vec) + customer_choice_model::M = Chain(Dense([0.3 0.5 0.6 -0.4 -0.8]), vec) "number of items" N::Int = 20 "dimension of feature vectors (in addition to hype, satisfaction, and price)" @@ -22,6 +22,7 @@ $TYPEDFIELDS max_steps::Int = 80 "flags if the environment is endogenous" endogenous::Bool = true + # start_features? 
end @kwdef mutable struct Environment{R<:AbstractRNG} <: AbstractEnv @@ -58,7 +59,7 @@ function Environment( seed=seed, utility=zeros(instance.N), prices=zeros(instance.N + 1), - features=zeros(instance.d + 4, instance.N), + features=zeros(instance.d + 3, instance.N), start_features=zeros(2, instance.N), d_features=zeros(2, instance.N), ) @@ -75,7 +76,7 @@ function CommonRLInterface.reset!(env::Environment; reset_seed=false, seed=env.s (; d, N, customer_choice_model) = env.instance features = rand(env.rng, Uniform(1.0, 10.0), (d + 3, N)) env.prices = vcat(features[end, :], 0.0) - features = vcat(features, ones(1, N)) + # features = vcat(features, ones(1, N)) # TODO env.d_features .= 0.0 env.step = 1 env.utility .= customer_choice_model(features) @@ -128,7 +129,7 @@ function step!(env::Environment, item) hype_vector = hype_update!(env) env.features[3, :] .*= hype_vector item != 0 ? env.features[4, item] *= 1.01 : nothing - env.features[6, :] .+= 9 / env.instance.max_steps # ?? + # env.features[6, :] .+= 9 / env.instance.max_steps # ?? end env.d_features = env.features[3:4, :] - old_features[3:4, :] # ! hardcoded everywhere :( env.step += 1 @@ -146,13 +147,14 @@ function choice_probabilities(env::Environment, S) end # Purchase decision -function purchase!(env::Environment, S) +function CommonRLInterface.act!(env::Environment, S) r = env.prices probs = choice_probabilities(env, S) item = rand(env.rng, Categorical(probs)) + reward = r[item] item == env.instance.N + 1 ? item = 0 : item # TODO: cleanup this, not really needed and confusing - item != 0 ? 
revenue = r[item] : revenue = 0.0 - return item, revenue + step!(env, item) + return reward end # enumerate all possible assortments of size K and return the best one @@ -199,9 +201,8 @@ function expert_policy(env::Environment, episodes; first_seed=1, use_oracle=fals feature_vector = vcat(env.features, env.d_features, delta_features) push!(training_instances, (features=feature_vector, S_t=S)) - item, revenue = purchase!(env, S) - rev_episode += revenue - step!(env, item) + reward = CommonRLInterface.act!(env, S) + rev_episode += reward env.step > env.instance.max_steps ? done = true : done = false end @@ -224,48 +225,48 @@ function model_random(features) end # Episode generation -function generate_episode(env::Environment, model, customer_model, sigma, random_seed) - buffer = [] - start_features, d_features = reset!(env; seed=random_seed) - features = copy(start_features) - done = false - while !done - delta_features = features[3:4, :] .- start_features[3:4, :] - r = features[5, :] - feature_vector = vcat(features, d_features, delta_features) - θ = model(feature_vector) - η = rand(MersenneTwister(random_seed * env.step), p(θ, sigma), 1)[:, 1] - S = DAP_optimization(η; instance=env.instance) - θ_0 = customer_model(features) - item, revenue = purchase!(env, S) - features, d_features = step!(env, features, item) - feat_next = vcat(features, d_features, features[3:4, :] .- start_features[3:4, :]) - push!( - buffer, - ( - t=env.step - 1, - feat_t=feature_vector, - theta=θ, - eta=η, - S_t=S, - a_T=item, - rev_t=revenue, - ret_t=0.0, - feat_next=feat_next, - ), - ) - count(!iszero, inventory) < env.instance.K ? break : nothing - env.step > env.instance.max_steps ? 
done = true : done = false - end - for i in (length(buffer) - 1):-1:1 - if i == length(buffer) - 1 - ret = buffer[i].rev_t - else - ret = buffer[i].rev_t + 0.99 * buffer[i + 1].ret_t - end - traj = buffer[i] - traj_updated = (; traj..., ret_t=ret) - buffer[i] = traj_updated - end - return buffer -end +# function generate_episode(env::Environment, model, customer_model, sigma, random_seed) +# buffer = [] +# start_features, d_features = reset!(env; seed=random_seed) +# features = copy(start_features) +# done = false +# while !done +# delta_features = features[3:4, :] .- start_features[3:4, :] +# r = features[5, :] +# feature_vector = vcat(features, d_features, delta_features) +# θ = model(feature_vector) +# η = rand(MersenneTwister(random_seed * env.step), p(θ, sigma), 1)[:, 1] +# S = DAP_optimization(η; instance=env.instance) +# θ_0 = customer_model(features) +# item, revenue = purchase!(env, S) +# features, d_features = step!(env, item) +# feat_next = vcat(features, d_features, features[3:4, :] .- start_features[3:4, :]) +# push!( +# buffer, +# ( +# t=env.step - 1, +# feat_t=feature_vector, +# theta=θ, +# eta=η, +# S_t=S, +# a_T=item, +# rev_t=revenue, +# ret_t=0.0, +# feat_next=feat_next, +# ), +# ) +# count(!iszero, inventory) < env.instance.K ? break : nothing +# env.step > env.instance.max_steps ? 
done = true : done = false +# end +# for i in (length(buffer) - 1):-1:1 +# if i == length(buffer) - 1 +# ret = buffer[i].rev_t +# else +# ret = buffer[i].rev_t + 0.99 * buffer[i + 1].ret_t +# end +# traj = buffer[i] +# traj_updated = (; traj..., ret_t=ret) +# buffer[i] = traj_updated +# end +# return buffer +# end From 5f29047b187534fc0aa35912647e798545be1f19 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Wed, 6 Aug 2025 15:41:53 +0200 Subject: [PATCH 15/29] Advance dynamic assortment --- src/DecisionFocusedLearningBenchmarks.jl | 3 +- src/DynamicAssortment/DynamicAssortment.jl | 68 +++- src/DynamicAssortment/environment.jl | 306 +++++++----------- src/DynamicAssortment/instance.jl | 33 ++ .../DynamicVehicleScheduling.jl | 2 +- src/Utils/Utils.jl | 4 + src/Utils/interface.jl | 17 +- src/Utils/maximizers.jl | 22 ++ 8 files changed, 250 insertions(+), 205 deletions(-) create mode 100644 src/DynamicAssortment/instance.jl create mode 100644 src/Utils/maximizers.jl diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl index 4199b49..14fd2af 100644 --- a/src/DecisionFocusedLearningBenchmarks.jl +++ b/src/DecisionFocusedLearningBenchmarks.jl @@ -71,10 +71,11 @@ using .DynamicAssortment # Interface export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark, DataSample -export generate_sample, generate_dataset, generate_environments +export generate_sample, generate_dataset, generate_environments, generate_environment export generate_scenario_generator, generate_anticipative_solver export generate_statistical_model export generate_maximizer, maximizer_kwargs +export is_exogenous, is_endogenous export objective_value export plot_data, plot_instance, plot_solution diff --git a/src/DynamicAssortment/DynamicAssortment.jl b/src/DynamicAssortment/DynamicAssortment.jl index fd6c80e..f5abbbe 100644 --- a/src/DynamicAssortment/DynamicAssortment.jl +++ b/src/DynamicAssortment/DynamicAssortment.jl @@ -10,25 +10,69 @@ using 
Random: Random, AbstractRNG, MersenneTwister using Statistics: mean using Flux: Chain, Dense -# using Flux.Optimise -# using InferOpt -# using Random -# using JLD2 -# using Plots -# using Distributions -# using LinearAlgebra using Combinatorics: combinations +""" +$TYPEDEF + +Benchmark for the dynamic assortment problem. + +# Fields +$TYPEDFIELDS +""" +struct DynamicAssortmentBenchmark{exogenous,M} <: AbstractDynamicBenchmark{exogenous} + "customer choice model (price, hype, saturation, and features)" + customer_choice_model::M + "number of items" + N::Int + "dimension of feature vectors (in addition to hype, satisfaction, and price)" + d::Int + "assortment size constraint" + K::Int + "number of steps per episode" + max_steps::Int +end + +function DynamicAssortmentBenchmark(; + N=20, + d=2, + K=4, + max_steps=80, + customer_choice_model=Chain(Dense([-0.8 0.6 -0.4 0.3 0.5]), vec), + exogenous=false, +) + return DynamicAssortmentBenchmark{exogenous,typeof(customer_choice_model)}( + customer_choice_model, N, d, K, max_steps + ) +end + +include("instance.jl") include("environment.jl") -struct DynamicAssortmentBenchmark <: AbstractDynamicBenchmark end +customer_choice_model(b::DynamicAssortmentBenchmark) = b.customer_choice_model +item_count(b::DynamicAssortmentBenchmark) = b.N +feature_count(b::DynamicAssortmentBenchmark) = b.d +assortment_size(b::DynamicAssortmentBenchmark) = b.K +max_steps(b::DynamicAssortmentBenchmark) = b.max_steps + +function Utils.generate_sample( + b::DynamicAssortmentBenchmark, rng::AbstractRNG=MersenneTwister(0); seed=nothing +) + Random.seed!(rng, seed) + return DataSample(; instance=Instance(b, rng)) +end -function Utils.generate_sample(::DynamicAssortmentBenchmark) - return DataSample(; instance=Instance()) +function Utils.generate_maximizer(b::DynamicAssortmentBenchmark) + return TopKMaximizer(assortment_size(b)) end -function Utils.generate_maximizer(::DynamicAssortmentBenchmark) - return DAP_optimization +function 
Utils.generate_environment( + ::DynamicAssortmentBenchmark, + instance::Instance; + seed=nothing, + rng::AbstractRNG=MersenneTwister(seed), +) + return Environment(instance; seed=seed, rng=rng) end export DynamicAssortmentBenchmark diff --git a/src/DynamicAssortment/environment.jl b/src/DynamicAssortment/environment.jl index 26e900c..f8617c0 100644 --- a/src/DynamicAssortment/environment.jl +++ b/src/DynamicAssortment/environment.jl @@ -1,272 +1,210 @@ """ $TYPEDEF -Feature 1:d Random static feature -Feature 3: Hype -Feature 4: Satisfaction -Feature 5: Price +Environment for the dynamic assortment problem. # Fields $TYPEDFIELDS """ -@kwdef struct Instance{M} - "customer choice model" - customer_choice_model::M = Chain(Dense([0.3 0.5 0.6 -0.4 -0.8]), vec) - "number of items" - N::Int = 20 - "dimension of feature vectors (in addition to hype, satisfaction, and price)" - d::Int = 2 - "assortment size constraint" - K::Int = 4 - "number of steps per episode" - max_steps::Int = 80 - "flags if the environment is endogenous" - endogenous::Bool = true - # start_features? 
-end - -@kwdef mutable struct Environment{R<:AbstractRNG} <: AbstractEnv +@kwdef mutable struct Environment{I<:Instance,R<:AbstractRNG,S<:Union{Nothing,Int}} <: + AbstractEnv "associated instance" - instance::Instance + instance::I "current step" step::Int - "purchase history" + "purchase history (used to update hype feature)" purchase_hist::Vector{Int} "rng" rng::R "seed for RNG" - seed::Int + seed::S "customer utility for each item" utility::Vector{Float64} - "prices for each item" - prices::Vector{Float64} "current full features" features::Matrix{Float64} - "starting satisfaction + hype features" - start_features::Matrix{Float64} "satisfaction + hype feature change from the last step" d_features::Matrix{Float64} end -function Environment( - instance::Instance; seed::Int=0, rng::AbstractRNG=MersenneTwister(seed) -) - return Environment(; - instance=instance, +function Environment(instance::Instance; seed=0, rng::AbstractRNG=MersenneTwister(seed)) + N = item_count(instance) + (; prices, features, starting_hype_and_saturation) = instance + full_features = vcat( + reshape(prices[1:(end - 1)], 1, :), starting_hype_and_saturation, features + ) + model = customer_choice_model(instance) + env = Environment(; + instance, step=1, purchase_hist=Int[], rng=rng, seed=seed, - utility=zeros(instance.N), - prices=zeros(instance.N + 1), - features=zeros(instance.d + 3, instance.N), - start_features=zeros(2, instance.N), - d_features=zeros(2, instance.N), + utility=model(full_features), + features=full_features, + d_features=zeros(2, N), ) + CommonRLInterface.reset!(env; reset_seed=true) + return env end +customer_choice_model(b::Environment) = customer_choice_model(b.instance) +item_count(b::Environment) = item_count(b.instance) +feature_count(b::Environment) = feature_count(b.instance) +assortment_size(b::Environment) = assortment_size(b.instance) +max_steps(b::Environment) = max_steps(b.instance) +prices(b::Environment) = b.instance.prices +# features(b::Environment) = 
b.instance.features +# starting_hype_and_saturation(b::Environment) = b.instance.starting_hype_and_saturation + ## Basic operations of environment # Reset the environment function CommonRLInterface.reset!(env::Environment; reset_seed=false, seed=env.seed) - env.seed = seed - if reset_seed - Random.seed!(env.rng, env.seed) - end - (; d, N, customer_choice_model) = env.instance - features = rand(env.rng, Uniform(1.0, 10.0), (d + 3, N)) - env.prices = vcat(features[end, :], 0.0) - # features = vcat(features, ones(1, N)) # TODO - env.d_features .= 0.0 + reset_seed && Random.seed!(env.rng, seed) + env.step = 1 - env.utility .= customer_choice_model(features) + + (; prices, starting_hype_and_saturation, features) = env.instance + features = vcat( + reshape(prices[1:(end - 1)], 1, :), starting_hype_and_saturation, features + ) env.features .= features - env.start_features .= features[(d + 1):(d + 2), :] - env.purchase_hist = Int[] + + env.d_features .= 0.0 + + model = customer_choice_model(env) + env.utility .= model(features) + + empty!(env.purchase_hist) return nothing end -# Update the hype vector -function hype_update!(env::Environment) - hype_vector = ones(env.instance.N) - env.purchase_hist[end] != 0 ? 
hype_vector[env.purchase_hist[end]] += 0.02 : nothing - if length(env.purchase_hist) >= 2 - if env.purchase_hist[end - 1] != 0 - hype_vector[env.purchase_hist[end - 1]] -= 0.005 - else - nothing - end - if length(env.purchase_hist) >= 3 - if env.purchase_hist[end - 2] != 0 - hype_vector[env.purchase_hist[end - 2]] -= 0.005 - else - nothing - end - if length(env.purchase_hist) >= 4 - if env.purchase_hist[end - 3] != 0 - hype_vector[env.purchase_hist[end - 3]] -= 0.005 - else - nothing - end - if length(env.purchase_hist) >= 5 - if env.purchase_hist[end - 4] != 0 - hype_vector[env.purchase_hist[end - 4]] -= 0.005 - else - nothing - end - end +function CommonRLInterface.terminated(env::Environment) + return env.step > max_steps(env) +end + +function CommonRLInterface.observe(env::Environment) + delta_features = env.features[2:3, :] .- env.instance.starting_hype_and_saturation + return vcat( + env.features, + env.d_features, + delta_features, + ones(1, item_count(env)) .* (env.step / max_steps(env) * 10), + ) #./ 10 +end + +# Compute the hype vector +function hype_update(env::Environment) + N = item_count(env) + hype_vector = ones(N) + hist = env.purchase_hist + + # Define decay factors for each time step + factors = [0.02, -0.005, -0.005, -0.005, -0.005] + + # Apply updates for the last 5 purchases + for (i, factor) in enumerate(factors) + if length(hist) >= i + item = hist[end - i + 1] + if item <= N + hype_vector[item] += factor end end end + return hype_vector end # Step function -function step!(env::Environment, item) - old_features = copy(env.features) +function buy_item!(env::Environment, item::Int) push!(env.purchase_hist, item) - if env.instance.endogenous - hype_vector = hype_update!(env) - env.features[3, :] .*= hype_vector - item != 0 ? env.features[4, item] *= 1.01 : nothing - # env.features[6, :] .+= 9 / env.instance.max_steps # ?? - end - env.d_features = env.features[3:4, :] - old_features[3:4, :] # ! 
hardcoded everywhere :( env.step += 1 + + if is_endogenous(env.instance.config) + old_features = copy(env.features[2:3, :]) + # update hype feature + hype_vector = hype_update(env) + env.features[2, :] .*= hype_vector + + # update saturation feature + if item <= item_count(env) + env.features[3, item] *= 1.01 + end + + env.utility .= customer_choice_model(env)(env.features) + env.d_features = env.features[2:3, :] - old_features + end return nothing end # Choice probabilities function choice_probabilities(env::Environment, S) + N = item_count(env) θ = env.utility - exp_values = [exp(θ[i]) * S[i] for i in 1:(env.instance.N)] - denominator = 1 + sum(exp_values) - probs = [exp_values[i] / denominator for i in 1:(env.instance.N)] - push!(probs, 1 / denominator) # Probability of no purchase + exp_values = [exp(θ[i]) * S[i] for i in 1:N] + push!(exp_values, 1.0) # No purchase action + denominator = sum(exp_values) + probs = exp_values ./ denominator return probs end # Purchase decision function CommonRLInterface.act!(env::Environment, S) - r = env.prices + r = prices(env) probs = choice_probabilities(env, S) item = rand(env.rng, Categorical(probs)) reward = r[item] - item == env.instance.N + 1 ? item = 0 : item # TODO: cleanup this, not really needed and confusing - step!(env, item) + buy_item!(env, item) return reward end +## Solution functions # enumerate all possible assortments of size K and return the best one -# ? 
can't we do better than that, probably +function compute_expected_revenue(env::Environment, S) + r = prices(env) + probs = choice_probabilities(env, S) + expected_revenue = dot(probs, r) + return expected_revenue +end + function expert_solution(env::Environment) - r = env.prices - local best_S - best_revenue = 0.0 - for S in combinations(1:(env.instance.N), env.instance.K) - S_vec = zeros(env.instance.N) - S_vec[S] .= 1.0 - probs = choice_probabilities(env, S_vec) - expected_revenue = dot(probs, r) + N = item_count(env) + K = assortment_size(env) + best_S = falses(N) + best_revenue = -1.0 + S_vec = falses(N) + for S in combinations(1:N, K) + S_vec .= false + S_vec[S] .= true + expected_revenue = compute_expected_revenue(env, S_vec) if expected_revenue > best_revenue - best_S, best_revenue = S_vec, expected_revenue + best_S, best_revenue = copy(S_vec), expected_revenue end end return best_S end -# DAP CO-layer -function DAP_optimization(θ; instance::Instance) - solution = partialsortperm(θ, 1:(instance.K); rev=true) # It never makes sense not to show k items - S = zeros(instance.N) - S[solution] .= 1 - return S +function greedy_solution(env::Environment) + maximizer = generate_maximizer(env.instance.config) + return maximizer(prices(env)) end -## Solution functions - -# Anticipative (fixed) -function expert_policy(env::Environment, episodes; first_seed=1, use_oracle=false) +function run_policy(env::Environment, episodes::Int; first_seed=1, policy=expert_solution) dataset = [] rev_global = Float64[] for i in 1:episodes rev_episode = 0.0 CommonRLInterface.reset!(env; seed=first_seed - 1 + i, reset_seed=true) - done = false training_instances = [] - while !done - S = expert_solution(env) - - delta_features = env.features[3:4, :] .- env.start_features # ! 
hardcoded - feature_vector = vcat(env.features, env.d_features, delta_features) - push!(training_instances, (features=feature_vector, S_t=S)) - + while !CommonRLInterface.terminated(env) + S = policy(env) + features = CommonRLInterface.observe(env) + push!(training_instances, DataSample(; x=features, y_true=S)) reward = CommonRLInterface.act!(env, S) rev_episode += reward - - env.step > env.instance.max_steps ? done = true : done = false end push!(rev_global, rev_episode) push!(dataset, training_instances) end return mean(rev_global), rev_global, dataset end - -# Greedy heuristic -function model_greedy(features) - model = Chain(Dense([0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0]), vec) - return model(features) -end - -# Random heuristic -function model_random(features) - rand_seed = Int(round(sum(features))) - return rand(MersenneTwister(rand_seed), Uniform(0.0, 1.0), size(features)[2]) -end - -# Episode generation -# function generate_episode(env::Environment, model, customer_model, sigma, random_seed) -# buffer = [] -# start_features, d_features = reset!(env; seed=random_seed) -# features = copy(start_features) -# done = false -# while !done -# delta_features = features[3:4, :] .- start_features[3:4, :] -# r = features[5, :] -# feature_vector = vcat(features, d_features, delta_features) -# θ = model(feature_vector) -# η = rand(MersenneTwister(random_seed * env.step), p(θ, sigma), 1)[:, 1] -# S = DAP_optimization(η; instance=env.instance) -# θ_0 = customer_model(features) -# item, revenue = purchase!(env, S) -# features, d_features = step!(env, item) -# feat_next = vcat(features, d_features, features[3:4, :] .- start_features[3:4, :]) -# push!( -# buffer, -# ( -# t=env.step - 1, -# feat_t=feature_vector, -# theta=θ, -# eta=η, -# S_t=S, -# a_T=item, -# rev_t=revenue, -# ret_t=0.0, -# feat_next=feat_next, -# ), -# ) -# count(!iszero, inventory) < env.instance.K ? break : nothing -# env.step > env.instance.max_steps ? 
done = true : done = false -# end -# for i in (length(buffer) - 1):-1:1 -# if i == length(buffer) - 1 -# ret = buffer[i].rev_t -# else -# ret = buffer[i].rev_t + 0.99 * buffer[i + 1].ret_t -# end -# traj = buffer[i] -# traj_updated = (; traj..., ret_t=ret) -# buffer[i] = traj_updated -# end -# return buffer -# end diff --git a/src/DynamicAssortment/instance.jl b/src/DynamicAssortment/instance.jl new file mode 100644 index 0000000..3250cdd --- /dev/null +++ b/src/DynamicAssortment/instance.jl @@ -0,0 +1,33 @@ +""" +$TYPEDEF + +Instance of the dynamic assortment problem. + +# Fields +$TYPEDFIELDS +""" +@kwdef struct Instance{B<:DynamicAssortmentBenchmark} + "associated benchmark" + config::B + "item prices (including no purchase action)" + prices::Vector{Float64} + "static features, size (d, N)" + features::Matrix{Float64} + "starting hype and saturation features, size (2, N)" + starting_hype_and_saturation::Matrix{Float64} +end + +function Instance(b::DynamicAssortmentBenchmark, rng::AbstractRNG) + N = item_count(b) + d = feature_count(b) + prices = vcat(rand(rng, Uniform(1.0, 10.0), N), 0.0) # last price is for no purchase action + features = rand(rng, Uniform(1.0, 10.0), (d, N)) + starting_hype_and_saturation = rand(rng, Uniform(1.0, 10.0), (2, N)) + return Instance(; config=b, prices, features, starting_hype_and_saturation) +end + +customer_choice_model(b::Instance) = customer_choice_model(b.config) +item_count(b::Instance) = item_count(b.config) +feature_count(b::Instance) = feature_count(b.config) +assortment_size(b::Instance) = assortment_size(b.config) +max_steps(b::Instance) = max_steps(b.config) diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl index d57644b..ada7212 100644 --- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl +++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl @@ -55,7 +55,7 @@ Abstract type for dynamic vehicle scheduling benchmarks. 
# Fields $TYPEDFIELDS """ -@kwdef struct DVSPBenchmark <: AbstractDynamicBenchmark +@kwdef struct DVSPBenchmark <: AbstractDynamicBenchmark{true} "todo" max_requests_per_epoch::Int = 10 "todo" diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl index 7a1e804..6c37b26 100644 --- a/src/Utils/Utils.jl +++ b/src/Utils/Utils.jl @@ -16,6 +16,7 @@ include("interface.jl") include("grid_graph.jl") include("misc.jl") include("model_builders.jl") +include("maximizers.jl") export DataSample @@ -34,5 +35,8 @@ export grid_graph, get_path, path_to_matrix export neg_tensor, squeeze_last_dims, average_tensor export scip_model, highs_model export objective_value +export is_exogenous, is_endogenous + +export TopKMaximizer end diff --git a/src/Utils/interface.jl b/src/Utils/interface.jl index ce9b6ed..1a5b826 100644 --- a/src/Utils/interface.jl +++ b/src/Utils/interface.jl @@ -39,7 +39,7 @@ function generate_dataset( bench::AbstractBenchmark, dataset_size::Int; seed=nothing, - rng=MersenneTwister(0), + rng=MersenneTwister(seed), kwargs..., ) Random.seed!(rng, seed) @@ -187,18 +187,21 @@ This type should be used for benchmarks that involve single stage stochastic opt It follows the same interface as [`AbstractBenchmark`](@ref), with the addition of the following methods: - [`generate_anticipative_solver`](@ref) """ -abstract type AbstractStochasticBenchmark <: AbstractBenchmark end +abstract type AbstractStochasticBenchmark{exogenous} <: AbstractBenchmark end -function generate_scenario end +is_exogenous(::AbstractStochasticBenchmark{exogenous}) where {exogenous} = exogenous +is_endogenous(::AbstractStochasticBenchmark{exogenous}) where {exogenous} = !exogenous # only works for exogenous noise +function generate_scenario end + """ - generate_scenario_generator(::AbstractStochasticBenchmark; kwargs...) + generate_scenario_generator(::AbstractStochasticBenchmark{true}; kwargs...) 
""" function generate_scenario_generator end """ - generate_anticipative_solver(::AbstractStochasticBenchmark; kwargs...) + generate_anticipative_solver(::AbstractStochasticBenchmark{true}; kwargs...) """ function generate_anticipative_solver end @@ -211,7 +214,7 @@ This type should be used for benchmarks that involve multi-stage stochastic opti It follows the same interface as [`AbstractStochasticBenchmark`](@ref), with the addition of the following methods: TODO """ -abstract type AbstractDynamicBenchmark <: AbstractStochasticBenchmark end +abstract type AbstractDynamicBenchmark{exogenous} <: AbstractStochasticBenchmark{exogenous} end """ generate_environment(::AbstractDynamicBenchmark, instance; kwargs...) @@ -226,7 +229,7 @@ $TYPEDSIGNATURES Generate a vector of environments for the given dynamic benchmark and dataset. """ function generate_environments( - bench::AbstractDynamicBenchmark, dataset::Vector{<:DataSample}, kwargs... + bench::AbstractDynamicBenchmark, dataset::Vector{<:DataSample}; kwargs... ) return map(dataset) do sample generate_environment(bench, sample.instance; kwargs...) diff --git a/src/Utils/maximizers.jl b/src/Utils/maximizers.jl new file mode 100644 index 0000000..ee5ceea --- /dev/null +++ b/src/Utils/maximizers.jl @@ -0,0 +1,22 @@ +""" +$TYPEDEF + +Top k maximizer. +""" +struct TopKMaximizer + k::Int +end + +""" +$TYPEDSIGNATURES + +Return the top k indices of `θ`. +""" +function (m::TopKMaximizer)(θ; kwargs...) 
+ N = length(θ) + @assert N >= m.k "The length of θ must be at least k" + solution = partialsortperm(θ, 1:(m.k); rev=true) + res = falses(N) + res[solution] .= 1 + return res +end From d51fe95e03b06216beca25cbfc0e5f68ed104689 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Wed, 6 Aug 2025 17:18:12 +0200 Subject: [PATCH 16/29] Fix tests --- src/Argmax2D/Argmax2D.jl | 22 ++++------ src/DynamicAssortment/DynamicAssortment.jl | 4 +- src/DynamicAssortment/environment.jl | 42 ------------------- src/DynamicAssortment/policies.jl | 41 ++++++++++++++++++ .../algorithms/anticipative_solver.jl | 12 +++--- 5 files changed, 59 insertions(+), 62 deletions(-) create mode 100644 src/DynamicAssortment/policies.jl diff --git a/src/Argmax2D/Argmax2D.jl b/src/Argmax2D/Argmax2D.jl index a1c76c4..169c403 100644 --- a/src/Argmax2D/Argmax2D.jl +++ b/src/Argmax2D/Argmax2D.jl @@ -7,7 +7,7 @@ using Flux: Chain, Dense using LaTeXStrings: @L_str using LinearAlgebra: dot, norm using Plots: Plots -using Random: Random, MersenneTwister +using Random: Random, MersenneTwister, AbstractRNG include("polytope.jl") @@ -53,20 +53,16 @@ maximizer(θ; instance, kwargs...) = instance[argmax(dot(θ, v) for v in instanc """ $TYPEDSIGNATURES -Generate a dataset for the [`Argmax2DBenchmark`](@ref). +Generate a sample for the [`Argmax2DBenchmark`](@ref). 
""" -function Utils.generate_dataset( - bench::Argmax2DBenchmark, dataset_size=10; seed=nothing, rng=MersenneTwister(seed) -) +function Utils.generate_sample(bench::Argmax2DBenchmark, rng::AbstractRNG) (; nb_features, encoder, polytope_vertex_range) = bench - return map(1:dataset_size) do _ - x = randn(rng, Float32, nb_features) - θ_true = encoder(x) - θ_true ./= 2 * norm(θ_true) - instance = build_polytope(rand(rng, polytope_vertex_range); shift=rand(rng)) - y_true = maximizer(θ_true; instance) - return DataSample(; x=x, θ_true=θ_true, y_true=y_true, instance=instance) - end + x = randn(rng, Float32, nb_features) + θ_true = encoder(x) + θ_true ./= 2 * norm(θ_true) + instance = build_polytope(rand(rng, polytope_vertex_range); shift=rand(rng)) + y_true = maximizer(θ_true; instance) + return DataSample(; x=x, θ_true=θ_true, y_true=y_true, instance=instance) end """ diff --git a/src/DynamicAssortment/DynamicAssortment.jl b/src/DynamicAssortment/DynamicAssortment.jl index f5abbbe..4ab32e4 100644 --- a/src/DynamicAssortment/DynamicAssortment.jl +++ b/src/DynamicAssortment/DynamicAssortment.jl @@ -48,6 +48,7 @@ end include("instance.jl") include("environment.jl") +include("policies.jl") customer_choice_model(b::DynamicAssortmentBenchmark) = b.customer_choice_model item_count(b::DynamicAssortmentBenchmark) = b.N @@ -56,9 +57,8 @@ assortment_size(b::DynamicAssortmentBenchmark) = b.K max_steps(b::DynamicAssortmentBenchmark) = b.max_steps function Utils.generate_sample( - b::DynamicAssortmentBenchmark, rng::AbstractRNG=MersenneTwister(0); seed=nothing + b::DynamicAssortmentBenchmark, rng::AbstractRNG=MersenneTwister(0) ) - Random.seed!(rng, seed) return DataSample(; instance=Instance(b, rng)) end diff --git a/src/DynamicAssortment/environment.jl b/src/DynamicAssortment/environment.jl index f8617c0..df9b01b 100644 --- a/src/DynamicAssortment/environment.jl +++ b/src/DynamicAssortment/environment.jl @@ -166,45 +166,3 @@ function compute_expected_revenue(env::Environment, S) 
expected_revenue = dot(probs, r) return expected_revenue end - -function expert_solution(env::Environment) - N = item_count(env) - K = assortment_size(env) - best_S = falses(N) - best_revenue = -1.0 - S_vec = falses(N) - for S in combinations(1:N, K) - S_vec .= false - S_vec[S] .= true - expected_revenue = compute_expected_revenue(env, S_vec) - if expected_revenue > best_revenue - best_S, best_revenue = copy(S_vec), expected_revenue - end - end - return best_S -end - -function greedy_solution(env::Environment) - maximizer = generate_maximizer(env.instance.config) - return maximizer(prices(env)) -end - -function run_policy(env::Environment, episodes::Int; first_seed=1, policy=expert_solution) - dataset = [] - rev_global = Float64[] - for i in 1:episodes - rev_episode = 0.0 - CommonRLInterface.reset!(env; seed=first_seed - 1 + i, reset_seed=true) - training_instances = [] - while !CommonRLInterface.terminated(env) - S = policy(env) - features = CommonRLInterface.observe(env) - push!(training_instances, DataSample(; x=features, y_true=S)) - reward = CommonRLInterface.act!(env, S) - rev_episode += reward - end - push!(rev_global, rev_episode) - push!(dataset, training_instances) - end - return mean(rev_global), rev_global, dataset -end diff --git a/src/DynamicAssortment/policies.jl b/src/DynamicAssortment/policies.jl new file mode 100644 index 0000000..56aba6d --- /dev/null +++ b/src/DynamicAssortment/policies.jl @@ -0,0 +1,41 @@ +function expert_policy(env::Environment) + N = item_count(env) + K = assortment_size(env) + best_S = falses(N) + best_revenue = -1.0 + S_vec = falses(N) + for S in combinations(1:N, K) + S_vec .= false + S_vec[S] .= true + expected_revenue = compute_expected_revenue(env, S_vec) + if expected_revenue > best_revenue + best_S, best_revenue = copy(S_vec), expected_revenue + end + end + return best_S +end + +function greedy_policy(env::Environment) + maximizer = generate_maximizer(env.instance.config) + return maximizer(prices(env)) +end + 
+function run_policy(env::Environment, episodes::Int; first_seed=1, policy=expert_policy) + dataset = [] + rev_global = Float64[] + for i in 1:episodes + rev_episode = 0.0 + CommonRLInterface.reset!(env; seed=first_seed - 1 + i, reset_seed=true) + training_instances = [] + while !CommonRLInterface.terminated(env) + S = policy(env) + features = CommonRLInterface.observe(env) + push!(training_instances, DataSample(; x=features, y_true=S)) + reward = CommonRLInterface.act!(env, S) + rev_episode += reward + end + push!(rev_global, rev_episode) + push!(dataset, training_instances) + end + return mean(rev_global), rev_global, dataset +end diff --git a/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl b/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl index dd52d19..aeaa6ad 100644 --- a/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl +++ b/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl @@ -201,9 +201,11 @@ end end function (solver::AnticipativeSolver)(env::DVSPEnv, scenario=env.scenario; reset_env=false) - if solver.is_2D - return anticipative_solver(env, scenario; model_builder=highs_model_2d, reset_env) - else - return anticipative_solver(env, scenario; model_builder=highs_model, reset_env) - end + return anticipative_solver( + env, + scenario; + model_builder=highs_model, + reset_env, + two_dimensional_features=solver.is_2D, + ) end From a90a3d5f080c99cde9db76f7670d7bfceee1ec84 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Wed, 6 Aug 2025 18:08:10 +0200 Subject: [PATCH 17/29] simplify interface --- src/DecisionFocusedLearningBenchmarks.jl | 4 ++- .../DynamicVehicleScheduling.jl | 24 ++++++++++------- .../algorithms/anticipative_solver.jl | 26 +++++++++---------- .../environment/environment.jl | 4 +-- .../environment/scenario.jl | 6 ++--- .../policy/anticipative_policy.jl | 2 +- src/Utils/Utils.jl | 2 ++ src/Utils/interface.jl | 21 ++++++++++----- 8 files changed, 53 insertions(+), 36 deletions(-) diff --git 
a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl index 67bd006..68e0b1d 100644 --- a/src/DecisionFocusedLearningBenchmarks.jl +++ b/src/DecisionFocusedLearningBenchmarks.jl @@ -74,9 +74,11 @@ using .DynamicAssortment export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark, DataSample export generate_sample, generate_dataset, generate_environments, generate_environment +export generate_scenario export generate_scenario_generator, generate_anticipative_solver export generate_statistical_model export generate_maximizer, maximizer_kwargs +export generate_anticipative_solution export is_exogenous, is_endogenous export objective_value @@ -92,7 +94,7 @@ export WarcraftBenchmark export FixedSizeShortestPathBenchmark export PortfolioOptimizationBenchmark export StochasticVehicleSchedulingBenchmark -export DVSPBenchmark +export DynamicVehicleSchedulingBenchmark export DynamicAssortmentBenchmark end # module DecisionFocusedLearningBenchmarks diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl index ada7212..d044c5e 100644 --- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl +++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl @@ -55,7 +55,7 @@ Abstract type for dynamic vehicle scheduling benchmarks. 
# Fields $TYPEDFIELDS """ -@kwdef struct DVSPBenchmark <: AbstractDynamicBenchmark{true} +@kwdef struct DynamicVehicleSchedulingBenchmark <: AbstractDynamicBenchmark{true} "todo" max_requests_per_epoch::Int = 10 "todo" @@ -66,7 +66,7 @@ $TYPEDFIELDS two_dimensional_features::Bool = false end -function Utils.generate_dataset(b::DVSPBenchmark, dataset_size::Int=1) +function Utils.generate_dataset(b::DynamicVehicleSchedulingBenchmark, dataset_size::Int=1) (; max_requests_per_epoch, Δ_dispatch, epoch_duration) = b files = readdir(datadep"dvrptw"; join=true) dataset_size = min(dataset_size, length(files)) @@ -82,23 +82,29 @@ function Utils.generate_dataset(b::DVSPBenchmark, dataset_size::Int=1) ] end -function Utils.generate_environment(::DVSPBenchmark, instance::Instance; kwargs...) +function Utils.generate_environment( + ::DynamicVehicleSchedulingBenchmark, instance::Instance; kwargs... +) return DVSPEnv(instance; kwargs...) end -function Utils.generate_maximizer(::DVSPBenchmark) +function Utils.generate_maximizer(::DynamicVehicleSchedulingBenchmark) return LinearMaximizer(oracle; g, h) end -function Utils.generate_scenario_generator(::DVSPBenchmark) - return generate_scenario +function Utils.generate_scenario(b::DynamicVehicleSchedulingBenchmark, args...; kwargs...) + return Utils.generate_scenario(args...; kwargs...) end -function Utils.generate_anticipative_solver(b::DVSPBenchmark; kwargs...) - return AnticipativeSolver(b.two_dimensional_features) +function Utils.generate_anticipative_solution( + b::DynamicVehicleSchedulingBenchmark, args...; kwargs... 
+) + return anticipative_solver( + args...; kwargs..., two_dimensional_features=b.two_dimensional_features + ) end -export DVSPBenchmark #, generate_environment # , generate_sample, generate_anticipative_solver +export DynamicVehicleSchedulingBenchmark export run_policy!, GreedyVSPPolicy, LazyVSPPolicy, KleopatraVSPPolicy, AnticipativeVSPPolicy diff --git a/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl b/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl index aeaa6ad..f274e02 100644 --- a/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl +++ b/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl @@ -196,16 +196,16 @@ function anticipative_solver( return obj, dataset end -@kwdef struct AnticipativeSolver - is_2D::Bool = false -end - -function (solver::AnticipativeSolver)(env::DVSPEnv, scenario=env.scenario; reset_env=false) - return anticipative_solver( - env, - scenario; - model_builder=highs_model, - reset_env, - two_dimensional_features=solver.is_2D, - ) -end +# @kwdef struct AnticipativeSolver +# is_2D::Bool = false +# end + +# function (solver::AnticipativeSolver)(env::DVSPEnv, scenario=env.scenario; reset_env=false) +# return generate_anticipative_decision( +# env, +# scenario; +# model_builder=highs_model, +# reset_env, +# two_dimensional_features=solver.is_2D, +# ) +# end diff --git a/src/DynamicVehicleScheduling/environment/environment.jl b/src/DynamicVehicleScheduling/environment/environment.jl index 0bbc4ae..29d9d35 100644 --- a/src/DynamicVehicleScheduling/environment/environment.jl +++ b/src/DynamicVehicleScheduling/environment/environment.jl @@ -13,7 +13,7 @@ $TYPEDSIGNATURES Constructor for [`DVSPEnv`](@ref). """ function DVSPEnv(instance::Instance; seed=nothing, rng=MersenneTwister(seed)) - scenario = generate_scenario(instance; rng, seed) + scenario = Utils.generate_scenario(instance; rng, seed) initial_state = DVSPState(instance; scenario[1]...) 
return DVSPEnv(instance, initial_state, scenario) end @@ -76,6 +76,6 @@ function CommonRLInterface.act!(env::DVSPEnv, routes, scenario=env.scenario) return reward end -function generate_scenario(env::DVSPEnv; kwargs...) +function Utils.generate_scenario(env::DVSPEnv; kwargs...) return generate_scenario(env.instance; kwargs...) end diff --git a/src/DynamicVehicleScheduling/environment/scenario.jl b/src/DynamicVehicleScheduling/environment/scenario.jl index cee4fe7..9059477 100644 --- a/src/DynamicVehicleScheduling/environment/scenario.jl +++ b/src/DynamicVehicleScheduling/environment/scenario.jl @@ -16,7 +16,7 @@ function Base.getindex(scenario::Scenario, idx::Integer) ) end -function generate_scenario( +function Utils.generate_scenario( instance::Instance; seed=nothing, rng::AbstractRNG=MersenneTwister(seed) ) (; Δ_dispatch, static_instance, last_epoch, epoch_duration, max_requests_per_epoch) = @@ -46,6 +46,6 @@ function generate_scenario( return Scenario(new_indices, new_service_time, new_start_time) end -function generate_scenario(sample::DataSample; kwargs...) - return generate_scenario(sample.instance; kwargs...) +function Utils.generate_scenario(sample::DataSample; kwargs...) + return Utils.generate_scenario(sample.instance; kwargs...) end diff --git a/src/DynamicVehicleScheduling/policy/anticipative_policy.jl b/src/DynamicVehicleScheduling/policy/anticipative_policy.jl index d7f2381..f63b620 100644 --- a/src/DynamicVehicleScheduling/policy/anticipative_policy.jl +++ b/src/DynamicVehicleScheduling/policy/anticipative_policy.jl @@ -13,5 +13,5 @@ Apply the anticipative policy to the environment. 
function run_policy!( ::AnticipativeVSPPolicy, env::DVSPEnv, scenario=env.scenario; model_builder=highs_model ) - return anticipative_solver(env, scenario; model_builder, reset_env=true) + return generate_anticipative_solution(env, scenario; model_builder, reset_env=true) end diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl index 6c37b26..c9a7153 100644 --- a/src/Utils/Utils.jl +++ b/src/Utils/Utils.jl @@ -25,10 +25,12 @@ export generate_dataset, generate_statistical_model, generate_maximizer, generate_sample, + generate_scenario, generate_scenario_generator, generate_anticipative_solver, generate_environment, generate_environments +export generate_anticipative_solution export plot_data, compute_gap export maximizer_kwargs export grid_graph, get_path, path_to_matrix diff --git a/src/Utils/interface.jl b/src/Utils/interface.jl index 1a5b826..2994e4e 100644 --- a/src/Utils/interface.jl +++ b/src/Utils/interface.jl @@ -192,18 +192,25 @@ abstract type AbstractStochasticBenchmark{exogenous} <: AbstractBenchmark end is_exogenous(::AbstractStochasticBenchmark{exogenous}) where {exogenous} = exogenous is_endogenous(::AbstractStochasticBenchmark{exogenous}) where {exogenous} = !exogenous -# only works for exogenous noise -function generate_scenario end - """ - generate_scenario_generator(::AbstractStochasticBenchmark{true}; kwargs...) + generate_anticipative_solver(::AbstractStochasticBenchmark{true}, instance; kwargs...) """ -function generate_scenario_generator end +function generate_scenario end """ - generate_anticipative_solver(::AbstractStochasticBenchmark{true}; kwargs...) + anticipative_policy(::AbstractStochasticBenchmark{true}, instance, scenario; kwargs...) """ -function generate_anticipative_solver end +function generate_anticipative_solution end + +# """ +# generate_scenario_generator(::AbstractStochasticBenchmark{true}; kwargs...) 
+# """ +# function generate_scenario_generator end + +# """ +# generate_anticipative_solver(::AbstractStochasticBenchmark{true}; kwargs...) +# """ +# function generate_anticipative_solver end """ $TYPEDEF From e8be49680c9ca55cdf0bf155a1c641da948ea5a8 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Thu, 7 Aug 2025 16:04:45 +0200 Subject: [PATCH 18/29] Dynamic assortment is in a good state; fix docs; working on DVSP --- Project.toml | 6 +- docs/src/api/dynamic_assorment.md | 15 ++++ docs/src/benchmarks/dynamic_assorment.md | 3 + src/DecisionFocusedLearningBenchmarks.jl | 38 ++++---- src/DynamicAssortment/DynamicAssortment.jl | 29 +++++-- src/DynamicAssortment/environment.jl | 21 ++--- .../DynamicVehicleScheduling.jl | 26 +++--- .../algorithms/prize_collecting_vsp.jl | 87 ------------------- .../environment/environment.jl | 12 +-- src/Utils/Utils.jl | 27 +++--- src/Utils/environment.jl | 52 +++++++++++ src/Utils/interface.jl | 26 +++--- src/Utils/policy.jl | 83 ++++++++++++++++++ 13 files changed, 255 insertions(+), 170 deletions(-) create mode 100644 docs/src/api/dynamic_assorment.md create mode 100644 docs/src/benchmarks/dynamic_assorment.md create mode 100644 src/Utils/environment.jl create mode 100644 src/Utils/policy.jl diff --git a/Project.toml b/Project.toml index 87fe839..09e60e4 100644 --- a/Project.toml +++ b/Project.toml @@ -4,9 +4,8 @@ authors = ["Members of JuliaDecisionFocusedLearning"] version = "0.2.4" [deps] -Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" -CommonRLInterface = "d842c3ba-07a1-494f-bbec-f5741b0a3e98" Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" +Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" ConstrainedShortestPaths = "b3798467-87dc-4d99-943d-35a1bd39e395" DataDeps = "124859b0-ceae-595e-8997-d05f6a7a8dfe" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" @@ -35,9 +34,8 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] -Combinatorics = "1.0.3" 
-CommonRLInterface = "0.3.3" Colors = "0.13.1" +Combinatorics = "1.0.3" ConstrainedShortestPaths = "0.6.0" DataDeps = "0.7" Distributions = "0.25" diff --git a/docs/src/api/dynamic_assorment.md b/docs/src/api/dynamic_assorment.md new file mode 100644 index 0000000..d738692 --- /dev/null +++ b/docs/src/api/dynamic_assorment.md @@ -0,0 +1,15 @@ +# Dynamic Assortment + +## Public + +```@autodocs +Modules = [DecisionFocusedLearningBenchmarks.DynamicAssortment] +Private = false +``` + +## Private + +```@autodocs +Modules = [DecisionFocusedLearningBenchmarks.DynamicAssortment] +Public = false +``` diff --git a/docs/src/benchmarks/dynamic_assorment.md b/docs/src/benchmarks/dynamic_assorment.md new file mode 100644 index 0000000..dcf3243 --- /dev/null +++ b/docs/src/benchmarks/dynamic_assorment.md @@ -0,0 +1,3 @@ +# Dynamic Assortment + +[`DynamicAssortmentBenchmark`](@ref). diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl index 68e0b1d..b49362a 100644 --- a/src/DecisionFocusedLearningBenchmarks.jl +++ b/src/DecisionFocusedLearningBenchmarks.jl @@ -59,23 +59,16 @@ include("DynamicVehicleScheduling/DynamicVehicleScheduling.jl") include("DynamicAssortment/DynamicAssortment.jl") using .Utils -using .Argmax -using .Argmax2D -using .Ranking -using .SubsetSelection -using .Warcraft -using .FixedSizeShortestPath -using .PortfolioOptimization -using .StochasticVehicleScheduling -using .DynamicVehicleScheduling -using .DynamicAssortment # Interface export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark, DataSample +export AbstractEnv, get_seed, is_terminated, observe, reset!, step! + +export Policy, run_policy! 
export generate_sample, generate_dataset, generate_environments, generate_environment export generate_scenario -export generate_scenario_generator, generate_anticipative_solver +export generate_policies export generate_statistical_model export generate_maximizer, maximizer_kwargs export generate_anticipative_solution @@ -86,15 +79,26 @@ export plot_data, plot_instance, plot_solution export compute_gap # Export all benchmarks -export ArgmaxBenchmark +using .Argmax +using .Argmax2D +using .Ranking +using .SubsetSelection +using .Warcraft +using .FixedSizeShortestPath +using .PortfolioOptimization +using .StochasticVehicleScheduling +using .DynamicVehicleScheduling +using .DynamicAssortment + export Argmax2DBenchmark -export RankingBenchmark -export SubsetSelectionBenchmark -export WarcraftBenchmark +export ArgmaxBenchmark +export DynamicAssortmentBenchmark +export DynamicVehicleSchedulingBenchmark export FixedSizeShortestPathBenchmark export PortfolioOptimizationBenchmark +export RankingBenchmark export StochasticVehicleSchedulingBenchmark -export DynamicVehicleSchedulingBenchmark -export DynamicAssortmentBenchmark +export SubsetSelectionBenchmark +export WarcraftBenchmark end # module DecisionFocusedLearningBenchmarks diff --git a/src/DynamicAssortment/DynamicAssortment.jl b/src/DynamicAssortment/DynamicAssortment.jl index 4ab32e4..f04d3e2 100644 --- a/src/DynamicAssortment/DynamicAssortment.jl +++ b/src/DynamicAssortment/DynamicAssortment.jl @@ -2,7 +2,6 @@ module DynamicAssortment using ..Utils -using CommonRLInterface: CommonRLInterface, AbstractEnv using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES using Distributions: Uniform, Categorical using LinearAlgebra: dot @@ -62,17 +61,35 @@ function Utils.generate_sample( return DataSample(; instance=Instance(b, rng)) end +function Utils.generate_statistical_model(b::DynamicAssortmentBenchmark; seed=nothing) + Random.seed!(seed) + d = feature_count(b) + return Chain(Dense(d + 8 => 5), Dense(5 => 1), vec) 
+end + function Utils.generate_maximizer(b::DynamicAssortmentBenchmark) return TopKMaximizer(assortment_size(b)) end function Utils.generate_environment( - ::DynamicAssortmentBenchmark, - instance::Instance; - seed=nothing, - rng::AbstractRNG=MersenneTwister(seed), + ::DynamicAssortmentBenchmark, instance::Instance, rng::AbstractRNG ) - return Environment(instance; seed=seed, rng=rng) + seed = rand(rng, 1:typemax(Int)) + return Environment(instance; seed) +end + +function Utils.generate_policies(b::DynamicAssortmentBenchmark) + greedy = Policy( + "Greedy", + "policy that selects the assortment with items with the highest prices", + greedy_policy, + ) + expert = Policy( + "Expert", + "policy that selects the assortment with the highest expected revenue", + expert_policy, + ) + return (expert, greedy) end export DynamicAssortmentBenchmark diff --git a/src/DynamicAssortment/environment.jl b/src/DynamicAssortment/environment.jl index df9b01b..5f62acc 100644 --- a/src/DynamicAssortment/environment.jl +++ b/src/DynamicAssortment/environment.jl @@ -7,7 +7,7 @@ Environment for the dynamic assortment problem. 
$TYPEDFIELDS """ @kwdef mutable struct Environment{I<:Instance,R<:AbstractRNG,S<:Union{Nothing,Int}} <: - AbstractEnv + Utils.AbstractEnvironment "associated instance" instance::I "current step" @@ -43,23 +43,22 @@ function Environment(instance::Instance; seed=0, rng::AbstractRNG=MersenneTwiste features=full_features, d_features=zeros(2, N), ) - CommonRLInterface.reset!(env; reset_seed=true) + Utils.reset!(env; reset_seed=true) return env end +Utils.get_seed(env::Environment) = env.seed customer_choice_model(b::Environment) = customer_choice_model(b.instance) item_count(b::Environment) = item_count(b.instance) feature_count(b::Environment) = feature_count(b.instance) assortment_size(b::Environment) = assortment_size(b.instance) max_steps(b::Environment) = max_steps(b.instance) prices(b::Environment) = b.instance.prices -# features(b::Environment) = b.instance.features -# starting_hype_and_saturation(b::Environment) = b.instance.starting_hype_and_saturation ## Basic operations of environment # Reset the environment -function CommonRLInterface.reset!(env::Environment; reset_seed=false, seed=env.seed) +function Utils.reset!(env::Environment; reset_seed=false, seed=env.seed) reset_seed && Random.seed!(env.rng, seed) env.step = 1 @@ -79,18 +78,19 @@ function CommonRLInterface.reset!(env::Environment; reset_seed=false, seed=env.s return nothing end -function CommonRLInterface.terminated(env::Environment) +function Utils.is_terminated(env::Environment) return env.step > max_steps(env) end -function CommonRLInterface.observe(env::Environment) +function Utils.observe(env::Environment) delta_features = env.features[2:3, :] .- env.instance.starting_hype_and_saturation return vcat( env.features, env.d_features, delta_features, ones(1, item_count(env)) .* (env.step / max_steps(env) * 10), - ) #./ 10 + ) ./ 10, + nothing end # Compute the hype vector @@ -149,9 +149,10 @@ function choice_probabilities(env::Environment, S) end # Purchase decision -function 
CommonRLInterface.act!(env::Environment, S) +function Utils.step!(env::Environment, assortment) + @assert !Utils.is_terminated(env) "Environment is terminated, cannot act!" r = prices(env) - probs = choice_probabilities(env, S) + probs = choice_probabilities(env, assortment) item = rand(env.rng, Categorical(probs)) reward = r[item] buy_item!(env, item) diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl index d044c5e..1cc5262 100644 --- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl +++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl @@ -3,7 +3,6 @@ module DynamicVehicleScheduling using ..Utils using Base: @kwdef -using CommonRLInterface: CommonRLInterface, AbstractEnv, reset!, terminated, observe, act! using DataDeps: @datadep_str using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES using Graphs @@ -39,11 +38,11 @@ include("algorithms/anticipative_solver.jl") include("learning/features.jl") include("learning/2d_features.jl") -include("policy/abstract_vsp_policy.jl") -include("policy/greedy_policy.jl") -include("policy/lazy_policy.jl") -include("policy/anticipative_policy.jl") -include("policy/kleopatra_policy.jl") +# include("policy/abstract_vsp_policy.jl") +# include("policy/greedy_policy.jl") +# include("policy/lazy_policy.jl") +# include("policy/anticipative_policy.jl") +# include("policy/kleopatra_policy.jl") include("maximizer.jl") @@ -56,13 +55,13 @@ Abstract type for dynamic vehicle scheduling benchmarks. 
$TYPEDFIELDS """ @kwdef struct DynamicVehicleSchedulingBenchmark <: AbstractDynamicBenchmark{true} - "todo" + "maximum number of customers entering the system per epoch" max_requests_per_epoch::Int = 10 - "todo" + "time between decision and dispatch of a vehicle" Δ_dispatch::Float64 = 1.0 - "todo" + "duration of an epoch" epoch_duration::Float64 = 1.0 - "todo" + "whether to use two-dimensional features" two_dimensional_features::Bool = false end @@ -83,9 +82,10 @@ function Utils.generate_dataset(b::DynamicVehicleSchedulingBenchmark, dataset_si end function Utils.generate_environment( - ::DynamicVehicleSchedulingBenchmark, instance::Instance; kwargs... + ::DynamicVehicleSchedulingBenchmark, instance::Instance, rng::AbstractRNG ) - return DVSPEnv(instance; kwargs...) + seed = rand(rng, 1:typemax(Int)) + return DVSPEnv(instance; seed) end function Utils.generate_maximizer(::DynamicVehicleSchedulingBenchmark) @@ -105,7 +105,5 @@ function Utils.generate_anticipative_solution( end export DynamicVehicleSchedulingBenchmark -export run_policy!, - GreedyVSPPolicy, LazyVSPPolicy, KleopatraVSPPolicy, AnticipativeVSPPolicy end diff --git a/src/DynamicVehicleScheduling/algorithms/prize_collecting_vsp.jl b/src/DynamicVehicleScheduling/algorithms/prize_collecting_vsp.jl index 14c51f9..d98f4d0 100644 --- a/src/DynamicVehicleScheduling/algorithms/prize_collecting_vsp.jl +++ b/src/DynamicVehicleScheduling/algorithms/prize_collecting_vsp.jl @@ -126,90 +126,3 @@ function prize_collecting_vsp( return retrieve_routes(value.(y), graph) end - -# # ? 
-# function prize_collecting_vsp_Q( -# θ::AbstractVector, -# vals::AbstractVector; -# instance::DVSPState, -# model_builder=highs_model, -# kwargs..., -# ) -# (; duration) = instance.instance -# graph = create_graph(instance) -# model = model_builder() -# set_silent(model) -# nb_nodes = nv(graph) -# job_indices = 2:(nb_nodes) -# @variable(model, y[i=1:nb_nodes, j=1:nb_nodes; has_edge(graph, i, j)] >= 0) -# θ_ext = fill(0.0, location_count(instance.instance)) # no prize for must dispatch requests, only hard constraints -# θ_ext[instance.is_postponable] .= θ -# # v_ext = fill(0.0, nb_locations(instance.instance)) # no prize for must dispatch requests, only hard constraints -# # v_ext[instance.is_postponable] .= vals -# @objective( -# model, -# Max, -# sum( -# (θ_ext[dst(edge)] + vals[dst(edge)] - duration[src(edge), dst(edge)]) * -# y[src(edge), dst(edge)] for edge in edges(graph) -# ) -# ) -# @constraint( -# model, -# flow[i in 2:nb_nodes], -# sum(y[j, i] for j in inneighbors(graph, i)) == -# sum(y[i, j] for j in outneighbors(graph, i)) -# ) -# @constraint( -# model, demand[i in job_indices], sum(y[j, i] for j in inneighbors(graph, i)) <= 1 -# ) -# # must dispatch constraints -# @constraint( -# model, -# demand_must_dispatch[i in job_indices; instance.is_must_dispatch[i]], -# sum(y[j, i] for j in inneighbors(graph, i)) == 1 -# ) -# optimize!(model) -# return retrieve_routes(value.(y), graph) -# end - -# function my_objective_value(θ, routes; instance) -# (; duration) = instance.instance -# total = 0.0 -# θ_ext = fill(0.0, location_count(instance)) -# θ_ext[instance.is_postponable] .= θ -# for route in routes -# for (u, v) in partition(vcat(1, route), 2, 1) -# total += θ_ext[v] - duration[u, v] -# end -# end -# return -total -# end - -# function _objective_value(θ, routes; instance) -# (; duration) = instance.instance -# total = 0.0 -# θ_ext = fill(0.0, location_count(instance)) -# θ_ext[instance.is_postponable] .= θ -# mapping = cumsum(instance.is_postponable) -# g 
= falses(length(θ)) -# for route in routes -# for (u, v) in partition(vcat(1, route), 2, 1) -# total -= duration[u, v] -# if instance.is_postponable[v] -# total += θ_ext[v] -# g[mapping[v]] = 1 -# end -# end -# end -# return -total, g -# end - -# function ChainRulesCore.rrule(::typeof(my_objective_value), θ, routes; instance) -# total, g = _objective_value(θ, routes; instance) -# function pullback(dy) -# g = g .* dy -# return NoTangent(), g, NoTangent() -# end -# return total, pullback -# end diff --git a/src/DynamicVehicleScheduling/environment/environment.jl b/src/DynamicVehicleScheduling/environment/environment.jl index 29d9d35..ce146e6 100644 --- a/src/DynamicVehicleScheduling/environment/environment.jl +++ b/src/DynamicVehicleScheduling/environment/environment.jl @@ -1,4 +1,4 @@ -struct DVSPEnv{S<:DVSPState} <: AbstractEnv +struct DVSPEnv{S<:DVSPState} <: Utils.AbstractEnvironment "associated instance" instance::Instance "current state" @@ -28,7 +28,7 @@ $TYPEDSIGNATURES Get the current state of the environment. """ -CommonRLInterface.observe(env::DVSPEnv) = env.state +Utils.observe(env::DVSPEnv) = nothing, env.state current_epoch(env::DVSPEnv) = current_epoch(env.state) @@ -51,7 +51,7 @@ $TYPEDSIGNATURES Check if the episode is terminated, i.e. if the current epoch is the last one. """ -CommonRLInterface.terminated(env::DVSPEnv) = current_epoch(env) > last_epoch(env) +Utils.is_terminated(env::DVSPEnv) = current_epoch(env) > last_epoch(env) """ $TYPEDSIGNATURES @@ -59,7 +59,7 @@ $TYPEDSIGNATURES Reset the environment to its initial state. Also reset the seed if `reset_seed` is set to true. """ -function CommonRLInterface.reset!(env::DVSPEnv, scenario=env.scenario) +function Utils.reset!(env::DVSPEnv, scenario=env.scenario) reset_state!(env.state, env.instance; scenario[1]...) return nothing end @@ -67,10 +67,10 @@ end """ remove dispatched customers, advance time, and add new requests to the environment. 
""" -function CommonRLInterface.act!(env::DVSPEnv, routes, scenario=env.scenario) +function Utils.step!(env::DVSPEnv, routes, scenario=env.scenario) reward = -apply_routes!(env.state, routes) env.state.current_epoch += 1 - if !CommonRLInterface.terminated(env) + if !Utils.is_terminated(env) add_new_customers!(env.state, env.instance; scenario[current_epoch(env)]...) end return reward diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl index c9a7153..2efd0d0 100644 --- a/src/Utils/Utils.jl +++ b/src/Utils/Utils.jl @@ -12,25 +12,28 @@ using StatsBase: StatsBase using Statistics: mean include("data_sample.jl") +include("maximizers.jl") +include("environment.jl") +include("policy.jl") include("interface.jl") include("grid_graph.jl") include("misc.jl") include("model_builders.jl") -include("maximizers.jl") -export DataSample +export DataSample, Policy +export run_policy! +export TopKMaximizer + +export AbstractEnv, get_seed, is_terminated, observe, reset!, step! export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark -export generate_dataset, - generate_statistical_model, - generate_maximizer, - generate_sample, - generate_scenario, - generate_scenario_generator, - generate_anticipative_solver, - generate_environment, - generate_environments +export generate_sample, generate_dataset +export generate_statistical_model, generate_maximizer +export generate_scenario +export generate_environment, generate_environments +export generate_policies export generate_anticipative_solution + export plot_data, compute_gap export maximizer_kwargs export grid_graph, get_path, path_to_matrix @@ -39,6 +42,4 @@ export scip_model, highs_model export objective_value export is_exogenous, is_endogenous -export TopKMaximizer - end diff --git a/src/Utils/environment.jl b/src/Utils/environment.jl new file mode 100644 index 0000000..38a3f34 --- /dev/null +++ b/src/Utils/environment.jl @@ -0,0 +1,52 @@ +""" +$TYPEDEF + +Abstract type for environments in decision-focused 
learning benchmarks. +""" +abstract type AbstractEnvironment end + +""" +$TYPEDSIGNATURES + +Seed accessor for environments. +By default, environments have no seed. +Override this method to provide a seed for the environment. +""" +function get_seed(::AbstractEnvironment) + return nothing +end + +""" + is_terminated(env::AbstractEnvironment) --> Bool + +Check if the environment has reached a terminal state. +""" +function is_terminated end + +""" + observe(env::AbstractEnvironment) --> Tuple + +Get the current observation from the environment. +This function should return a tuple of two elements: + 1. An array of features representing the current state of the environment. + 2. An internal state of the environment, which can be used for further processing (return `nothing` if not needed). +""" +function observe end + +""" + reset!(env::AbstractEnvironment; reset_seed::Bool, seed=get_seed(env)) --> Nothing + +Reset the environment to its initial state. +If `reset_seed` is true, the random number generator is reset to the given `seed`. +""" +function reset! end + +""" + step!(env::AbstractEnvironment, action) --> Float64 + +Perform a step in the environment with the given action. +Returns the reward received after taking the action. +This function may also update the internal state of the environment. +If the environment is terminated, it should raise an error. +""" +function step! end diff --git a/src/Utils/interface.jl b/src/Utils/interface.jl index 2994e4e..8c3c0c7 100644 --- a/src/Utils/interface.jl +++ b/src/Utils/interface.jl @@ -62,6 +62,11 @@ It's usually a Flux model, that takes a feature matrix x as input, and returns a """ function generate_statistical_model end +""" + generate_policies(::AbstractBenchmark) -> Vector{Policy} +""" +function generate_policies end + """ plot_data(::AbstractBenchmark, ::DataSample; kwargs...) 
@@ -202,16 +207,6 @@ function generate_scenario end """ function generate_anticipative_solution end -# """ -# generate_scenario_generator(::AbstractStochasticBenchmark{true}; kwargs...) -# """ -# function generate_scenario_generator end - -# """ -# generate_anticipative_solver(::AbstractStochasticBenchmark{true}; kwargs...) -# """ -# function generate_anticipative_solver end - """ $TYPEDEF @@ -224,7 +219,7 @@ TODO abstract type AbstractDynamicBenchmark{exogenous} <: AbstractStochasticBenchmark{exogenous} end """ - generate_environment(::AbstractDynamicBenchmark, instance; kwargs...) + generate_environment(::AbstractDynamicBenchmark, instance, rng::AbstractRNG; kwargs...) Initialize an environment for the given dynamic benchmark instance. """ @@ -236,9 +231,14 @@ $TYPEDSIGNATURES Generate a vector of environments for the given dynamic benchmark and dataset. """ function generate_environments( - bench::AbstractDynamicBenchmark, dataset::Vector{<:DataSample}; kwargs... + bench::AbstractDynamicBenchmark, + dataset::Vector{<:DataSample}; + seed=nothing, + rng=MersenneTwister(seed), + kwargs..., ) + Random.seed!(rng, seed) return map(dataset) do sample - generate_environment(bench, sample.instance; kwargs...) + generate_environment(bench, sample.instance, rng; kwargs...) end end diff --git a/src/Utils/policy.jl b/src/Utils/policy.jl new file mode 100644 index 0000000..0e216fd --- /dev/null +++ b/src/Utils/policy.jl @@ -0,0 +1,83 @@ +""" +$TYPEDEF + +Policy type for decision-focused learning benchmarks. +""" +struct Policy{P} + "policy name" + name::String + "policy description" + description::String + "policy run function" + policy::P +end + +function Base.show(io::IO, p::Policy) + println(io, "$(p.name): $(p.description)") + return nothing +end +""" +$TYPEDSIGNATURES + +Run the policy and get the next decision on the given environment/instance. +""" +function (p::Policy)(args...; kwargs...) + return p.policy(args...; kwargs...) 
+end + +""" +$TYPEDSIGNATURES + +Run the policy on the environment and return the total reward and a dataset of observations. +By default, the environment is reset before running the policy. +""" +function run_policy!(policy, env::AbstractEnvironment) + total_reward = 0.0 + reset!(env; reset_seed=false) + local labeled_dataset + while !is_terminated(env) + y = policy(env) + features, state = observe(env) + if @isdefined labeled_dataset + push!(labeled_dataset, DataSample(; x=features, y_true=y, instance=state)) + else + labeled_dataset = [DataSample(; x=features, y_true=y, instance=state)] + end + reward = step!(env, y) + total_reward += reward + end + return total_reward, labeled_dataset +end + +function run_policy!(policy, envs::Vector{<:AbstractEnvironment}) + E = length(envs) + rewards = zeros(Float64, E) + datasets = map(1:E) do e + reward, dataset = run_policy!(policy, envs[e]) + rewards[e] = reward + return dataset + end + return rewards, vcat(datasets...) +end + +function run_policy!(policy, env::AbstractEnvironment, episodes::Int; seed=get_seed(env)) + reset!(env; reset_seed=true, seed) + total_reward = 0.0 + datasets = map(1:episodes) do _i + reward, dataset = run_policy!(policy, env) + total_reward += reward + return dataset + end + return total_reward / episodes, vcat(datasets...) +end + +function run_policy!(policy, envs::Vector{<:AbstractEnvironment}, episodes::Int) + E = length(envs) + rewards = zeros(Float64, E) + datasets = map(1:E) do e + reward, dataset = run_policy!(policy, envs[e], episodes) + rewards[e] = reward + return dataset + end + return rewards, vcat(datasets...) 
+end From 987fec867e3fe8da27e200eda0537a5ef5d6e434 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Thu, 7 Aug 2025 17:45:40 +0200 Subject: [PATCH 19/29] Implement policy generator for DVSP, and cleanup seed handling --- .../DynamicVehicleScheduling.jl | 15 ++++++++ .../environment/environment.jl | 27 +++++++++++---- src/DynamicVehicleScheduling/policy.jl | 34 +++++++++++++++++++ .../policy/greedy_policy.jl | 9 +++++ .../policy/kleopatra_policy.jl | 32 ----------------- src/Utils/policy.jl | 18 +++++----- 6 files changed, 88 insertions(+), 47 deletions(-) create mode 100644 src/DynamicVehicleScheduling/policy.jl diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl index 1cc5262..4c54966 100644 --- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl +++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl @@ -38,6 +38,7 @@ include("algorithms/anticipative_solver.jl") include("learning/features.jl") include("learning/2d_features.jl") +include("policy.jl") # include("policy/abstract_vsp_policy.jl") # include("policy/greedy_policy.jl") # include("policy/lazy_policy.jl") @@ -104,6 +105,20 @@ function Utils.generate_anticipative_solution( ) end +function Utils.generate_policies(b::DynamicVehicleSchedulingBenchmark) + lazy = Policy( + "Lazy", + "Lazy policy that dispatches vehicles only when they are ready.", + lazy_policy, + ) + greedy = Policy( + "Greedy", + "Greedy policy that dispatches vehicles to the nearest customer.", + greedy_policy, + ) + return (lazy, greedy) +end + export DynamicVehicleSchedulingBenchmark end diff --git a/src/DynamicVehicleScheduling/environment/environment.jl b/src/DynamicVehicleScheduling/environment/environment.jl index ce146e6..afbefb5 100644 --- a/src/DynamicVehicleScheduling/environment/environment.jl +++ b/src/DynamicVehicleScheduling/environment/environment.jl @@ -1,10 +1,14 @@ -struct DVSPEnv{S<:DVSPState} <: Utils.AbstractEnvironment +mutable struct 
DVSPEnv{S<:DVSPState,R<:AbstractRNG,SS} <: Utils.AbstractEnvironment "associated instance" instance::Instance "current state" state::S "scenario the environment will use when not given a specific one" scenario::Scenario + "random number generator" + rng::R + "seed for the environment" + seed::SS end """ @@ -12,10 +16,11 @@ $TYPEDSIGNATURES Constructor for [`DVSPEnv`](@ref). """ -function DVSPEnv(instance::Instance; seed=nothing, rng=MersenneTwister(seed)) - scenario = Utils.generate_scenario(instance; rng, seed) +function DVSPEnv(instance::Instance; seed=nothing) + rng = MersenneTwister(seed) + scenario = Utils.generate_scenario(instance; rng) initial_state = DVSPState(instance; scenario[1]...) - return DVSPEnv(instance, initial_state, scenario) + return DVSPEnv(instance, initial_state, scenario, rng, seed) end currrent_epoch(env::DVSPEnv) = current_epoch(env.state) @@ -23,6 +28,8 @@ epoch_duration(env::DVSPEnv) = epoch_duration(env.instance) last_epoch(env::DVSPEnv) = last_epoch(env.instance) Δ_dispatch(env::DVSPEnv) = Δ_dispatch(env.instance) +Utils.get_seed(env::DVSPEnv) = env.seed + """ $TYPEDSIGNATURES @@ -59,13 +66,19 @@ $TYPEDSIGNATURES Reset the environment to its initial state. Also reset the seed if `reset_seed` is set to true. """ -function Utils.reset!(env::DVSPEnv, scenario=env.scenario) - reset_state!(env.state, env.instance; scenario[1]...) +function Utils.reset!(env::DVSPEnv; seed=get_seed(env), reset_seed=false) + if reset_seed + Random.seed!(env.rng, seed) + end + env.scenario = Utils.generate_scenario(env; rng=env.rng) + reset_state!(env.state, env.instance; env.scenario[1]...) return nothing end """ -remove dispatched customers, advance time, and add new requests to the environment. +$TYPEDSIGNATURES + +Remove dispatched customers, advance time, and add new requests to the environment. 
""" function Utils.step!(env::DVSPEnv, routes, scenario=env.scenario) reward = -apply_routes!(env.state, routes) diff --git a/src/DynamicVehicleScheduling/policy.jl b/src/DynamicVehicleScheduling/policy.jl new file mode 100644 index 0000000..ebce796 --- /dev/null +++ b/src/DynamicVehicleScheduling/policy.jl @@ -0,0 +1,34 @@ +function greedy_policy(env::DVSPEnv; model_builder=highs_model) + _, state = observe(env) + (; is_postponable) = state + nb_postponable_requests = sum(is_postponable) + θ = ones(nb_postponable_requests) * 1e9 + routes = prize_collecting_vsp(θ; instance=state, model_builder) + return routes +end + +function lazy_policy(env::DVSPEnv; model_builder=highs_model) + _, state = observe(env) + nb_postponable_requests = sum(state.is_postponable) + θ = ones(nb_postponable_requests) * -1e9 + routes = prize_collecting_vsp(θ; instance=state, model_builder) + return routes +end + +""" +$TYPEDEF + +Kleopatra policy for the Dynamic Vehicle Scheduling Problem. +""" +struct KleopatraVSPPolicy{P} + prize_predictor::P +end + +function (π::KleopatraVSPPolicy)(env::DVSPEnv; model_builder=highs_model) + x, state = observe(env) + (; prize_predictor) = π + # x = has_2D_features ? 
compute_2D_features(env) : compute_features(env) + θ = prize_predictor(x) + routes = prize_collecting_vsp(θ; instance=state, model_builder) + return routes +end diff --git a/src/DynamicVehicleScheduling/policy/greedy_policy.jl b/src/DynamicVehicleScheduling/policy/greedy_policy.jl index a15a3b9..90de293 100644 --- a/src/DynamicVehicleScheduling/policy/greedy_policy.jl +++ b/src/DynamicVehicleScheduling/policy/greedy_policy.jl @@ -14,3 +14,12 @@ function (π::GreedyVSPPolicy)(env::DVSPEnv; model_builder=highs_model) routes = prize_collecting_vsp(θ; instance=state, model_builder) return routes end + +function greedy_policy(env::DVSPEnv; model_builder=highs_model) + _, state = observe(env) + (; is_postponable) = state + nb_postponable_requests = sum(is_postponable) + θ = ones(nb_postponable_requests) * 1e9 + routes = prize_collecting_vsp(θ; instance=state, model_builder) + return routes +end diff --git a/src/DynamicVehicleScheduling/policy/kleopatra_policy.jl b/src/DynamicVehicleScheduling/policy/kleopatra_policy.jl index 8a7e8d1..e69de29 100644 --- a/src/DynamicVehicleScheduling/policy/kleopatra_policy.jl +++ b/src/DynamicVehicleScheduling/policy/kleopatra_policy.jl @@ -1,32 +0,0 @@ -""" -$TYPEDEF - -Kleopatra policy for the Dynamic Vehicle Scheduling Problem. -""" -struct KleopatraVSPPolicy{P} <: AbstractDynamicVSPPolicy - prize_predictor::P - has_2D_features::Bool -end - -""" -$TYPEDSIGNATURES - -Custom constructor for [`KleopatraVSPPolicy`](@ref). -""" -function KleopatraVSPPolicy(prize_predictor; has_2D_features=nothing) - has_2D_features = if isnothing(has_2D_features) - size(prize_predictor[1].weight, 2) == 2 - else - has_2D_features - end - return KleopatraVSPPolicy(prize_predictor, has_2D_features) -end - -function (π::KleopatraVSPPolicy)(env::DVSPEnv; model_builder=highs_model) - state = observe(env) - (; prize_predictor, has_2D_features) = π - x = has_2D_features ? 
compute_2D_features(env) : compute_features(env) - θ = prize_predictor(x) - routes = prize_collecting_vsp(θ; instance=state, model_builder) - return routes -end diff --git a/src/Utils/policy.jl b/src/Utils/policy.jl index 0e216fd..cf8fbd3 100644 --- a/src/Utils/policy.jl +++ b/src/Utils/policy.jl @@ -31,12 +31,12 @@ $TYPEDSIGNATURES Run the policy on the environment and return the total reward and a dataset of observations. By default, the environment is reset before running the policy. """ -function run_policy!(policy, env::AbstractEnvironment) +function run_policy!(policy, env::AbstractEnvironment; kwargs...) total_reward = 0.0 reset!(env; reset_seed=false) local labeled_dataset while !is_terminated(env) - y = policy(env) + y = policy(env; kwargs...) features, state = observe(env) if @isdefined labeled_dataset push!(labeled_dataset, DataSample(; x=features, y_true=y, instance=state)) @@ -49,33 +49,35 @@ function run_policy!(policy, env::AbstractEnvironment) return total_reward, labeled_dataset end -function run_policy!(policy, envs::Vector{<:AbstractEnvironment}) +function run_policy!(policy, envs::Vector{<:AbstractEnvironment}; kwargs...) E = length(envs) rewards = zeros(Float64, E) datasets = map(1:E) do e - reward, dataset = run_policy!(policy, envs[e]) + reward, dataset = run_policy!(policy, envs[e]; kwargs...) rewards[e] = reward return dataset end return rewards, vcat(datasets...) end -function run_policy!(policy, env::AbstractEnvironment, episodes::Int; seed=get_seed(env)) +function run_policy!( + policy, env::AbstractEnvironment, episodes::Int; seed=get_seed(env), kwargs... +) reset!(env; reset_seed=true, seed) total_reward = 0.0 datasets = map(1:episodes) do _i - reward, dataset = run_policy!(policy, env) + reward, dataset = run_policy!(policy, env; kwargs...) total_reward += reward return dataset end return total_reward / episodes, vcat(datasets...) 
end -function run_policy!(policy, envs::Vector{<:AbstractEnvironment}, episodes::Int) +function run_policy!(policy, envs::Vector{<:AbstractEnvironment}, episodes::Int; kwargs...) E = length(envs) rewards = zeros(Float64, E) datasets = map(1:E) do e - reward, dataset = run_policy!(policy, envs[e], episodes) + reward, dataset = run_policy!(policy, envs[e], episodes; kwargs...) rewards[e] = reward return dataset end From ed01c451f0d615991d4cd73e818b52d394c134f3 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Fri, 8 Aug 2025 17:05:16 +0200 Subject: [PATCH 20/29] Improve anticipative policy --- .../DynamicVehicleScheduling.jl | 3 +- .../algorithms/anticipative_solver.jl | 87 ++++++++++--------- .../environment/environment.jl | 8 +- .../environment/instance.jl | 4 + .../environment/state.jl | 30 ++++++- src/DynamicVehicleScheduling/policy.jl | 2 + src/Utils/data_sample.jl | 17 ++++ src/Utils/policy.jl | 7 +- 8 files changed, 108 insertions(+), 50 deletions(-) diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl index 4c54966..ab4ba8c 100644 --- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl +++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl @@ -67,7 +67,7 @@ $TYPEDFIELDS end function Utils.generate_dataset(b::DynamicVehicleSchedulingBenchmark, dataset_size::Int=1) - (; max_requests_per_epoch, Δ_dispatch, epoch_duration) = b + (; max_requests_per_epoch, Δ_dispatch, epoch_duration, two_dimensional_features) = b files = readdir(datadep"dvrptw"; join=true) dataset_size = min(dataset_size, length(files)) return [ @@ -77,6 +77,7 @@ function Utils.generate_dataset(b::DynamicVehicleSchedulingBenchmark, dataset_si max_requests_per_epoch, Δ_dispatch, epoch_duration, + two_dimensional_features, ), ) for i in 1:dataset_size ] diff --git a/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl b/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl index f274e02..47a2c1f 
100644 --- a/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl +++ b/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl @@ -4,15 +4,17 @@ $TYPEDSIGNATURES Retrieve anticipative routes solution from the given MIP solution `y`. Outputs a set of routes per epoch. """ -function retrieve_routes_anticipative(y::AbstractArray, dvspenv::DVSPEnv, customer_index) +function retrieve_routes_anticipative( + y::AbstractArray, dvspenv::DVSPEnv, customer_index, epoch_indices +) nb_tasks = length(customer_index) - first_epoch = 1 - (; last_epoch) = dvspenv.instance + # first_epoch = 1 + # (; last_epoch) = dvspenv.instance job_indices = 2:(nb_tasks) - epoch_indices = first_epoch:last_epoch + # epoch_indices = first_epoch:last_epoch routes = [Vector{Int}[] for _ in epoch_indices] - for t in epoch_indices + for (i, t) in enumerate(epoch_indices) start = [i for i in job_indices if y[1, i, t] ≈ 1] for task in start route = Int[] @@ -28,7 +30,7 @@ function retrieve_routes_anticipative(y::AbstractArray, dvspenv::DVSPEnv, custom end current_task = next_task end - push!(routes[t], route) + push!(routes[i], route) end end return routes @@ -44,28 +46,33 @@ function anticipative_solver( env::DVSPEnv, scenario=env.scenario; model_builder=highs_model, - reset_env=false, - two_dimensional_features=false, + two_dimensional_features=env.instance.two_dimensional_features, + reset_env=true, + nb_epochs=typemax(Int), ) - reset_env && reset!(env) + reset_env && reset!(env; reset_seed=true) + + start_epoch = current_epoch(env) + end_epoch = min(last_epoch(env), start_epoch + nb_epochs - 1) + T = start_epoch:end_epoch + request_epoch = [0] - for (epoch, indices) in enumerate(scenario.indices) - request_epoch = vcat(request_epoch, fill(epoch, length(indices))) + for t in T + request_epoch = vcat(request_epoch, fill(t, length(scenario.indices[t]))) end - customer_index = vcat(1, scenario.indices...) - service_time = vcat(0.0, scenario.service_time...) 
- start_time = vcat(0.0, scenario.start_time...) + customer_index = vcat(1, scenario.indices[T]...) + service_time = vcat(0.0, scenario.service_time[T]...) + start_time = vcat(0.0, scenario.start_time[T]...) duration = env.instance.static_instance.duration[customer_index, customer_index] - first_epoch = 1 - (; last_epoch, epoch_duration, Δ_dispatch) = env.instance + (; epoch_duration, Δ_dispatch) = env.instance model = model_builder() set_silent(model) nb_nodes = length(customer_index) job_indices = 2:nb_nodes - epoch_indices = first_epoch:last_epoch + epoch_indices = T#first_epoch:last_epoch @variable(model, y[i=1:nb_nodes, j=1:nb_nodes, t=epoch_indices]; binary=true) @@ -102,7 +109,7 @@ function anticipative_solver( # a trip from i can be done only before limit date for i in job_indices, t in epoch_indices, j in 1:nb_nodes - if (t - 1) * epoch_duration + duration[1, i] + Δ_dispatch > start_time[i] # ! this only works if first_epoch = 1 + if (t - 1) * epoch_duration + duration[1, i] + Δ_dispatch > start_time[i] @constraint(model, y[i, j, t] <= 0) end end @@ -121,27 +128,32 @@ function anticipative_solver( optimize!(model) obj = JuMP.objective_value(model) - epoch_routes = retrieve_routes_anticipative(value.(y), env, customer_index) + epoch_routes = retrieve_routes_anticipative( + value.(y), env, customer_index, epoch_indices + ) epoch_indices = Vector{Int}[] N = 1 indices = [1] - for epoch in 1:last_epoch + index = 1 + for epoch in 1:last_epoch(env) M = length(scenario.indices[epoch]) indices = vcat(indices, (N + 1):(N + M)) push!(epoch_indices, copy(indices)) N = N + M - epoch_routes[epoch] - dispatched = vcat(epoch_routes[epoch]...) - indices = setdiff(indices, dispatched) + if epoch in T + dispatched = vcat(epoch_routes[index]...) + index += 1 + indices = setdiff(indices, dispatched) + end end indices = vcat(1, scenario.indices...) start_time = vcat(0.0, scenario.start_time...) service_time = vcat(0.0, scenario.service_time...) 
- dataset = map(1:last_epoch) do epoch - routes = epoch_routes[epoch] + dataset = map(enumerate(T)) do (i, epoch) + routes = epoch_routes[i] epoch_customers = epoch_indices[epoch] y_true = @@ -170,9 +182,13 @@ function anticipative_solver( epoch_duration = env.instance.epoch_duration Δ_dispatch = env.instance.Δ_dispatch planning_start_time = (epoch - 1) * epoch_duration + Δ_dispatch - is_must_dispatch[2:end] .= - planning_start_time .+ epoch_duration .+ @view(new_duration[1, 2:end]) .> - new_start_time[2:end] + if epoch == last_epoch + # If we are in the last epoch, all requests must be dispatched + is_must_dispatch[2:end] .= true + else + is_must_dispatch[2:end] .= + planning_start_time .+ epoch_duration .+ @view(new_duration[1, 2:end]) .> new_start_time[2:end] + end is_postponable[2:end] .= .!is_must_dispatch[2:end] state = DVSPState(; @@ -183,7 +199,6 @@ function anticipative_solver( current_epoch=epoch, ) - # x = compute_2D_features(state, env.instance) x = if two_dimensional_features compute_2D_features(state, env.instance) else @@ -195,17 +210,3 @@ function anticipative_solver( return obj, dataset end - -# @kwdef struct AnticipativeSolver -# is_2D::Bool = false -# end - -# function (solver::AnticipativeSolver)(env::DVSPEnv, scenario=env.scenario; reset_env=false) -# return generate_anticipative_decision( -# env, -# scenario; -# model_builder=highs_model, -# reset_env, -# two_dimensional_features=solver.is_2D, -# ) -# end diff --git a/src/DynamicVehicleScheduling/environment/environment.jl b/src/DynamicVehicleScheduling/environment/environment.jl index afbefb5..cd8d6a6 100644 --- a/src/DynamicVehicleScheduling/environment/environment.jl +++ b/src/DynamicVehicleScheduling/environment/environment.jl @@ -35,7 +35,13 @@ $TYPEDSIGNATURES Get the current state of the environment. 
""" -Utils.observe(env::DVSPEnv) = nothing, env.state +function Utils.observe(env::DVSPEnv) + if env.instance.two_dimensional_features + return compute_2D_features(env.state, env.instance), env.state + end + # else + return compute_features(env.state, env.instance), env.state +end current_epoch(env::DVSPEnv) = current_epoch(env.state) diff --git a/src/DynamicVehicleScheduling/environment/instance.jl b/src/DynamicVehicleScheduling/environment/instance.jl index 17d5e9d..d65010c 100644 --- a/src/DynamicVehicleScheduling/environment/instance.jl +++ b/src/DynamicVehicleScheduling/environment/instance.jl @@ -14,6 +14,8 @@ Instance data structure for the dynamic vehicle scheduling problem. epoch_duration::T = 1.0 "last epoch index" last_epoch::Int + "whether to use two-dimensional features" + two_dimensional_features::Bool = false end function Instance( @@ -21,6 +23,7 @@ function Instance( max_requests_per_epoch::Int=10, Δ_dispatch::Float64=1.0, epoch_duration::Float64=1.0, + two_dimensional_features::Bool=false, ) last_epoch = trunc( Int, @@ -35,6 +38,7 @@ function Instance( Δ_dispatch=Δ_dispatch, epoch_duration=epoch_duration, last_epoch=last_epoch, + two_dimensional_features=two_dimensional_features, ) end diff --git a/src/DynamicVehicleScheduling/environment/state.jl b/src/DynamicVehicleScheduling/environment/state.jl index ebac101..704ef79 100644 --- a/src/DynamicVehicleScheduling/environment/state.jl +++ b/src/DynamicVehicleScheduling/environment/state.jl @@ -16,6 +16,25 @@ State data structure for the Dynamic Vehicle Scheduling Problem. 
is_postponable::BitVector = falses(0) end +function Base.show(io::IO, state::DVSPState) + return print( + io, + "DVSPState(", + "current_epoch=", + state.current_epoch, + ", ", + "location_indices=", + state.location_indices, + ", ", + "is_must_dispatch=", + state.is_must_dispatch, + ", ", + "is_postponable=", + state.is_postponable, + ")", + ) +end + function reset_state!( state::DVSPState, instance::Instance; indices, service_time, start_time ) @@ -189,9 +208,14 @@ function add_new_customers!( epoch_duration = instance.epoch_duration Δ_dispatch = instance.Δ_dispatch planning_start_time = (state.current_epoch - 1) * epoch_duration + Δ_dispatch - is_must_dispatch[2:end] .= - planning_start_time .+ epoch_duration .+ @view(updated_duration[1, 2:end]) .> - updated_start_time[2:end] + if state.current_epoch == last_epoch(instance) + # If we are in the last epoch, all requests must be dispatched + is_must_dispatch[2:end] .= true + else + is_must_dispatch[2:end] .= + planning_start_time .+ epoch_duration .+ @view(updated_duration[1, 2:end]) .> + updated_start_time[2:end] + end is_postponable[2:end] .= .!is_must_dispatch[2:end] state.is_must_dispatch = is_must_dispatch diff --git a/src/DynamicVehicleScheduling/policy.jl b/src/DynamicVehicleScheduling/policy.jl index ebce796..244dc66 100644 --- a/src/DynamicVehicleScheduling/policy.jl +++ b/src/DynamicVehicleScheduling/policy.jl @@ -4,6 +4,7 @@ function greedy_policy(env::DVSPEnv; model_builder=highs_model) nb_postponable_requests = sum(is_postponable) θ = ones(nb_postponable_requests) * 1e9 routes = prize_collecting_vsp(θ; instance=state, model_builder) + @assert is_feasible(state, routes) return routes end @@ -12,6 +13,7 @@ function lazy_policy(env::DVSPEnv; model_builder=highs_model) nb_postponable_requests = sum(state.is_postponable) θ = ones(nb_postponable_requests) * -1e9 routes = prize_collecting_vsp(θ; instance=state, model_builder) + @assert is_feasible(state, routes) return routes end diff --git 
a/src/Utils/data_sample.jl b/src/Utils/data_sample.jl index fde1bf3..d0cccc6 100644 --- a/src/Utils/data_sample.jl +++ b/src/Utils/data_sample.jl @@ -22,6 +22,23 @@ $TYPEDFIELDS instance::I = nothing end +function Base.show(io::IO, d::DataSample) + fields = String[] + if !isnothing(d.x) + push!(fields, "x=$(d.x)") + end + if !isnothing(d.θ_true) + push!(fields, "θ_true=$(d.θ_true)") + end + if !isnothing(d.y_true) + push!(fields, "y_true=$(d.y_true)") + end + if !isnothing(d.instance) + push!(fields, "instance=$(d.instance)") + end + return print(io, "DataSample(", join(fields, ", "), ")") +end + """ $TYPEDSIGNATURES diff --git a/src/Utils/policy.jl b/src/Utils/policy.jl index cf8fbd3..8f17f79 100644 --- a/src/Utils/policy.jl +++ b/src/Utils/policy.jl @@ -39,9 +39,12 @@ function run_policy!(policy, env::AbstractEnvironment; kwargs...) y = policy(env; kwargs...) features, state = observe(env) if @isdefined labeled_dataset - push!(labeled_dataset, DataSample(; x=features, y_true=y, instance=state)) + push!( + labeled_dataset, + DataSample(; x=features, y_true=y, instance=deepcopy(state)), + ) else - labeled_dataset = [DataSample(; x=features, y_true=y, instance=state)] + labeled_dataset = [DataSample(; x=features, y_true=y, instance=deepcopy(state))] end reward = step!(env, y) total_reward += reward From 5f2ccca6b1298b765ff9ff464089d16e4e007c72 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Fri, 8 Aug 2025 17:45:27 +0200 Subject: [PATCH 21/29] cleanup + fix tests --- src/DecisionFocusedLearningBenchmarks.jl | 2 +- src/DynamicAssortment/DynamicAssortment.jl | 2 +- src/DynamicAssortment/policies.jl | 20 --- .../DynamicVehicleScheduling.jl | 28 ++-- .../algorithms/prize_collecting_vsp.jl | 128 ----------------- .../{algorithms => }/anticipative_solver.jl | 0 .../{environment => }/environment.jl | 0 .../{learning => }/features.jl | 17 +++ .../{environment => }/instance.jl | 0 .../learning/2d_features.jl | 16 --- src/DynamicVehicleScheduling/maximizer.jl | 129 
++++++++++++++++++ .../{environment => }/plot.jl | 0 .../policy/abstract_vsp_policy.jl | 42 ------ .../policy/anticipative_policy.jl | 17 --- .../policy/greedy_policy.jl | 25 ---- .../policy/kleopatra_policy.jl | 0 .../policy/lazy_policy.jl | 15 -- .../{environment => }/scenario.jl | 0 .../{environment => }/state.jl | 0 src/Utils/Utils.jl | 2 +- test/dynamic_vsp.jl | 32 +++-- 21 files changed, 182 insertions(+), 293 deletions(-) delete mode 100644 src/DynamicVehicleScheduling/algorithms/prize_collecting_vsp.jl rename src/DynamicVehicleScheduling/{algorithms => }/anticipative_solver.jl (100%) rename src/DynamicVehicleScheduling/{environment => }/environment.jl (100%) rename src/DynamicVehicleScheduling/{learning => }/features.jl (69%) rename src/DynamicVehicleScheduling/{environment => }/instance.jl (100%) delete mode 100644 src/DynamicVehicleScheduling/learning/2d_features.jl rename src/DynamicVehicleScheduling/{environment => }/plot.jl (100%) delete mode 100644 src/DynamicVehicleScheduling/policy/abstract_vsp_policy.jl delete mode 100644 src/DynamicVehicleScheduling/policy/anticipative_policy.jl delete mode 100644 src/DynamicVehicleScheduling/policy/greedy_policy.jl delete mode 100644 src/DynamicVehicleScheduling/policy/kleopatra_policy.jl delete mode 100644 src/DynamicVehicleScheduling/policy/lazy_policy.jl rename src/DynamicVehicleScheduling/{environment => }/scenario.jl (100%) rename src/DynamicVehicleScheduling/{environment => }/state.jl (100%) diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl index b49362a..a33c1de 100644 --- a/src/DecisionFocusedLearningBenchmarks.jl +++ b/src/DecisionFocusedLearningBenchmarks.jl @@ -62,7 +62,7 @@ using .Utils # Interface export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark, DataSample -export AbstractEnv, get_seed, is_terminated, observe, reset!, step! +export AbstractEnvironment, get_seed, is_terminated, observe, reset!, step! 
export Policy, run_policy! diff --git a/src/DynamicAssortment/DynamicAssortment.jl b/src/DynamicAssortment/DynamicAssortment.jl index f04d3e2..2c61c5f 100644 --- a/src/DynamicAssortment/DynamicAssortment.jl +++ b/src/DynamicAssortment/DynamicAssortment.jl @@ -72,7 +72,7 @@ function Utils.generate_maximizer(b::DynamicAssortmentBenchmark) end function Utils.generate_environment( - ::DynamicAssortmentBenchmark, instance::Instance, rng::AbstractRNG + ::DynamicAssortmentBenchmark, instance::Instance, rng::AbstractRNG; kwargs... ) seed = rand(rng, 1:typemax(Int)) return Environment(instance; seed) diff --git a/src/DynamicAssortment/policies.jl b/src/DynamicAssortment/policies.jl index 56aba6d..320c501 100644 --- a/src/DynamicAssortment/policies.jl +++ b/src/DynamicAssortment/policies.jl @@ -19,23 +19,3 @@ function greedy_policy(env::Environment) maximizer = generate_maximizer(env.instance.config) return maximizer(prices(env)) end - -function run_policy(env::Environment, episodes::Int; first_seed=1, policy=expert_policy) - dataset = [] - rev_global = Float64[] - for i in 1:episodes - rev_episode = 0.0 - CommonRLInterface.reset!(env; seed=first_seed - 1 + i, reset_seed=true) - training_instances = [] - while !CommonRLInterface.terminated(env) - S = policy(env) - features = CommonRLInterface.observe(env) - push!(training_instances, DataSample(; x=features, y_true=S)) - reward = CommonRLInterface.act!(env, S) - rev_episode += reward - end - push!(rev_global, rev_episode) - push!(dataset, training_instances) - end - return mean(rev_global), rev_global, dataset -end diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl index ab4ba8c..f76a43b 100644 --- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl +++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl @@ -25,27 +25,17 @@ include("static_vsp/parsing.jl") include("static_vsp/solution.jl") include("static_vsp/plot.jl") -# dynamic 
environment -include("environment/instance.jl") -include("environment/state.jl") -include("environment/scenario.jl") -include("environment/environment.jl") -include("environment/plot.jl") +include("instance.jl") +include("state.jl") +include("scenario.jl") +include("environment.jl") +include("plot.jl") -include("algorithms/prize_collecting_vsp.jl") -include("algorithms/anticipative_solver.jl") - -include("learning/features.jl") -include("learning/2d_features.jl") +include("maximizer.jl") +include("anticipative_solver.jl") +include("features.jl") include("policy.jl") -# include("policy/abstract_vsp_policy.jl") -# include("policy/greedy_policy.jl") -# include("policy/lazy_policy.jl") -# include("policy/anticipative_policy.jl") -# include("policy/kleopatra_policy.jl") - -include("maximizer.jl") """ $TYPEDEF @@ -84,7 +74,7 @@ function Utils.generate_dataset(b::DynamicVehicleSchedulingBenchmark, dataset_si end function Utils.generate_environment( - ::DynamicVehicleSchedulingBenchmark, instance::Instance, rng::AbstractRNG + ::DynamicVehicleSchedulingBenchmark, instance::Instance, rng::AbstractRNG; kwargs... ) seed = rand(rng, 1:typemax(Int)) return DVSPEnv(instance; seed) diff --git a/src/DynamicVehicleScheduling/algorithms/prize_collecting_vsp.jl b/src/DynamicVehicleScheduling/algorithms/prize_collecting_vsp.jl deleted file mode 100644 index d98f4d0..0000000 --- a/src/DynamicVehicleScheduling/algorithms/prize_collecting_vsp.jl +++ /dev/null @@ -1,128 +0,0 @@ -""" -$TYPEDSIGNATURES - -Create the acyclic digraph associated with the given VSP `instance`. 
-""" -function create_graph(instance::StaticInstance) - (; duration, start_time, service_time) = instance - # Initialize directed graph - nb_vertices = location_count(instance) - graph = SimpleDiGraph(nb_vertices) - - depot = 1 # depot is always index 1 - customers = 2:nb_vertices # other vertices are customers - - # Create existing edges - for i₁ in customers - # link every task to depot - add_edge!(graph, depot, i₁) - add_edge!(graph, i₁, depot) - - t₁ = start_time[i₁] - for i₂ in (i₁ + 1):nb_vertices - t₂ = start_time[i₂] - - if t₁ <= t₂ - if t₁ + service_time[i₁] + duration[i₁, i₂] <= t₂ - add_edge!(graph, i₁, i₂) - end - else - if t₂ + service_time[i₂] + duration[i₂, i₁] <= t₁ - add_edge!(graph, i₂, i₁) - end - end - end - end - - return graph -end - -""" -$TYPEDSIGNATURES - -Create the acyclic digraph associated with the given VSP `state`. -""" -function create_graph(state::DVSPState) - return create_graph(state.state_instance) -end - -""" -$TYPEDSIGNATURES - -Retrieve routes solution from the given MIP solution `y` matrix and `graph`. -""" -function retrieve_routes(y::AbstractArray, graph::AbstractGraph) - nb_tasks = nv(graph) - job_indices = 2:(nb_tasks) - routes = Vector{Int}[] - - start = [i for i in job_indices if y[1, i] ≈ 1] - for task in start - route = Int[] - current_task = task - while current_task != 1 # < nb_tasks - push!(route, current_task) - local next_task - for i in outneighbors(graph, current_task) - if isapprox(y[current_task, i], 1; atol=0.1) - next_task = i - break - end - end - current_task = next_task - end - push!(routes, route) - end - return routes -end - -""" -$TYPEDSIGNATURES - -Solve the Prize Collecting Vehicle Scheduling Problem defined by `instance` and prize vector `θ`. -""" -function prize_collecting_vsp( - θ::AbstractVector; instance::DVSPState, model_builder=highs_model, kwargs... 
-) - (; duration) = instance.state_instance - graph = create_graph(instance) - - model = model_builder() - set_silent(model) - - nb_nodes = nv(graph) - job_indices = 2:(nb_nodes) - - @variable(model, y[i=1:nb_nodes, j=1:nb_nodes; has_edge(graph, i, j)] >= 0) - - θ_ext = fill(0.0, location_count(instance)) # no prize for must dispatch requests, only hard constraints - θ_ext[instance.is_postponable] .= θ - - @objective( - model, - Max, - sum( - (θ_ext[dst(edge)] - duration[src(edge), dst(edge)]) * y[src(edge), dst(edge)] - for edge in edges(graph) - ) - ) - @constraint( - model, - flow[i in 2:nb_nodes], - sum(y[j, i] for j in inneighbors(graph, i)) == - sum(y[i, j] for j in outneighbors(graph, i)) - ) - @constraint( - model, demand[i in job_indices], sum(y[j, i] for j in inneighbors(graph, i)) <= 1 - ) - # must dispatch constraints - @constraint( - model, - demand_must_dispatch[i in job_indices; instance.is_must_dispatch[i]], - sum(y[j, i] for j in inneighbors(graph, i)) == 1 - ) - - optimize!(model) - - return retrieve_routes(value.(y), graph) -end diff --git a/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl b/src/DynamicVehicleScheduling/anticipative_solver.jl similarity index 100% rename from src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl rename to src/DynamicVehicleScheduling/anticipative_solver.jl diff --git a/src/DynamicVehicleScheduling/environment/environment.jl b/src/DynamicVehicleScheduling/environment.jl similarity index 100% rename from src/DynamicVehicleScheduling/environment/environment.jl rename to src/DynamicVehicleScheduling/environment.jl diff --git a/src/DynamicVehicleScheduling/learning/features.jl b/src/DynamicVehicleScheduling/features.jl similarity index 69% rename from src/DynamicVehicleScheduling/learning/features.jl rename to src/DynamicVehicleScheduling/features.jl index c9470c3..10e0ab8 100644 --- a/src/DynamicVehicleScheduling/learning/features.jl +++ b/src/DynamicVehicleScheduling/features.jl @@ -40,3 
+40,20 @@ end function compute_features(env::DVSPEnv) return compute_features(env.state, env.instance) end + +function get_features_meanTimeToRequests(state::DVSPState, instance::Instance) + quantiles = [0.5] + a = instance.static_instance.duration[state.location_indices, 2:end] + quantileTimeToRequests = mapslices(x -> quantile(x, quantiles), a; dims=2) + return quantileTimeToRequests +end + +function compute_2D_features(state::DVSPState, instance::Instance) + timeDepotRequest = state.state_instance.duration[:, 1][state.is_postponable] + quantileTimeToRequests = get_features_meanTimeToRequests(state, instance)[state.is_postponable] + return hcat(timeDepotRequest, quantileTimeToRequests)' +end + +function compute_2D_features(env::DVSPEnv) + return compute_2D_features(env.state, env.instance) +end diff --git a/src/DynamicVehicleScheduling/environment/instance.jl b/src/DynamicVehicleScheduling/instance.jl similarity index 100% rename from src/DynamicVehicleScheduling/environment/instance.jl rename to src/DynamicVehicleScheduling/instance.jl diff --git a/src/DynamicVehicleScheduling/learning/2d_features.jl b/src/DynamicVehicleScheduling/learning/2d_features.jl deleted file mode 100644 index 6e23810..0000000 --- a/src/DynamicVehicleScheduling/learning/2d_features.jl +++ /dev/null @@ -1,16 +0,0 @@ -function get_features_meanTimeToRequests(state::DVSPState, instance::Instance) - quantiles = [0.5] - a = instance.static_instance.duration[state.location_indices, 2:end] - quantileTimeToRequests = mapslices(x -> quantile(x, quantiles), a; dims=2) - return quantileTimeToRequests -end - -function compute_2D_features(state::DVSPState, instance::Instance) - timeDepotRequest = state.state_instance.duration[:, 1][state.is_postponable] - quantileTimeToRequests = get_features_meanTimeToRequests(state, instance)[state.is_postponable] - return hcat(timeDepotRequest, quantileTimeToRequests)' -end - -function compute_2D_features(env::DVSPEnv) - return compute_2D_features(env.state, 
env.instance) -end diff --git a/src/DynamicVehicleScheduling/maximizer.jl b/src/DynamicVehicleScheduling/maximizer.jl index eecbf57..450ab8a 100644 --- a/src/DynamicVehicleScheduling/maximizer.jl +++ b/src/DynamicVehicleScheduling/maximizer.jl @@ -1,3 +1,132 @@ +""" +$TYPEDSIGNATURES + +Create the acyclic digraph associated with the given VSP `instance`. +""" +function create_graph(instance::StaticInstance) + (; duration, start_time, service_time) = instance + # Initialize directed graph + nb_vertices = location_count(instance) + graph = SimpleDiGraph(nb_vertices) + + depot = 1 # depot is always index 1 + customers = 2:nb_vertices # other vertices are customers + + # Create existing edges + for i₁ in customers + # link every task to depot + add_edge!(graph, depot, i₁) + add_edge!(graph, i₁, depot) + + t₁ = start_time[i₁] + for i₂ in (i₁ + 1):nb_vertices + t₂ = start_time[i₂] + + if t₁ <= t₂ + if t₁ + service_time[i₁] + duration[i₁, i₂] <= t₂ + add_edge!(graph, i₁, i₂) + end + else + if t₂ + service_time[i₂] + duration[i₂, i₁] <= t₁ + add_edge!(graph, i₂, i₁) + end + end + end + end + + return graph +end + +""" +$TYPEDSIGNATURES + +Create the acyclic digraph associated with the given VSP `state`. +""" +function create_graph(state::DVSPState) + return create_graph(state.state_instance) +end + +""" +$TYPEDSIGNATURES + +Retrieve routes solution from the given MIP solution `y` matrix and `graph`. 
+""" +function retrieve_routes(y::AbstractArray, graph::AbstractGraph) + nb_tasks = nv(graph) + job_indices = 2:(nb_tasks) + routes = Vector{Int}[] + + start = [i for i in job_indices if y[1, i] ≈ 1] + for task in start + route = Int[] + current_task = task + while current_task != 1 # < nb_tasks + push!(route, current_task) + local next_task + for i in outneighbors(graph, current_task) + if isapprox(y[current_task, i], 1; atol=0.1) + next_task = i + break + end + end + current_task = next_task + end + push!(routes, route) + end + return routes +end + +""" +$TYPEDSIGNATURES + +Solve the Prize Collecting Vehicle Scheduling Problem defined by `instance` and prize vector `θ`. +""" +function prize_collecting_vsp( + θ::AbstractVector; instance::DVSPState, model_builder=highs_model, kwargs... +) + (; duration) = instance.state_instance + graph = create_graph(instance) + + model = model_builder() + set_silent(model) + + nb_nodes = nv(graph) + job_indices = 2:(nb_nodes) + + @variable(model, y[i=1:nb_nodes, j=1:nb_nodes; has_edge(graph, i, j)] >= 0) + + θ_ext = fill(0.0, location_count(instance)) # no prize for must dispatch requests, only hard constraints + θ_ext[instance.is_postponable] .= θ + + @objective( + model, + Max, + sum( + (θ_ext[dst(edge)] - duration[src(edge), dst(edge)]) * y[src(edge), dst(edge)] + for edge in edges(graph) + ) + ) + @constraint( + model, + flow[i in 2:nb_nodes], + sum(y[j, i] for j in inneighbors(graph, i)) == + sum(y[i, j] for j in outneighbors(graph, i)) + ) + @constraint( + model, demand[i in job_indices], sum(y[j, i] for j in inneighbors(graph, i)) <= 1 + ) + # must dispatch constraints + @constraint( + model, + demand_must_dispatch[i in job_indices; instance.is_must_dispatch[i]], + sum(y[j, i] for j in inneighbors(graph, i)) == 1 + ) + + optimize!(model) + + return retrieve_routes(value.(y), graph) +end + function oracle(θ; instance::DVSPState, kwargs...) routes = prize_collecting_vsp(θ; instance=instance, kwargs...) 
return VSPSolution( diff --git a/src/DynamicVehicleScheduling/environment/plot.jl b/src/DynamicVehicleScheduling/plot.jl similarity index 100% rename from src/DynamicVehicleScheduling/environment/plot.jl rename to src/DynamicVehicleScheduling/plot.jl diff --git a/src/DynamicVehicleScheduling/policy/abstract_vsp_policy.jl b/src/DynamicVehicleScheduling/policy/abstract_vsp_policy.jl deleted file mode 100644 index 0a1f755..0000000 --- a/src/DynamicVehicleScheduling/policy/abstract_vsp_policy.jl +++ /dev/null @@ -1,42 +0,0 @@ -abstract type AbstractDynamicPolicy end - -function (π::AbstractDynamicPolicy)(env; kwargs...) - throw("Not implemented") -end - -""" -$TYPEDEF - -Abstract type for dynamic VSP policies. -""" -abstract type AbstractDynamicVSPPolicy <: AbstractDynamicPolicy end - -""" -$TYPEDSIGNATURES - -Apply the policy to the environment. -""" -function run_policy!( - π::AbstractDynamicVSPPolicy, - env::DVSPEnv, - scenario=env.scenario; - check_feasibility=true, - kwargs..., -) - # reset environment, and initialize variables - reset!(env) - total_cost = 0 - epoch_routes = Vector{Vector{Int}}[] - - # epoch loop - while !terminated(env) - state_routes = π(env; kwargs...) - check_feasibility && @assert is_feasible(observe(env), state_routes) - # env_routes = env_routes_from_state_routes(env, state_routes) - push!(epoch_routes, state_routes) - local_cost = act!(env, state_routes, scenario) - total_cost += local_cost - end - - return total_cost, epoch_routes -end diff --git a/src/DynamicVehicleScheduling/policy/anticipative_policy.jl b/src/DynamicVehicleScheduling/policy/anticipative_policy.jl deleted file mode 100644 index f63b620..0000000 --- a/src/DynamicVehicleScheduling/policy/anticipative_policy.jl +++ /dev/null @@ -1,17 +0,0 @@ -""" -$TYPEDEF - -Anticipative policy for the Dynamic Vehicle Scheduling Problem. -""" -struct AnticipativeVSPPolicy <: AbstractDynamicPolicy end - -""" -$TYPEDSIGNATURES - -Apply the anticipative policy to the environment. 
-""" -function run_policy!( - ::AnticipativeVSPPolicy, env::DVSPEnv, scenario=env.scenario; model_builder=highs_model -) - return generate_anticipative_solution(env, scenario; model_builder, reset_env=true) -end diff --git a/src/DynamicVehicleScheduling/policy/greedy_policy.jl b/src/DynamicVehicleScheduling/policy/greedy_policy.jl deleted file mode 100644 index 90de293..0000000 --- a/src/DynamicVehicleScheduling/policy/greedy_policy.jl +++ /dev/null @@ -1,25 +0,0 @@ -""" -$TYPEDEF - -Greedy policy for the Dynamic Vehicle Scheduling Problem. -Dispatch customers as soon as they appear. -""" -struct GreedyVSPPolicy <: AbstractDynamicVSPPolicy end - -function (π::GreedyVSPPolicy)(env::DVSPEnv; model_builder=highs_model) - state = observe(env) - (; is_postponable) = state - nb_postponable_requests = sum(is_postponable) - θ = ones(nb_postponable_requests) * 1e9 - routes = prize_collecting_vsp(θ; instance=state, model_builder) - return routes -end - -function greedy_policy(env::DVSPEnv; model_builder=highs_model) - _, state = observe(env) - (; is_postponable) = state - nb_postponable_requests = sum(is_postponable) - θ = ones(nb_postponable_requests) * 1e9 - routes = prize_collecting_vsp(θ; instance=state, model_builder) - return routes -end diff --git a/src/DynamicVehicleScheduling/policy/kleopatra_policy.jl b/src/DynamicVehicleScheduling/policy/kleopatra_policy.jl deleted file mode 100644 index e69de29..0000000 diff --git a/src/DynamicVehicleScheduling/policy/lazy_policy.jl b/src/DynamicVehicleScheduling/policy/lazy_policy.jl deleted file mode 100644 index 50b44d3..0000000 --- a/src/DynamicVehicleScheduling/policy/lazy_policy.jl +++ /dev/null @@ -1,15 +0,0 @@ -""" -$TYPEDEF - -Lazy policy for the Dynamic Vehicle Scheduling Problem. -Dispatch customers only when necessary (i.e. must-dispatch). 
-""" -struct LazyVSPPolicy <: AbstractDynamicVSPPolicy end - -function (π::LazyVSPPolicy)(env::DVSPEnv; model_builder=highs_model) - state = observe(env) - nb_postponable_requests = sum(state.is_postponable) - θ = ones(nb_postponable_requests) * -1e9 - routes = prize_collecting_vsp(θ; instance=state, model_builder) - return routes -end diff --git a/src/DynamicVehicleScheduling/environment/scenario.jl b/src/DynamicVehicleScheduling/scenario.jl similarity index 100% rename from src/DynamicVehicleScheduling/environment/scenario.jl rename to src/DynamicVehicleScheduling/scenario.jl diff --git a/src/DynamicVehicleScheduling/environment/state.jl b/src/DynamicVehicleScheduling/state.jl similarity index 100% rename from src/DynamicVehicleScheduling/environment/state.jl rename to src/DynamicVehicleScheduling/state.jl diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl index 2efd0d0..00d9547 100644 --- a/src/Utils/Utils.jl +++ b/src/Utils/Utils.jl @@ -24,7 +24,7 @@ export DataSample, Policy export run_policy! export TopKMaximizer -export AbstractEnv, get_seed, is_terminated, observe, reset!, step! +export AbstractEnvironment, get_seed, is_terminated, observe, reset!, step! 
export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark export generate_sample, generate_dataset diff --git a/test/dynamic_vsp.jl b/test/dynamic_vsp.jl index 49c9b77..367db06 100644 --- a/test/dynamic_vsp.jl +++ b/test/dynamic_vsp.jl @@ -1,8 +1,24 @@ -# @testitem "DVSP - parsing" begin -# using DecisionFocusedLearningBenchmarks.DynamicVehicleScheduling: -# read_vsp_instance, location_count, customer_count -# path = joinpath(@__DIR__, "data", "vsp_instance.txt") -# instance = read_vsp_instance(path) -# @test location_count(instance) == 6 -# @test customer_count(instance) == 5 -# end +@testitem "DVSP" begin + using DecisionFocusedLearningBenchmarks.DynamicVehicleScheduling + + b = DynamicVehicleSchedulingBenchmark(; two_dimensional_features=true) + dataset = generate_dataset(b, 10) + environments = generate_environments(b, dataset) + + env = environments[1] + get_seed(env) + + policies = generate_policies(b) + lazy = policies[1] + greedy = policies[2] + + d = run_policy!(lazy, env, 1; seed=0)[2] + + r, d = run_policy!(lazy, environments, 10) + r, d = run_policy!(greedy, environments, 10) + + env = environments[1] + instance = dataset[1].instance + scenario = generate_scenario(b, instance) + v, y = generate_anticipative_solution(b, env, scenario; nb_epochs=2, reset_env=true) +end From d9eaa8f86f479a7672ccb18288edaa05a47acfb1 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Fri, 8 Aug 2025 17:50:03 +0200 Subject: [PATCH 22/29] fix doc --- docs/src/api/dvsp.md | 15 +++++++++++++++ docs/src/benchmarks/dvsp.md | 3 +++ 2 files changed, 18 insertions(+) create mode 100644 docs/src/api/dvsp.md create mode 100644 docs/src/benchmarks/dvsp.md diff --git a/docs/src/api/dvsp.md b/docs/src/api/dvsp.md new file mode 100644 index 0000000..4ecdfd9 --- /dev/null +++ b/docs/src/api/dvsp.md @@ -0,0 +1,15 @@ +# Dynamic Vehicle Scheduling + +## Public + +```@autodocs +Modules = [DecisionFocusedLearningBenchmarks.DynamicVehicleScheduling] +Private = false +``` + +## 
Private + +```@autodocs +Modules = [DecisionFocusedLearningBenchmarks.DynamicVehicleScheduling] +Public = false +``` diff --git a/docs/src/benchmarks/dvsp.md b/docs/src/benchmarks/dvsp.md new file mode 100644 index 0000000..2b96c67 --- /dev/null +++ b/docs/src/benchmarks/dvsp.md @@ -0,0 +1,3 @@ +# Dynamic Vehicle Scheduling + +[`DynamicVehicleSchedulingBenchmark`](@ref). From fd0e247b3011d62b30b9276ed295383fe2ac19e6 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Fri, 8 Aug 2025 17:54:23 +0200 Subject: [PATCH 23/29] fix doc (again) --- src/Utils/interface.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Utils/interface.jl b/src/Utils/interface.jl index 8c3c0c7..1fa1f65 100644 --- a/src/Utils/interface.jl +++ b/src/Utils/interface.jl @@ -190,7 +190,7 @@ Abstract type interface for stochastic benchmark problems. This type should be used for benchmarks that involve single stage stochastic optimization problems. It follows the same interface as [`AbstractBenchmark`](@ref), with the addition of the following methods: -- [`generate_anticipative_solver`](@ref) +- TODO """ abstract type AbstractStochasticBenchmark{exogenous} <: AbstractBenchmark end @@ -198,12 +198,12 @@ is_exogenous(::AbstractStochasticBenchmark{exogenous}) where {exogenous} = exoge is_endogenous(::AbstractStochasticBenchmark{exogenous}) where {exogenous} = !exogenous """ - generate_anticipative_solver(::AbstractStochasticBenchmark{true}, instance; kwargs...) + generate_scenario(::AbstractStochasticBenchmark{true}, instance; kwargs...) """ function generate_scenario end """ - anticipative_policy(::AbstractStochasticBenchmark{true}, instance, scenario; kwargs...) + generate_anticipative_solution(::AbstractStochasticBenchmark{true}, instance, scenario; kwargs...) 
""" function generate_anticipative_solution end From cd3bb3584464fbecf77efcd56764a33788eb9d56 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Thu, 21 Aug 2025 15:35:39 +0200 Subject: [PATCH 24/29] fix doc --- docs/make.jl | 16 +++++----------- docs/src/benchmarks/argmax.md | 1 + docs/src/benchmarks/ranking.md | 1 + docs/src/tutorials/warcraft.jl | 3 +++ src/DynamicVehicleScheduling/state.jl | 2 +- 5 files changed, 11 insertions(+), 12 deletions(-) diff --git a/docs/make.jl b/docs/make.jl index 21f5480..1946e7f 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -12,8 +12,8 @@ api_dir = joinpath(@__DIR__, "src", "api") api_files = map(x -> joinpath("api", x), readdir(api_dir)) tutorial_files = readdir(tutorial_dir) md_tutorial_files = [split(file, ".")[1] * ".md" for file in tutorial_files] -benchmark_files = readdir(benchmarks_dir) -md_benchmark_files = [split(file, ".")[1] * ".md" for file in benchmark_files] +benchmark_files = [joinpath("benchmarks", e) for e in readdir(benchmarks_dir)] +# md_benchmark_files = [split(file, ".")[1] * ".md" for file in benchmark_files] include_tutorial = true @@ -25,20 +25,14 @@ if include_tutorial end makedocs(; - modules=[DecisionFocusedLearningBenchmarks, DecisionFocusedLearningBenchmarks.Warcraft], + modules=[DecisionFocusedLearningBenchmarks], authors="Members of JuliaDecisionFocusedLearning", sitename="DecisionFocusedLearningBenchmarks.jl", - format=Documenter.HTML(), + format=Documenter.HTML(; size_threshold=typemax(Int)), pages=[ "Home" => "index.md", "Tutorials" => include_tutorial ? 
md_tutorial_files : [], - "Benchmark problems list" => [ - "benchmarks/subset_selection.md", - "benchmarks/fixed_size_shortest_path.md", - "benchmarks/warcraft.md", - "benchmarks/portfolio_optimization.md", - "benchmarks/vsp.md", - ], + "Benchmark problems list" => benchmark_files, "API reference" => api_files, ], ) diff --git a/docs/src/benchmarks/argmax.md b/docs/src/benchmarks/argmax.md index e69de29..1ab74f9 100644 --- a/docs/src/benchmarks/argmax.md +++ b/docs/src/benchmarks/argmax.md @@ -0,0 +1 @@ +# Argmax diff --git a/docs/src/benchmarks/ranking.md b/docs/src/benchmarks/ranking.md index e69de29..5bfcaeb 100644 --- a/docs/src/benchmarks/ranking.md +++ b/docs/src/benchmarks/ranking.md @@ -0,0 +1 @@ +# Ranking diff --git a/docs/src/tutorials/warcraft.jl b/docs/src/tutorials/warcraft.jl index 13f21ba..2d41563 100644 --- a/docs/src/tutorials/warcraft.jl +++ b/docs/src/tutorials/warcraft.jl @@ -86,3 +86,6 @@ final_gap = compute_gap(b, test_dataset, model, maximizer) θ = model(x) y = maximizer(θ) plot_data(b, DataSample(; x, θ_true=θ, y_true=y)) + +using Test #src +@test final_gap < starting_gap #src diff --git a/src/DynamicVehicleScheduling/state.jl b/src/DynamicVehicleScheduling/state.jl index 704ef79..0d0a177 100644 --- a/src/DynamicVehicleScheduling/state.jl +++ b/src/DynamicVehicleScheduling/state.jl @@ -121,7 +121,7 @@ $TYPEDSIGNATURES Check if the given routes are feasible. Routes should be given with global indexation. -Use [`env_routes_from_state_routes`](@ref) if needed to convert the indices beforehand. +Use `env_routes_from_state_routes` if needed to convert the indices beforehand. 
""" function is_feasible(state::DVSPState, routes::Vector{Vector{Int}}; verbose::Bool=false) (; is_must_dispatch, state_instance) = state From d32b1e813be16987ac9a41e32d401ef6742e58e1 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Thu, 21 Aug 2025 16:20:45 +0200 Subject: [PATCH 25/29] Improve basic coverage --- docs/src/api/0_interface.md | 4 ++ docs/src/api/argmax.md | 4 ++ docs/src/api/argmax_2d.md | 4 ++ docs/src/api/dvsp.md | 4 ++ docs/src/api/dynamic_assorment.md | 4 ++ docs/src/api/fixed_shortest_path.md | 4 ++ docs/src/api/portfolio_optimization.md | 4 ++ docs/src/api/ranking.md | 4 ++ docs/src/api/subset_selection.md | 4 ++ docs/src/api/vsp.md | 4 ++ docs/src/api/warcraft.md | 4 ++ src/DecisionFocusedLearningBenchmarks.jl | 30 +++++------ src/DynamicAssortment/environment.jl | 6 +-- .../anticipative_solver.jl | 3 +- src/DynamicVehicleScheduling/environment.jl | 6 +-- .../PortfolioOptimization.jl | 34 ------------- src/Utils/Utils.jl | 2 +- src/Utils/environment.jl | 4 +- src/Utils/policy.jl | 50 ++++++++++++------- test/dynamic_vsp.jl | 15 ++++-- test/utils.jl | 32 ++++++++++++ 21 files changed, 145 insertions(+), 81 deletions(-) diff --git a/docs/src/api/0_interface.md b/docs/src/api/0_interface.md index 1b0a22c..6363833 100644 --- a/docs/src/api/0_interface.md +++ b/docs/src/api/0_interface.md @@ -1,3 +1,7 @@ +```@meta +CollapsedDocStrings = true +``` + # Interface ## Public diff --git a/docs/src/api/argmax.md b/docs/src/api/argmax.md index 6ea12e4..d3b8d29 100644 --- a/docs/src/api/argmax.md +++ b/docs/src/api/argmax.md @@ -1,3 +1,7 @@ +```@meta +CollapsedDocStrings = true +``` + # Argmax ## Public diff --git a/docs/src/api/argmax_2d.md b/docs/src/api/argmax_2d.md index 1b6b44e..ce28b54 100644 --- a/docs/src/api/argmax_2d.md +++ b/docs/src/api/argmax_2d.md @@ -1,3 +1,7 @@ +```@meta +CollapsedDocStrings = true +``` + # Argmax2D ## Public diff --git a/docs/src/api/dvsp.md b/docs/src/api/dvsp.md index 4ecdfd9..2922696 100644 --- a/docs/src/api/dvsp.md +++ 
b/docs/src/api/dvsp.md @@ -1,3 +1,7 @@ +```@meta +CollapsedDocStrings = true +``` + # Dynamic Vehicle Scheduling ## Public diff --git a/docs/src/api/dynamic_assorment.md b/docs/src/api/dynamic_assorment.md index d738692..847d184 100644 --- a/docs/src/api/dynamic_assorment.md +++ b/docs/src/api/dynamic_assorment.md @@ -1,3 +1,7 @@ +```@meta +CollapsedDocStrings = true +``` + # Dynamic Assortment ## Public diff --git a/docs/src/api/fixed_shortest_path.md b/docs/src/api/fixed_shortest_path.md index df50a9f..36a03b2 100644 --- a/docs/src/api/fixed_shortest_path.md +++ b/docs/src/api/fixed_shortest_path.md @@ -1,3 +1,7 @@ +```@meta +CollapsedDocStrings = true +``` + # Fixed-size shortest path ## Public diff --git a/docs/src/api/portfolio_optimization.md b/docs/src/api/portfolio_optimization.md index 5b0102b..6d198ac 100644 --- a/docs/src/api/portfolio_optimization.md +++ b/docs/src/api/portfolio_optimization.md @@ -1,3 +1,7 @@ +```@meta +CollapsedDocStrings = true +``` + # Subset selection ## Public diff --git a/docs/src/api/ranking.md b/docs/src/api/ranking.md index f249a48..82d0719 100644 --- a/docs/src/api/ranking.md +++ b/docs/src/api/ranking.md @@ -1,3 +1,7 @@ +```@meta +CollapsedDocStrings = true +``` + # Ranking ## Public diff --git a/docs/src/api/subset_selection.md b/docs/src/api/subset_selection.md index 76b686d..946eb3c 100644 --- a/docs/src/api/subset_selection.md +++ b/docs/src/api/subset_selection.md @@ -1,3 +1,7 @@ +```@meta +CollapsedDocStrings = true +``` + # Subset selection ## Public diff --git a/docs/src/api/vsp.md b/docs/src/api/vsp.md index 96e4cdb..119c9ba 100644 --- a/docs/src/api/vsp.md +++ b/docs/src/api/vsp.md @@ -1,3 +1,7 @@ +```@meta +CollapsedDocStrings = true +``` + # Stochastic Vehicle Scheduling ## Public diff --git a/docs/src/api/warcraft.md b/docs/src/api/warcraft.md index 3ff6824..c3bd480 100644 --- a/docs/src/api/warcraft.md +++ b/docs/src/api/warcraft.md @@ -1,3 +1,7 @@ +```@meta +CollapsedDocStrings = true +``` + # Warcraft ## 
Public diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl index a33c1de..be2c500 100644 --- a/src/DecisionFocusedLearningBenchmarks.jl +++ b/src/DecisionFocusedLearningBenchmarks.jl @@ -3,22 +3,22 @@ module DecisionFocusedLearningBenchmarks using DataDeps using Requires: @require -function _euro_neurips_unpack(local_filepath) - directory = dirname(local_filepath) - unpack(local_filepath) - # Move instances and delete the rest - for filepath in readdir( - joinpath(directory, "euro-neurips-vrp-2022-quickstart-main", "instances"); join=true - ) - if endswith(filepath, ".txt") - mv(filepath, joinpath(directory, basename(filepath))) +function __init__() + function _euro_neurips_unpack(local_filepath) + directory = dirname(local_filepath) + unpack(local_filepath) + # Move instances and delete the rest + for filepath in readdir( + joinpath(directory, "euro-neurips-vrp-2022-quickstart-main", "instances"); + join=true, + ) + if endswith(filepath, ".txt") + mv(filepath, joinpath(directory, basename(filepath))) + end end + rm(joinpath(directory, "euro-neurips-vrp-2022-quickstart-main"); recursive=true) + return nothing end - rm(joinpath(directory, "euro-neurips-vrp-2022-quickstart-main"); recursive=true) - return nothing -end - -function __init__() # Register the Warcraft dataset ENV["DATADEPS_ALWAYS_ACCEPT"] = "true" register( @@ -64,7 +64,7 @@ using .Utils export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark, DataSample export AbstractEnvironment, get_seed, is_terminated, observe, reset!, step! -export Policy, run_policy! +export Policy, evaluate_policy! 
export generate_sample, generate_dataset, generate_environments, generate_environment export generate_scenario diff --git a/src/DynamicAssortment/environment.jl b/src/DynamicAssortment/environment.jl index 5f62acc..8389a0c 100644 --- a/src/DynamicAssortment/environment.jl +++ b/src/DynamicAssortment/environment.jl @@ -43,7 +43,7 @@ function Environment(instance::Instance; seed=0, rng::AbstractRNG=MersenneTwiste features=full_features, d_features=zeros(2, N), ) - Utils.reset!(env; reset_seed=true) + Utils.reset!(env; reset_rng=true) return env end @@ -58,8 +58,8 @@ prices(b::Environment) = b.instance.prices ## Basic operations of environment # Reset the environment -function Utils.reset!(env::Environment; reset_seed=false, seed=env.seed) - reset_seed && Random.seed!(env.rng, seed) +function Utils.reset!(env::Environment; reset_rng=false, seed=env.seed) + reset_rng && Random.seed!(env.rng, seed) env.step = 1 diff --git a/src/DynamicVehicleScheduling/anticipative_solver.jl b/src/DynamicVehicleScheduling/anticipative_solver.jl index 47a2c1f..5847808 100644 --- a/src/DynamicVehicleScheduling/anticipative_solver.jl +++ b/src/DynamicVehicleScheduling/anticipative_solver.jl @@ -49,8 +49,9 @@ function anticipative_solver( two_dimensional_features=env.instance.two_dimensional_features, reset_env=true, nb_epochs=typemax(Int), + seed=get_seed(env), ) - reset_env && reset!(env; reset_seed=true) + reset_env && reset!(env; reset_rng=true, seed) start_epoch = current_epoch(env) end_epoch = min(last_epoch(env), start_epoch + nb_epochs - 1) diff --git a/src/DynamicVehicleScheduling/environment.jl b/src/DynamicVehicleScheduling/environment.jl index cd8d6a6..339bd78 100644 --- a/src/DynamicVehicleScheduling/environment.jl +++ b/src/DynamicVehicleScheduling/environment.jl @@ -70,10 +70,10 @@ Utils.is_terminated(env::DVSPEnv) = current_epoch(env) > last_epoch(env) $TYPEDSIGNATURES Reset the environment to its initial state. -Also reset the seed if `reset_seed` is set to true. 
+Also reset the rng to `seed` if `reset_rng` is set to true. """ -function Utils.reset!(env::DVSPEnv; seed=get_seed(env), reset_seed=false) - if reset_seed +function Utils.reset!(env::DVSPEnv; seed=get_seed(env), reset_rng=false) + if reset_rng Random.seed!(env.rng, seed) end env.scenario = Utils.generate_scenario(env; rng=env.rng) diff --git a/src/PortfolioOptimization/PortfolioOptimization.jl b/src/PortfolioOptimization/PortfolioOptimization.jl index 7d1b577..e94262a 100644 --- a/src/PortfolioOptimization/PortfolioOptimization.jl +++ b/src/PortfolioOptimization/PortfolioOptimization.jl @@ -105,40 +105,6 @@ end """ $TYPEDSIGNATURES -Generate a dataset of labeled instances for the portfolio optimization problem. -""" -function Utils.generate_dataset( - bench::PortfolioOptimizationBenchmark, - dataset_size::Int=10; - seed::Int=0, - type::Type=Float32, -) - (; d, p, deg, ν, L, f) = bench - rng = MersenneTwister(seed) - - # Features - features = [randn(rng, type, p) for _ in 1:dataset_size] - - # True weights - B = rand(rng, Bernoulli(0.5), d, p) - c̄ = [ - (0.05 / type(sqrt(p)) .* B * features[i] .+ 0.1^(1 / deg)) .^ deg for - i in 1:dataset_size - ] - costs = [c̄ᵢ .+ L * f .+ 0.01 .* ν .* randn(rng, type, d) for c̄ᵢ in c̄] - - maximizer = Utils.generate_maximizer(bench) - solutions = maximizer.(costs) - - return [ - DataSample(; x, θ_true, y_true) for - (x, θ_true, y_true) in zip(features, costs, solutions) - ] -end - -""" -$TYPEDSIGNATURES - Initialize a linear model for `bench` using `Flux`. """ function Utils.generate_statistical_model(bench::PortfolioOptimizationBenchmark) diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl index 00d9547..d738e31 100644 --- a/src/Utils/Utils.jl +++ b/src/Utils/Utils.jl @@ -21,7 +21,7 @@ include("misc.jl") include("model_builders.jl") export DataSample, Policy -export run_policy! +export evaluate_policy! export TopKMaximizer export AbstractEnvironment, get_seed, is_terminated, observe, reset!, step! 
diff --git a/src/Utils/environment.jl b/src/Utils/environment.jl index 38a3f34..88eff08 100644 --- a/src/Utils/environment.jl +++ b/src/Utils/environment.jl @@ -34,10 +34,10 @@ This function should return a tuple of two elements: function observe end """ - reset!(env::AbstractEnvironment; reset_seed::Bool, seed=get_seed(env)) --> Nothing + reset!(env::AbstractEnvironment; reset_rng::Bool, seed=get_seed(env)) --> Nothing Reset the environment to its initial state. -If `reset_seed` is true, the random number generator is reset to the given `seed`. +If `reset_rng` is true, the random number generator is reset to the given `seed`. """ function reset! end diff --git a/src/Utils/policy.jl b/src/Utils/policy.jl index 8f17f79..3057050 100644 --- a/src/Utils/policy.jl +++ b/src/Utils/policy.jl @@ -31,9 +31,9 @@ $TYPEDSIGNATURES Run the policy on the environment and return the total reward and a dataset of observations. By default, the environment is reset before running the policy. """ -function run_policy!(policy, env::AbstractEnvironment; kwargs...) +function evaluate_policy!(policy, env::AbstractEnvironment; kwargs...) total_reward = 0.0 - reset!(env; reset_seed=false) + reset!(env; reset_rng=false) local labeled_dataset while !is_terminated(env) y = policy(env; kwargs...) @@ -52,35 +52,49 @@ function run_policy!(policy, env::AbstractEnvironment; kwargs...) return total_reward, labeled_dataset end -function run_policy!(policy, envs::Vector{<:AbstractEnvironment}; kwargs...) - E = length(envs) - rewards = zeros(Float64, E) - datasets = map(1:E) do e - reward, dataset = run_policy!(policy, envs[e]; kwargs...) - rewards[e] = reward - return dataset - end - return rewards, vcat(datasets...) -end +# function evaluate_policy!(policy, envs::Vector{<:AbstractEnvironment}; kwargs...) +# E = length(envs) +# rewards = zeros(Float64, E) +# datasets = map(1:E) do e +# reward, dataset = evaluate_policy!(policy, envs[e]; kwargs...) 
+# rewards[e] = reward +# return dataset +# end +# return rewards, vcat(datasets...) +# end + +""" +$TYPEDSIGNATURES -function run_policy!( - policy, env::AbstractEnvironment, episodes::Int; seed=get_seed(env), kwargs... +Evaluate the policy on the environment and return the total reward and a dataset of observations. +By default, the environment is reset before running the policy. +""" +function evaluate_policy!( + policy, env::AbstractEnvironment, episodes::Int=1; seed=get_seed(env), kwargs... ) - reset!(env; reset_seed=true, seed) + reset!(env; reset_rng=true, seed) total_reward = 0.0 datasets = map(1:episodes) do _i - reward, dataset = run_policy!(policy, env; kwargs...) + reward, dataset = evaluate_policy!(policy, env; kwargs...) total_reward += reward return dataset end return total_reward / episodes, vcat(datasets...) end -function run_policy!(policy, envs::Vector{<:AbstractEnvironment}, episodes::Int; kwargs...) +""" +$TYPEDSIGNATURES + +Run the policy on the environments and return the total rewards and a dataset of observations. +By default, the environments are reset before running the policy. +""" +function evaluate_policy!( + policy, envs::Vector{<:AbstractEnvironment}, episodes::Int=1; kwargs... +) E = length(envs) rewards = zeros(Float64, E) datasets = map(1:E) do e - reward, dataset = run_policy!(policy, envs[e], episodes; kwargs...) + reward, dataset = evaluate_policy!(policy, envs[e], episodes; kwargs...) 
rewards[e] = reward return dataset end diff --git a/test/dynamic_vsp.jl b/test/dynamic_vsp.jl index 367db06..166778a 100644 --- a/test/dynamic_vsp.jl +++ b/test/dynamic_vsp.jl @@ -1,9 +1,14 @@ @testitem "DVSP" begin using DecisionFocusedLearningBenchmarks.DynamicVehicleScheduling + using Statistics: mean b = DynamicVehicleSchedulingBenchmark(; two_dimensional_features=true) + + @test is_exogenous(b) + @test !is_endogenous(b) + dataset = generate_dataset(b, 10) - environments = generate_environments(b, dataset) + environments = generate_environments(b, dataset; seed=0) env = environments[1] get_seed(env) @@ -12,10 +17,12 @@ lazy = policies[1] greedy = policies[2] - d = run_policy!(lazy, env, 1; seed=0)[2] + d = evaluate_policy!(lazy, env, 1; seed=0)[2] + + r_lazy, d = evaluate_policy!(lazy, environments, 10) + r_greedy, d = evaluate_policy!(greedy, environments, 10) - r, d = run_policy!(lazy, environments, 10) - r, d = run_policy!(greedy, environments, 10) + @test mean(r_lazy) <= mean(r_greedy) env = environments[1] instance = dataset[1].instance diff --git a/test/utils.jl b/test/utils.jl index b071bd8..e0a4bac 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -23,3 +23,35 @@ @test max(h, w) <= length(path) <= h + w end end + +@testitem "DataSample" begin + using DecisionFocusedLearningBenchmarks + using StableRNGs + + rng = StableRNG(1234) + + function random_sample() + return DataSample(; + x=randn(rng, 10, 5), + θ_true=rand(rng, 5), + y_true=rand(rng, 10), + instance="this is an instance", + ) + end + + sample = random_sample() + @test sample isa DataSample + + io = IOBuffer() + show(io, sample) + @test String(take!(io)) == + "DataSample(x=$(sample.x), θ_true=$(sample.θ_true), y_true=$(sample.y_true), instance=$(sample.instance))" +end + +@testitem "Maximizers" begin + using DecisionFocusedLearningBenchmarks.Utils: TopKMaximizer + top_k = TopKMaximizer(3) + @test top_k([1, 2, 3, 4, 5]) == [0, 0, 1, 1, 1] + @test top_k([5, 4, 3, 2, 1]) == [1, 1, 1, 0, 0] + 
@test_throws(AssertionError, top_k([1, 2])) +end From 805a22be271386717e61babae0601fb137fcd7a2 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Thu, 21 Aug 2025 16:38:45 +0200 Subject: [PATCH 26/29] bugfix --- src/PortfolioOptimization/PortfolioOptimization.jl | 4 ++-- src/Utils/policy.jl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/PortfolioOptimization/PortfolioOptimization.jl b/src/PortfolioOptimization/PortfolioOptimization.jl index e94262a..5fc7e8f 100644 --- a/src/PortfolioOptimization/PortfolioOptimization.jl +++ b/src/PortfolioOptimization/PortfolioOptimization.jl @@ -91,7 +91,7 @@ function Utils.generate_sample( bench::PortfolioOptimizationBenchmark, rng::AbstractRNG; type::Type=Float32 ) (; d, p, deg, ν, L, f) = bench - features = randn(rng, type, p, d) + features = randn(rng, type, p) B = rand(rng, Bernoulli(0.5), d, p) c̄ = (0.05 / type(sqrt(p)) .* B * features .+ 0.1^(1 / deg)) .^ deg costs = c̄ .+ L * f .+ 0.01 * ν * randn(rng, type, d) @@ -99,7 +99,7 @@ function Utils.generate_sample( maximizer = Utils.generate_maximizer(bench) solution = maximizer(costs) - return DataSample(; x=features, θ_true=c̄, y_true=solution) + return DataSample(; x=features, θ_true=costs, y_true=solution) end """ diff --git a/src/Utils/policy.jl b/src/Utils/policy.jl index 3057050..2b3c8e5 100644 --- a/src/Utils/policy.jl +++ b/src/Utils/policy.jl @@ -70,7 +70,7 @@ Evaluate the policy on the environment and return the total reward and a dataset By default, the environment is reset before running the policy. """ function evaluate_policy!( - policy, env::AbstractEnvironment, episodes::Int=1; seed=get_seed(env), kwargs... + policy, env::AbstractEnvironment, episodes::Int; seed=get_seed(env), kwargs... 
) reset!(env; reset_rng=true, seed) total_reward = 0.0 From 522a91f1cabe9ad0f53b836b41025c18618640d8 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Thu, 21 Aug 2025 16:53:37 +0200 Subject: [PATCH 27/29] basic tests for dynamic assortment --- test/dynamic_assortment.jl | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 test/dynamic_assortment.jl diff --git a/test/dynamic_assortment.jl b/test/dynamic_assortment.jl new file mode 100644 index 0000000..d9787fb --- /dev/null +++ b/test/dynamic_assortment.jl @@ -0,0 +1,25 @@ +@testitem "DVSP" begin + using DecisionFocusedLearningBenchmarks + using Statistics: mean + + b = DynamicAssortmentBenchmark() + + @test is_endogenous(b) + @test !is_exogenous(b) + + dataset = generate_dataset(b, 10; seed=0) + environments = generate_environments(b, dataset) + + env = environments[1] + get_seed(env) + env.seed + + policies = generate_policies(b) + expert = policies[1] + greedy = policies[2] + + r_expert, _ = evaluate_policy!(expert, environments) + r_greedy, _ = evaluate_policy!(greedy, environments) + + @test mean(r_expert) >= mean(r_greedy) +end From 1905623fc8ae79bfc00e1327fddc911b1f327ca3 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Thu, 21 Aug 2025 17:14:59 +0200 Subject: [PATCH 28/29] improve coverage --- test/dynamic_assortment.jl | 11 ++++-- test/utils.jl | 68 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 2 deletions(-) diff --git a/test/dynamic_assortment.jl b/test/dynamic_assortment.jl index d9787fb..54030fe 100644 --- a/test/dynamic_assortment.jl +++ b/test/dynamic_assortment.jl @@ -1,4 +1,4 @@ -@testitem "DVSP" begin +@testitem "dynamic Assortment" begin using DecisionFocusedLearningBenchmarks using Statistics: mean @@ -18,8 +18,15 @@ expert = policies[1] greedy = policies[2] - r_expert, _ = evaluate_policy!(expert, environments) + r_expert, d = evaluate_policy!(expert, environments) r_greedy, _ = evaluate_policy!(greedy, environments) @test mean(r_expert) >= 
mean(r_greedy) + + model = generate_statistical_model(b) + maximizer = generate_maximizer(b) + sample = d[1] + x = sample.x + θ = model(x) + y = maximizer(θ) end diff --git a/test/utils.jl b/test/utils.jl index e0a4bac..4fd4b4f 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -46,6 +46,74 @@ end show(io, sample) @test String(take!(io)) == "DataSample(x=$(sample.x), θ_true=$(sample.θ_true), y_true=$(sample.y_true), instance=$(sample.instance))" + + # Test StatsBase methods + using StatsBase: + ZScoreTransform, + UnitRangeTransform, + fit, + transform, + transform!, + reconstruct, + reconstruct! + + # Create a dataset for testing + N = 5 + dataset = [random_sample() for _ in 1:N] + + # Test fit with ZScoreTransform + zt = fit(ZScoreTransform, dataset) + @test zt isa ZScoreTransform + + # Test fit with UnitRangeTransform + ut = fit(UnitRangeTransform, dataset) + @test ut isa UnitRangeTransform + + # Test transform (non-mutating) + dataset_zt = transform(zt, dataset) + @test length(dataset_zt) == length(dataset) + @test all(d -> d isa DataSample, dataset_zt) + + # Check that other fields are preserved + for i in 1:N + @test dataset_zt[i].θ_true == dataset[i].θ_true + @test dataset_zt[i].y_true == dataset[i].y_true + @test dataset_zt[i].instance == dataset[i].instance + end + + # Check that features are actually transformed + @test dataset_zt[1].x != dataset[1].x + + # Test transform! (mutating) + dataset_copy = deepcopy(dataset) + original_x = copy(dataset_copy[1].x) + transform!(ut, dataset_copy) + @test dataset_copy[1].x != original_x + + # Check that other fields remain unchanged after transform! 
+ for i in 1:N + @test dataset_copy[i].θ_true == dataset[i].θ_true + @test dataset_copy[i].y_true == dataset[i].y_true + @test dataset_copy[i].instance == dataset[i].instance + end + + # Test reconstruct (non-mutating) + dataset_reconstructed = reconstruct(zt, dataset_zt) + @test length(dataset_reconstructed) == length(dataset) + + # Test round-trip consistency (should be close to original) + for i in 1:N + @test dataset_reconstructed[i].x ≈ dataset[i].x atol = 1e-10 + @test dataset_reconstructed[i].θ_true == dataset[i].θ_true + @test dataset_reconstructed[i].y_true == dataset[i].y_true + @test dataset_reconstructed[i].instance == dataset[i].instance + end + + # Test reconstruct! (mutating) + reconstruct!(zt, dataset_zt) + for i in 1:N + @test dataset_zt[i].x ≈ dataset[i].x atol = 1e-10 + end end @testitem "Maximizers" begin From eb5b67f893c3a199a7f074f68382c8b39d8c412a Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Fri, 22 Aug 2025 09:41:49 +0200 Subject: [PATCH 29/29] improve coverage --- .../DynamicVehicleScheduling.jl | 5 +++++ test/dynamic_vsp.jl | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl index f76a43b..7421032 100644 --- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl +++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl @@ -5,6 +5,7 @@ using ..Utils using Base: @kwdef using DataDeps: @datadep_str using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES +using Flux: Chain, Dense using Graphs using HiGHS using InferOpt: LinearMaximizer @@ -110,6 +111,10 @@ function Utils.generate_policies(b::DynamicVehicleSchedulingBenchmark) return (lazy, greedy) end +function Utils.generate_statistical_model(b::DynamicVehicleSchedulingBenchmark) + return Chain(Dense((b.two_dimensional_features ? 
2 : 14) => 1), vec) +end + export DynamicVehicleSchedulingBenchmark end diff --git a/test/dynamic_vsp.jl b/test/dynamic_vsp.jl index 166778a..0f890c0 100644 --- a/test/dynamic_vsp.jl +++ b/test/dynamic_vsp.jl @@ -3,6 +3,7 @@ using Statistics: mean b = DynamicVehicleSchedulingBenchmark(; two_dimensional_features=true) + b2 = DynamicVehicleSchedulingBenchmark(; two_dimensional_features=false) @test is_exogenous(b) @test !is_endogenous(b) @@ -28,4 +29,21 @@ instance = dataset[1].instance scenario = generate_scenario(b, instance) v, y = generate_anticipative_solution(b, env, scenario; nb_epochs=2, reset_env=true) + + maximizer = generate_maximizer(b) + + x, instance = observe(env) + model = generate_statistical_model(b) + θ = model(x) + y = maximizer(θ; instance) + + dataset2 = generate_dataset(b2, 10) + environments2 = generate_environments(b2, dataset2; seed=0) + env2 = environments2[1] + x2, instance2 = observe(env2) + model2 = generate_statistical_model(b2) + θ2 = model2(x2) + y2 = maximizer(θ2; instance=instance2) + @test size(x, 1) == 2 + @test size(x2, 1) == 14 end