diff --git a/docs/make.jl b/docs/make.jl index 6b79219..4a1ec1b 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -26,9 +26,10 @@ makedocs(; sitename="DecisionFocusedLearningBenchmarks.jl", format=Documenter.HTML(; size_threshold=typemax(Int)), pages=[ - "Home" => [ - "Getting started" => "index.md", - "Understanding Benchmark Interfaces" => "benchmark_interfaces.md", + "Home" => "index.md", + "Guides" => [ + "Using benchmarks" => "using_benchmarks.md", + "Creating custom benchmarks" => "custom_benchmarks.md", ], "Tutorials" => include_tutorial ? md_tutorial_files : [], "Benchmark problems list" => benchmark_files, diff --git a/docs/src/benchmark_interfaces.md b/docs/src/benchmark_interfaces.md deleted file mode 100644 index 7c9e457..0000000 --- a/docs/src/benchmark_interfaces.md +++ /dev/null @@ -1,153 +0,0 @@ -# Understanding Benchmark Interface - -This guide explains how benchmarks work through the common interface of DecisionFocusedLearningBenchmarks.jl. -Understanding this interface is essential for using existing benchmarks and implementing new ones. - -## Core Concepts - -### DataSample Structure - -All benchmarks work with [`DataSample`](@ref) objects that encapsulate the data needed for decision-focused learning: - -```julia -@kwdef struct DataSample{I,F,S,C} - x::F = nothing # Input features of the policy - θ::C = nothing # Intermediate cost/utility parameters - y::S = nothing # Output solution - info::I = nothing # Additional data information (e.g., problem instance) -end -``` - -The `DataSample` provides flexibility, not all fields need to be populated depending on the benchmark type and use. 
- -### Benchmark Type Hierarchy - -The package defines a hierarchy of three abstract types: - -``` -AbstractBenchmark -└── AbstractStochasticBenchmark{exogenous} - └── AbstractDynamicBenchmark{exogenous} -``` - -- **`AbstractBenchmark`**: static, single-stage optimization problems -- **`AbstractStochasticBenchmark{exogenous}`**: stochastic, single stage optimization problems -- **`AbstractDynamicBenchmark{exogenous}`**: multi-stage sequential decision-making problems - -The `{exogenous}` type parameter indicates whether uncertainty distribution comes from external sources (`true`) or is influenced by decisions (`false`), which affects available methods. - -## Common Interface Methods - -### Data Generation - -Every benchmark must implement a data generation method: - -```julia -# Generate a single sample -generate_sample(benchmark::AbstractBenchmark, rng::AbstractRNG; kwargs...) -> DataSample -``` -This method should output a single `DataSample` given a random number generator and optional parameters as keyword arguments. - -If needed, benchmarks can instead override the [`generate_dataset`](@ref) method to directly create an entire dataset of size `size`: -```julia -generate_dataset(benchmark::AbstractBenchmark, size::Int; kwargs...) -> Vector{DataSample} -``` - -The default `generate_dataset` implementation calls `generate_sample` repeatedly, but benchmarks can override this for custom dataset generation logic. - -### DFL Policy Components - -Benchmarks provide the building blocks for decision-focused learning policies: - -```julia -# Create a statistical model (e.g., a neural network) -generate_statistical_model(benchmark::AbstractBenchmark; kwargs...) - -# Create an optimization maximizer/solver -generate_maximizer(benchmark::AbstractBenchmark; kwargs...) -``` - -The statistical model typically maps features `x` to cost parameters `θ`. 
-The maximizer solves optimization problems given cost parameters `θ` (and potentially additional problem dependent keyword arguments), returning decision `y`. - -### Benchmark Policies - -Benchmarks can provide baseline policies for comparison and evaluation: - -```julia -# Get baseline policies for comparison -generate_policies(benchmark::AbstractBenchmark) -> Tuple{Policy} -``` -This returns a tuple of `Policy` objects representing different benchmark-specific policies. -A `Policy` is just a function with a name and description: -```julia -struct Policy{F} - name::String - description::String - policy_function::F -end -``` - -Policies can be evaluated across multiple instances/environments using: -```julia -evaluate_policy!(policy::Policy, instances; kwargs...) -> (rewards, data_samples) -``` - -### Evaluation Methods - -Optional methods for analysis and visualization: - -```julia -# Visualize data samples -plot_data(benchmark::AbstractBenchmark, sample::DataSample; kwargs...) -plot_instance(benchmark::AbstractBenchmark, instance; kwargs...) -plot_solution(benchmark::AbstractBenchmark, sample::DataSample, solution; kwargs...) - -# Compute optimality gap -compute_gap(benchmark::AbstractBenchmark, dataset, model, maximizer) -> Float64 - -# Evaluate objective value -objective_value(benchmark::AbstractBenchmark, sample::DataSample, solution) -``` - -## Benchmark-Specific Interfaces - -### Static Benchmarks - -Static benchmarks follow the basic interface above. - -### Stochastic Benchmarks - -Exogenous stochastic benchmarks add methods for scenario generation and anticipative solutions: - -```julia -# Generate uncertainty scenarios (for exogenous benchmarks) -generate_scenario(benchmark::AbstractStochasticBenchmark{true}, instance; kwargs...) - -# Solve anticipative optimization problem for given scenario -generate_anticipative_solution(benchmark::AbstractStochasticBenchmark{true}, - instance, scenario; kwargs...) 
-``` - -### Dynamic Benchmarks - -In order to model sequential decision-making, dynamic benchmarks additionally work with environments. -For this, they implement methods to create environments from instances or datasets: -```julia -# Create environment for sequential decision-making -generate_environment(benchmark::AbstractDynamicBenchmark, instance, rng; kwargs...) -> <:AbstractEnvironment - -# Generate multiple environments -generate_environments(benchmark::AbstractDynamicBenchmark, dataset; kwargs...) -> Vector{<:AbstractEnvironment} -``` -Similarly to `generate_dataset` and `generate_sample`, one only needs to implement `generate_environment`, as `generate_environments` has a default implementation that calls it repeatedly. - -The [`AbstractEnvironment`](@ref) interface is defined as follows: -```julia -# Environment methods -get_seed(env::AbstractEnvironment) # Get current RNG seed -reset!(env::AbstractEnvironment; reset_rng::Bool, seed=get_seed(env)) # Reset to initial state -observe(env::AbstractEnvironment) -> (obs, info) # Get current observation -step!(env::AbstractEnvironment, action) -> reward # Take action, get reward -is_terminated(env::AbstractEnvironment) -> Bool # Check if episode ended -``` diff --git a/docs/src/custom_benchmarks.md b/docs/src/custom_benchmarks.md new file mode 100644 index 0000000..9c95b8b --- /dev/null +++ b/docs/src/custom_benchmarks.md @@ -0,0 +1,277 @@ +# Creating Custom Benchmarks + +This guide explains how to implement new benchmarks in +DecisionFocusedLearningBenchmarks.jl. It is aimed at developers who want to add +problems to the benchmark suite or integrate their own domains. + +--- + +## Type hierarchy + +``` +AbstractBenchmark +└── AbstractStochasticBenchmark{exogenous} + └── AbstractDynamicBenchmark{exogenous} +``` + +| Type | Use case | +|------|----------| +| `AbstractBenchmark` | Static, single-stage optimization (e.g. 
shortest path, portfolio) | +| `AbstractStochasticBenchmark{true}` | Single-stage with exogenous uncertainty (scenarios drawn independently of decisions) | +| `AbstractStochasticBenchmark{false}` | Single-stage with endogenous uncertainty (not yet used) | +| `AbstractDynamicBenchmark{true}` | Multi-stage sequential decisions with exogenous uncertainty | +| `AbstractDynamicBenchmark{false}` | Multi-stage sequential decisions with endogenous uncertainty | + +--- + +## Implementation strategies + +There are three strategies for data generation. Pick the one that best fits your +benchmark: + +| Strategy | Method to implement | When to use | +|----------|---------------------|-------------| +| **1** | `generate_instance(bench, rng; kwargs...) -> DataSample` | Samples are independent; `y=nothing` at generation time | +| **2** | `generate_sample(bench, rng; kwargs...) -> DataSample` | Samples are independent; you want to compute `y` inside `generate_sample` | +| **3** | `generate_dataset(bench, N; kwargs...) -> Vector{DataSample}` | Samples are not independent (e.g. loaded from shared files) | + +The default `generate_sample` calls `generate_instance` and then applies +`target_policy` to the returned sample. `generate_dataset` calls `generate_sample` +repeatedly and applies `target_policy` to each result. + +--- + +## `AbstractBenchmark`: required methods + +### Data generation (choose one strategy) + +```julia +# Strategy 1: recommended for most static benchmarks +generate_instance(bench::MyBenchmark, rng::AbstractRNG; kwargs...) -> DataSample + +# Strategy 2: when you want to compute y inside generate_sample +generate_sample(bench::MyBenchmark, rng::AbstractRNG; kwargs...) -> DataSample + +# Strategy 3: when samples are not independent +generate_dataset(bench::MyBenchmark, N::Int; kwargs...) 
-> Vector{DataSample} +``` + +### Pipeline components (required) + +```julia +generate_statistical_model(bench::MyBenchmark; seed=nothing) +# Returns an untrained Flux model mapping x -> θ + +generate_maximizer(bench::MyBenchmark) +# Returns a callable (θ; context...) -> y +``` + +### Optional methods + +```julia +is_minimization_problem(bench::MyBenchmark) -> Bool # default: false (maximization) +objective_value(bench::MyBenchmark, sample::DataSample, y) -> Real +compute_gap(bench::MyBenchmark, dataset, model, maximizer) -> Float64 +plot_data(bench::MyBenchmark, sample::DataSample; kwargs...) +plot_instance(bench::MyBenchmark, instance; kwargs...) +plot_solution(bench::MyBenchmark, sample::DataSample, y; kwargs...) +generate_baseline_policies(bench::MyBenchmark) -> collection of callables +``` + +--- + +## `AbstractStochasticBenchmark{true}`: additional methods + +For stochastic benchmarks with exogenous uncertainty, implement: + +```julia +# Instance + features, no scenario (y = nothing) +generate_instance(bench::MyStochasticBenchmark, rng::AbstractRNG; kwargs...) -> DataSample + +# Draw one scenario given the instance encoded in context +generate_scenario(bench::MyStochasticBenchmark, rng::AbstractRNG; context...) -> scenario +# Note: sample.context is spread as kwargs when called by the framework +``` + +The framework `generate_sample` calls `generate_instance`, draws `nb_scenarios` +scenarios via `generate_scenario`, then: +- If `target_policy` is provided: calls `target_policy(sample, scenarios) -> Vector{DataSample}`. +- Otherwise: returns unlabeled samples with `extra=(; scenario=ξ)` for each scenario. + +#### Anticipative solver (optional) + +```julia +generate_anticipative_solver(bench::MyStochasticBenchmark) +# Returns a callable: (scenario; context...) 
-> y +``` + +#### `DataSample` conventions + +- `context`: solver kwargs (instance data, graph, capacities, …) +- `extra`: scenario: **never** passed to the maximizer + +```julia +DataSample(; x=features, y=nothing, + instance=my_instance, # goes into context + extra=(; scenario=ξ)) +``` + +--- + +## `AbstractDynamicBenchmark`: additional methods + +Dynamic benchmarks extend stochastic ones with an environment-based rollout interface. + +### Environment generation + +```julia +# Strategy A: generate one environment at a time (default implementation of +# generate_environments calls this repeatedly) +generate_environment(bench::MyDynamicBenchmark, rng::AbstractRNG; kwargs...) -> AbstractEnvironment + +# Strategy B: override when environments are not independent (e.g. loaded from files) +generate_environments(bench::MyDynamicBenchmark, n::Int; rng, kwargs...) -> Vector{<:AbstractEnvironment} +``` + +### `AbstractEnvironment` interface + +Your environment type must implement: + +```julia +get_seed(env::MyEnv) # Return the RNG seed used at creation +reset!(env::MyEnv; reset_rng::Bool, seed=get_seed(env)) # Reset to initial state +observe(env::MyEnv) -> (observation, info) # Current observation +step!(env::MyEnv, action) -> reward # Apply action, advance state +is_terminated(env::MyEnv) -> Bool # True when episode has ended +``` + +### Baseline policies (required for `generate_dataset`) + +```julia +generate_baseline_policies(bench::MyDynamicBenchmark) +# Returns named callables: (env) -> Vector{DataSample} +# Each callable performs a full episode rollout and returns the trajectory. +``` + +`generate_dataset` for dynamic benchmarks **requires** a `target_policy` kwarg, +there is no default. The `target_policy` must be a callable `(env) -> Vector{DataSample}`. + +### `DataSample` conventions + +- `context`: solver-relevant state (observation fields, graph, etc.) 
+- `extra`: reward, step counter, history (never passed to the maximizer) + +```julia +DataSample(; x=features, y=action, + instance=current_state, # goes into context + extra=(; reward=r, step=t)) +``` + +--- + +## `DataSample` construction guide + +| Benchmark category | `context` fields | `extra` fields | +|--------------------|-----------------|----------------| +| Static | instance, graph, capacities, … | — | +| Stochastic | instance (not scenario) | `scenario` | +| Dynamic | solver-relevant state / observation | `reward`, `step`, `history`, … | + +Any named argument that is not `x`, `θ`, `y`, `context`, or `extra` is treated as a `context` field: + +```julia +# Equivalent forms: +DataSample(; x=feat, y=sol, instance=inst) +DataSample(; x=feat, y=sol, context=(; instance=inst)) + +# With extra: +DataSample(; x=feat, y=nothing, instance=inst, extra=(; scenario=ξ)) +``` + +Keys must not appear in both `context` and `extra`, the constructor raises an error. + +--- + +## Small examples + +### Static benchmark + +```julia +using DecisionFocusedLearningBenchmarks +const DFLBenchmarks = DecisionFocusedLearningBenchmarks + +struct MyStaticBenchmark <: AbstractBenchmark end + +function DFLBenchmarks.generate_instance(bench::MyStaticBenchmark, rng::AbstractRNG; kwargs...) + instance = build_my_instance(rng) + x = compute_features(instance) + return DataSample(; x=x, instance=instance) # y = nothing +end + + +DFLBenchmarks.generate_statistical_model(bench::MyStaticBenchmark; seed=nothing) = + Chain(Dense(10 => 32, relu), Dense(32 => 5)) + +DFLBenchmarks.generate_maximizer(bench::MyStaticBenchmark) = + (θ; instance, kwargs...) -> solve_my_problem(θ, instance) +``` + +### Stochastic benchmark + +```julia + +struct MyStochasticBenchmark <: AbstractStochasticBenchmark{true} end + +function DFLBenchmarks.generate_instance(bench::MyStochasticBenchmark, rng::AbstractRNG; kwargs...) 
+ instance = build_my_instance(rng) + x = compute_features(instance) + return DataSample(; x=x, instance=instance) +end + +function DFLBenchmarks.generate_scenario(bench::MyStochasticBenchmark, rng::AbstractRNG; instance, kwargs...) + return sample_scenario(instance, rng) +end + +DFLBenchmarks.generate_anticipative_solver(bench::MyStochasticBenchmark) = + (scenario; instance, kwargs...) -> solve_with_scenario(instance, scenario) +``` + +### Dynamic benchmark + +```julia +struct MyDynamicBenchmark <: AbstractDynamicBenchmark{true} end + +mutable struct MyEnv <: AbstractEnvironment + const instance::MyInstance + const seed::Int + state::MyState +end + +DFLBenchmarks.get_seed(env::MyEnv) = env.seed +DFLBenchmarks.reset!(env::MyEnv; reset_rng=true, seed=env.seed) = (env.state = initial_state(env.instance)) +DFLBenchmarks.observe(env::MyEnv) = (env.state, nothing) +DFLBenchmarks.step!(env::MyEnv, action) = apply_action!(env.state, action) +DFLBenchmarks.is_terminated(env::MyEnv) = env.state.done + +function DFLBenchmarks.generate_environment(bench::MyDynamicBenchmark, rng::AbstractRNG; kwargs...) 
+ inst = build_my_instance(rng) + seed = rand(rng, Int) + return MyEnv(inst, seed, initial_state(inst)) +end + +function DFLBenchmarks.generate_baseline_policies(bench::MyDynamicBenchmark) + greedy = function(env) + samples = DataSample[] + reset!(env) + while !is_terminated(env) + obs, _ = observe(env) + x = compute_features(obs) + y = greedy_action(obs) + r = step!(env, y) + push!(samples, DataSample(; x=x, y=y, instance=obs, extra=(; reward=r))) + end + return samples + end + return (; greedy) +end +``` diff --git a/docs/src/using_benchmarks.md b/docs/src/using_benchmarks.md new file mode 100644 index 0000000..a1e95dd --- /dev/null +++ b/docs/src/using_benchmarks.md @@ -0,0 +1,173 @@ +# Using Benchmarks + +This guide covers everything you need to work with existing benchmarks in +DecisionFocusedLearningBenchmarks.jl: generating datasets, assembling DFL pipeline +components, and evaluating results. + +--- + +## DataSample at a glance + +All data in the package is represented as [`DataSample`](@ref) objects. + +| Field | Type | Description | +|-------|------|-------------| +| `x` | any | Input features (fed to the statistical model) | +| `θ` | any | Intermediate cost parameters | +| `y` | any | Output decision / solution | +| `context` | `NamedTuple` | Solver kwargs — spread into `maximizer(θ; sample.context...)` | +| `extra` | `NamedTuple` | Non-solver data (scenario, reward, step, …) — never passed to the solver | + +Not all fields are populated in every sample. 
For convenience, named entries inside +`context` and `extra` can be accessed directly on the sample via property forwarding: + +```julia +sample.instance # looks up :instance in context first, then in extra +sample.scenario # looks up :scenario in context first, then in extra +``` + +--- + +## Generating datasets for training + +### Static benchmarks + +For static benchmarks (`<:AbstractBenchmark`) the framework already computes the +ground-truth label `y`: + +```julia +bench = ArgmaxBenchmark() +dataset = generate_dataset(bench, 100; seed=0) # Vector{DataSample} with x, y, context +``` + +You can override the labels by providing a `target_policy`: + +```julia +my_policy = sample -> DataSample(; sample.context..., x=sample.x, + y=my_algorithm(sample.instance)) +dataset = generate_dataset(bench, 100; seed=0, target_policy=my_policy) +``` + +### Stochastic benchmarks (exogenous) + +For `AbstractStochasticBenchmark{true}` benchmarks the default call returns +*unlabeled* samples, each sample carries one scenario in `sample.extra.scenario`: + +```julia +bench = StochasticVehicleSchedulingBenchmark() +dataset = generate_dataset(bench, 20; seed=0) # y = nothing +``` + +Request multiple scenarios per instance with `nb_scenarios`: + +```julia +dataset = generate_dataset(bench, 20; seed=0, nb_scenarios=5) +# returns 20 × 5 = 100 samples +``` + +To compute labels, wrap your algorithm as a `target_policy`: + +```julia +anticipative = generate_anticipative_solver(bench) # (scenario; kwargs...) 
-> y + +policy = (sample, scenarios) -> [ + DataSample(; sample.context..., x=sample.x, + y=anticipative(ξ; sample.context...)) + for ξ in scenarios +] +labeled = generate_dataset(bench, 20; seed=0, nb_scenarios=5, target_policy=policy) +``` + +### Dynamic benchmarks + +Dynamic benchmarks use a two-step workflow: + +```julia +bench = DynamicVehicleSchedulingBenchmark() + +# Step 1 — create environments (reusable across experiments) +envs = generate_environments(bench, 10; seed=0) + +# Step 2 — roll out a policy to collect training trajectories +policy = generate_baseline_policies(bench)[1] # e.g. lazy policy +dataset = generate_dataset(bench, envs; target_policy=policy) +# dataset is a flat Vector{DataSample} of all steps across all trajectories +``` + +`target_policy` is **required** for dynamic benchmarks (there is no default label). +It must be a callable `(env) -> Vector{DataSample}` that performs a full episode +rollout and returns the resulting trajectory. + +### Seed / RNG control + +All `generate_dataset` and `generate_environments` calls accept either `seed` +(creates an internal `MersenneTwister`) or `rng` for full control: + +```julia +using Random +rng = MersenneTwister(42) +dataset = generate_dataset(bench, 50; rng=rng) +``` + +--- + +## DFL pipeline components + +```julia +model = generate_statistical_model(bench; seed=0) # untrained Flux model +maximizer = generate_maximizer(bench) # combinatorial oracle +``` + +These two pieces compose naturally: + +```julia +θ = model(sample.x) # predict cost parameters +y = maximizer(θ; sample.context...) 
# solve the optimization problem
+```
+
+---
+
+## Evaluation
+
+```julia
+# Average relative optimality gap across a dataset
+gap = compute_gap(bench, dataset, model, maximizer)
+```
+
+```julia
+# Objective value for a single decision
+obj = objective_value(bench, sample, y)
+```
+
+---
+
+## Baseline policies
+
+`generate_baseline_policies` returns a collection of named callables that can serve as
+reference points or as `target_policy` arguments:
+
+```julia
+policies = generate_baseline_policies(bench)
+pol = policies[1] # e.g. greedy, lazy, or anticipative policy
+```
+
+- **Static / stochastic:** `pol(sample) -> DataSample`
+- **Dynamic:** `pol(env) -> Vector{DataSample}` (full episode trajectory)
+
+For dynamic benchmarks you can evaluate a policy over multiple episodes:
+
+```julia
+rewards, samples = evaluate_policy!(pol, envs, n_episodes)
+```
+
+---
+
+## Visualization
+
+Where implemented, benchmarks provide benchmark-specific plotting helpers:
+
+```julia
+plot_data(bench, sample) # overview of a data sample
+plot_instance(bench, instance) # raw problem instance
+plot_solution(bench, sample, y) # overlay solution on instance
+```
diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl
index 2f1c320..6561b7a 100644
--- a/src/DecisionFocusedLearningBenchmarks.jl
+++ b/src/DecisionFocusedLearningBenchmarks.jl
@@ -67,9 +67,10 @@ export AbstractEnvironment, get_seed, is_terminated, observe, reset!, step!
 
 export Policy, evaluate_policy!
-export generate_sample, generate_dataset, generate_environments, generate_environment +export generate_instance, + generate_sample, generate_dataset, generate_environments, generate_environment export generate_scenario -export generate_policies +export generate_baseline_policies export generate_statistical_model export generate_maximizer export generate_anticipative_solution diff --git a/src/DynamicAssortment/DynamicAssortment.jl b/src/DynamicAssortment/DynamicAssortment.jl index c943dba..df0e64e 100644 --- a/src/DynamicAssortment/DynamicAssortment.jl +++ b/src/DynamicAssortment/DynamicAssortment.jl @@ -110,12 +110,13 @@ end """ $TYPEDSIGNATURES -Creates an [`Environment`](@ref) from an [`Instance`](@ref) of the dynamic assortment benchmark. -The seed of the environment is randomly generated using the provided random number generator. +Creates an [`Environment`](@ref) for the dynamic assortment benchmark. +The instance and seed are randomly generated using the provided random number generator. """ function Utils.generate_environment( - ::DynamicAssortmentBenchmark, instance::Instance, rng::AbstractRNG; kwargs... + b::DynamicAssortmentBenchmark, rng::AbstractRNG; kwargs... 
) + instance = Instance(b, rng) seed = rand(rng, 1:typemax(Int)) return Environment(instance; seed) end @@ -127,7 +128,7 @@ Returns two policies for the dynamic assortment benchmark: - `Greedy`: selects the assortment containing items with the highest prices - `Expert`: selects the assortment with the highest expected revenue (through brute-force enumeration) """ -function Utils.generate_policies(::DynamicAssortmentBenchmark) +function Utils.generate_baseline_policies(::DynamicAssortmentBenchmark) greedy = Policy( "Greedy", "policy that selects the assortment with items with the highest prices", diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl index fb0ea7e..1eba500 100644 --- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl +++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl @@ -18,6 +18,25 @@ using Random: Random, AbstractRNG, MersenneTwister, seed!, randperm using Requires: @require using Statistics: mean, quantile +""" +$TYPEDEF + +Abstract type for dynamic vehicle scheduling benchmarks. + +# Fields +$TYPEDFIELDS +""" +@kwdef struct DynamicVehicleSchedulingBenchmark <: AbstractDynamicBenchmark{true} + "maximum number of customers entering the system per epoch" + max_requests_per_epoch::Int = 10 + "time between decision and dispatch of a vehicle" + Δ_dispatch::Float64 = 1.0 + "duration of an epoch" + epoch_duration::Float64 = 1.0 + "whether to use two-dimensional features" + two_dimensional_features::Bool = false +end + include("utils.jl") # static vsp stuff @@ -38,46 +57,35 @@ include("anticipative_solver.jl") include("features.jl") include("policy.jl") -""" -$TYPEDEF - -Abstract type for dynamic vehicle scheduling benchmarks. 
- -# Fields -$TYPEDFIELDS -""" -@kwdef struct DynamicVehicleSchedulingBenchmark <: AbstractDynamicBenchmark{true} - "maximum number of customers entering the system per epoch" - max_requests_per_epoch::Int = 10 - "time between decision and dispatch of a vehicle" - Δ_dispatch::Float64 = 1.0 - "duration of an epoch" - epoch_duration::Float64 = 1.0 - "whether to use two-dimensional features" - two_dimensional_features::Bool = false -end - """ $TYPEDSIGNATURES -Generate a dataset for the dynamic vehicle scheduling benchmark. -Returns a vector of [`DataSample`](@ref) objects, each containing an [`Instance`](@ref). -The dataset is generated from pre-existing DVRPTW files. +Generate environments for the dynamic vehicle scheduling benchmark. +Reads from pre-existing DVRPTW files and creates [`DVSPEnv`](@ref) environments. """ -function Utils.generate_dataset(b::DynamicVehicleSchedulingBenchmark, dataset_size::Int=1) +function Utils.generate_environments( + b::DynamicVehicleSchedulingBenchmark, + n::Int; + seed=nothing, + rng=MersenneTwister(seed), + kwargs..., +) (; max_requests_per_epoch, Δ_dispatch, epoch_duration, two_dimensional_features) = b files = readdir(datadep"dvrptw"; join=true) - dataset_size = min(dataset_size, length(files)) + n = min(n, length(files)) return [ - DataSample(; - instance=Instance( + generate_environment( + b, + Instance( read_vsp_instance(files[i]); max_requests_per_epoch, Δ_dispatch, epoch_duration, two_dimensional_features, ), - ) for i in 1:dataset_size + rng; + kwargs..., + ) for i in 1:n ] end @@ -87,7 +95,7 @@ $TYPEDSIGNATURES Creates an environment from an [`Instance`](@ref) of the dynamic vehicle scheduling benchmark. The seed of the environment is randomly generated using the provided random number generator. """ -function Utils.generate_environment( +function generate_environment( ::DynamicVehicleSchedulingBenchmark, instance::Instance, rng::AbstractRNG; kwargs... 
) seed = rand(rng, 1:typemax(Int)) @@ -107,16 +115,6 @@ end """ $TYPEDSIGNATURES -Generate a scenario for the dynamic vehicle scheduling benchmark. -This is a wrapper around the generic scenario generation function. -""" -function Utils.generate_scenario(b::DynamicVehicleSchedulingBenchmark, args...; kwargs...) - return Utils.generate_scenario(args...; kwargs...) -end - -""" -$TYPEDSIGNATURES - Generate an anticipative solution for the dynamic vehicle scheduling benchmark. The solution is computed using the anticipative solver with the benchmark's feature configuration. """ @@ -131,12 +129,27 @@ end """ $TYPEDSIGNATURES +Return the anticipative solver for the dynamic vehicle scheduling benchmark. +The callable takes a scenario and solver kwargs (including `instance`) and returns a +training trajectory as a `Vector{DataSample}`. +""" +function Utils.generate_anticipative_solver(::DynamicVehicleSchedulingBenchmark) + return (scenario; instance, kwargs...) -> begin + env = DVSPEnv(instance, scenario) + _, trajectory = anticipative_solver(env; reset_env=false, kwargs...) + return trajectory + end +end + +""" +$TYPEDSIGNATURES + Generate baseline policies for the dynamic vehicle scheduling benchmark. 
Returns a tuple containing: - `lazy`: A policy that dispatches vehicles only when they are ready - `greedy`: A policy that dispatches vehicles to the nearest customer """ -function Utils.generate_policies(b::DynamicVehicleSchedulingBenchmark) +function Utils.generate_baseline_policies(::DynamicVehicleSchedulingBenchmark) lazy = Policy( "Lazy", "Lazy policy that dispatches vehicles only when they are ready.", diff --git a/src/DynamicVehicleScheduling/environment.jl b/src/DynamicVehicleScheduling/environment.jl index 339bd78..a7f4c17 100644 --- a/src/DynamicVehicleScheduling/environment.jl +++ b/src/DynamicVehicleScheduling/environment.jl @@ -23,6 +23,17 @@ function DVSPEnv(instance::Instance; seed=nothing) return DVSPEnv(instance, initial_state, scenario, rng, seed) end +""" +$TYPEDSIGNATURES + +Constructor for [`DVSPEnv`](@ref) from a pre-existing scenario. +""" +function DVSPEnv(instance::Instance, scenario::Scenario; seed=nothing) + rng = MersenneTwister(seed) + initial_state = DVSPState(instance; scenario[1]...) + return DVSPEnv(instance, initial_state, scenario, rng, seed) +end + currrent_epoch(env::DVSPEnv) = current_epoch(env.state) epoch_duration(env::DVSPEnv) = epoch_duration(env.instance) last_epoch(env::DVSPEnv) = last_epoch(env.instance) diff --git a/src/DynamicVehicleScheduling/scenario.jl b/src/DynamicVehicleScheduling/scenario.jl index 4f7746e..eb189e8 100644 --- a/src/DynamicVehicleScheduling/scenario.jl +++ b/src/DynamicVehicleScheduling/scenario.jl @@ -1,4 +1,3 @@ - struct Scenario "indices of the new requests in each epoch" indices::Vector{Vector{Int}} @@ -51,6 +50,8 @@ function Utils.generate_scenario( return Scenario(new_indices, new_service_time, new_start_time) end -function Utils.generate_scenario(sample::DataSample; kwargs...) - return Utils.generate_scenario(sample.instance; kwargs...) +function Utils.generate_scenario( + ::DynamicVehicleSchedulingBenchmark, rng::AbstractRNG; instance, kwargs... 
+) + return generate_scenario(instance; rng) end diff --git a/src/FixedSizeShortestPath/FixedSizeShortestPath.jl b/src/FixedSizeShortestPath/FixedSizeShortestPath.jl index 3a350e5..ee0586a 100644 --- a/src/FixedSizeShortestPath/FixedSizeShortestPath.jl +++ b/src/FixedSizeShortestPath/FixedSizeShortestPath.jl @@ -133,7 +133,10 @@ $TYPEDSIGNATURES Initialize a linear model for `bench` using `Flux`. """ -function Utils.generate_statistical_model(bench::FixedSizeShortestPathBenchmark) +function Utils.generate_statistical_model( + bench::FixedSizeShortestPathBenchmark; seed=nothing +) + Random.seed!(seed) (; p, graph) = bench return Chain(Dense(p, ne(graph))) end diff --git a/src/Maintenance/Maintenance.jl b/src/Maintenance/Maintenance.jl index 5dc0580..64e5ec5 100644 --- a/src/Maintenance/Maintenance.jl +++ b/src/Maintenance/Maintenance.jl @@ -114,12 +114,11 @@ end """ $TYPEDSIGNATURES -Creates an [`Environment`](@ref) from an [`Instance`](@ref) of the maintenance benchmark. -The seed of the environment is randomly generated using the provided random number generator. +Creates an [`Environment`](@ref) for the maintenance benchmark. +The instance and seed are randomly generated using the provided random number generator. """ -function Utils.generate_environment( - ::MaintenanceBenchmark, instance::Instance, rng::AbstractRNG; kwargs... -) +function Utils.generate_environment(b::MaintenanceBenchmark, rng::AbstractRNG; kwargs...) 
+ instance = Instance(b, rng) seed = rand(rng, 1:typemax(Int)) return Environment(instance; seed) end @@ -130,7 +129,7 @@ $TYPEDSIGNATURES Returns two policies for the dynamic assortment benchmark: - `Greedy`: maintains components when they are in the last state before failure, up to the maintenance capacity """ -function Utils.generate_policies(::MaintenanceBenchmark) +function Utils.generate_baseline_policies(::MaintenanceBenchmark) greedy = Policy( "Greedy", "policy that maintains components when they are in the last state before failure, up to the maintenance capacity", diff --git a/src/PortfolioOptimization/PortfolioOptimization.jl b/src/PortfolioOptimization/PortfolioOptimization.jl index f79f488..37631eb 100644 --- a/src/PortfolioOptimization/PortfolioOptimization.jl +++ b/src/PortfolioOptimization/PortfolioOptimization.jl @@ -7,7 +7,7 @@ using Flux: Chain, Dense using Ipopt: Ipopt using JuMP: @variable, @objective, @constraint, optimize!, value, Model, set_silent using LinearAlgebra: I -using Random: AbstractRNG, MersenneTwister +using Random: Random, AbstractRNG, MersenneTwister """ $TYPEDEF @@ -107,7 +107,10 @@ $TYPEDSIGNATURES Initialize a linear model for `bench` using `Flux`. """ -function Utils.generate_statistical_model(bench::PortfolioOptimizationBenchmark) +function Utils.generate_statistical_model( + bench::PortfolioOptimizationBenchmark; seed=nothing +) + Random.seed!(seed) (; p, d) = bench return Dense(p, d) end diff --git a/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl b/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl index b9f2099..f8ba775 100644 --- a/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl +++ b/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl @@ -73,32 +73,29 @@ end """ $TYPEDSIGNATURES -Generate a sample for the given `StochasticVehicleSchedulingBenchmark`. -If you want to not add label solutions in the sample, set `compute_solutions=false`. 
-By default, they will be computed using column generation. -Note that computing solutions can be time-consuming, especially for large instances. -You can also use instead `compact_mip` or `compact_linearized_mip` as the algorithm to compute solutions. -If you want to provide a custom algorithm to compute solutions, you can pass it as the `algorithm` keyword argument. -If `algorithm` takes keyword arguments, you can pass them as well directly in `kwargs...`. -If `store_city=false`, the coordinates and unnecessary information about instances will not be stored in the sample. -""" -function Utils.generate_sample( +Generate an unlabeled instance for the given `StochasticVehicleSchedulingBenchmark`. +Returns a [`DataSample`](@ref) with features `x` and `instance` set, but `y=nothing`. + +To obtain labeled samples, pass a `target_policy` to [`generate_dataset`](@ref): + +```julia +policy = sample -> DataSample(; sample.context..., x=sample.x, + y=column_generation_algorithm(sample.instance)) +dataset = generate_dataset(benchmark, N; target_policy=policy) +``` + +If `store_city=false`, coordinates and city information are not stored in the instance. +""" +function Utils.generate_instance( benchmark::StochasticVehicleSchedulingBenchmark, rng::AbstractRNG; store_city=true, - compute_solutions=true, - algorithm=column_generation_algorithm, kwargs..., ) (; nb_tasks, nb_scenarios) = benchmark instance = Instance(; nb_tasks, nb_scenarios, rng, store_city) x = get_features(instance) - y_true = if compute_solutions - algorithm(instance; kwargs...) - else - nothing - end - return DataSample(; x, instance, y=y_true) + return DataSample(; x, instance) end """ diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl index 0989c85..89a6c67 100644 --- a/src/Utils/Utils.jl +++ b/src/Utils/Utils.jl @@ -27,11 +27,11 @@ export TopKMaximizer export AbstractEnvironment, get_seed, is_terminated, observe, reset!, step! 
export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark -export generate_sample, generate_dataset +export generate_instance, generate_sample, generate_dataset export generate_statistical_model, generate_maximizer export generate_scenario export generate_environment, generate_environments -export generate_policies +export generate_baseline_policies export generate_anticipative_solution export plot_data, compute_gap diff --git a/src/Utils/interface.jl b/src/Utils/interface.jl index 6d23f12..23d6eb5 100644 --- a/src/Utils/interface.jl +++ b/src/Utils/interface.jl @@ -3,69 +3,111 @@ $TYPEDEF Abstract type interface for benchmark problems. -The following methods are mandatory for benchmarks: -- [`generate_dataset`](@ref) or [`generate_sample`](@ref) +# Mandatory methods to implement for any benchmark: +Choose one of three primary implementation strategies: +- Implement [`generate_instance`](@ref) (returns a [`DataSample`](@ref) with `y=nothing`). + The default [`generate_sample`](@ref) then applies `target_policy` if provided. +- Override [`generate_sample`](@ref) directly when the sample requires custom logic. In this case, + [`generate_dataset`](@ref) applies `target_policy` to the result after the call returns. +- Override [`generate_dataset`](@ref) directly when samples cannot be drawn independently. 
+ +Also implement: - [`generate_statistical_model`](@ref) - [`generate_maximizer`](@ref) -The following methods are optional: -- [`plot_data`](@ref) -- [`objective_value`](@ref) -- [`compute_gap`](@ref) +# Optional methods (defaults provided) +- [`is_minimization_problem`](@ref): defaults to `true` +- [`objective_value`](@ref): defaults to `dot(θ, y)` +- [`compute_gap`](@ref): default implementation provided; override for custom evaluation + +# Optional methods (no default) +- [`plot_data`](@ref), [`plot_instance`](@ref), [`plot_solution`](@ref) +- [`generate_baseline_policies`](@ref) """ abstract type AbstractBenchmark end """ - generate_sample(::AbstractBenchmark, rng::AbstractRNG; kwargs...) -> DataSample + generate_instance(::AbstractBenchmark, rng::AbstractRNG; kwargs...) -> DataSample -Generate a single [`DataSample`](@ref) for given benchmark. -This is a low-level function that is used by [`generate_dataset`](@ref) to create -a dataset of samples. It is not mandatory to implement this method, but it is -recommended for benchmarks that have a well-defined way to generate individual samples. -An alternative is to directly implement [`generate_dataset`](@ref) to create a dataset -without generating individual samples. +Generate a single unlabeled [`DataSample`](@ref) (with `y=nothing`) for the benchmark. """ -function generate_sample end +function generate_instance(bench::AbstractBenchmark, rng::AbstractRNG; kwargs...) + return error( + "`generate_instance` is not implemented for $(typeof(bench)). " * + "Implement `generate_instance(::$(typeof(bench)), rng; kwargs...) -> DataSample` " * + "or override `generate_sample` directly.", + ) +end """ - generate_dataset(::AbstractBenchmark, dataset_size::Int; kwargs...) -> Vector{<:DataSample} + generate_sample(::AbstractBenchmark, rng::AbstractRNG; target_policy=nothing, kwargs...) -> DataSample + +Generate a single [`DataSample`](@ref) for the benchmark. 
+ +**Framework default** (when [`generate_instance`](@ref) is implemented): +Calls [`generate_instance`](@ref), then applies `target_policy(sample)` if provided. + +Override directly (instead of implementing [`generate_instance`](@ref)) when the sample +requires custom logic. In this case, [`generate_dataset`](@ref) applies `target_policy` +after the call returns. +""" +function generate_sample(bench::AbstractBenchmark, rng; target_policy=nothing, kwargs...) + sample = generate_instance(bench, rng; kwargs...) + return isnothing(target_policy) ? sample : target_policy(sample) +end + +""" + generate_dataset(::AbstractBenchmark, dataset_size::Int; target_policy=nothing, kwargs...) -> Vector{<:DataSample} Generate a `Vector` of [`DataSample`](@ref) of length `dataset_size` for given benchmark. Content of the dataset can be visualized using [`plot_data`](@ref), when it applies. -By default, it uses [`generate_sample`](@ref) to create each sample in the dataset, and passes any keyword arguments to it. +By default, it uses [`generate_sample`](@ref) to create each sample in the dataset, and passes any +keyword arguments to it. If `target_policy` is provided, it is applied to each sample after +[`generate_sample`](@ref) returns. """ function generate_dataset( bench::AbstractBenchmark, dataset_size::Int; + target_policy=nothing, seed=nothing, rng=MersenneTwister(seed), kwargs..., ) Random.seed!(rng, seed) - return [generate_sample(bench, rng; kwargs...) for _ in 1:dataset_size] + return [ + begin + sample = generate_sample(bench, rng; kwargs...) + isnothing(target_policy) ? sample : target_policy(sample) + end for _ in 1:dataset_size + ] end """ generate_maximizer(::AbstractBenchmark; kwargs...) -Generates a maximizer function. -Returns a callable f: (θ; kwargs...) -> y, where θ is a cost array and y is a solution. +Returns a callable `f(θ; kwargs...) -> y`, solving a maximization problem. 
""" function generate_maximizer end """ - generate_statistical_model(::AbstractBenchmark; kwargs...) + generate_statistical_model(::AbstractBenchmark, seed=nothing; kwargs...) -Initializes and return an untrained statistical model of the CO-ML pipeline. -It's usually a Flux model, that takes a feature matrix x as input, and returns a cost array θ as output. +Returns an untrained statistical model (usually a Flux neural network) that maps a +feature matrix `x` to an output array `θ`. The `seed` parameter controls initialization +randomness for reproducibility. """ function generate_statistical_model end """ - generate_policies(::AbstractBenchmark) -> Vector{Policy} + generate_baseline_policies(::AbstractBenchmark) -> NamedTuple or Tuple + +Return named baseline policies for the benchmark. Each policy is a callable. + +- For static/stochastic benchmarks: signature `(sample) -> DataSample`. +- For dynamic benchmarks: signature `(env) -> Vector{DataSample}` (full trajectory). """ -function generate_policies end +function generate_baseline_policies end """ plot_data(::AbstractBenchmark, ::DataSample; kwargs...) @@ -99,7 +141,7 @@ function compute_gap end """ $TYPEDSIGNATURES -Default behaviour of `objective_value`. +Compute `dot(θ, y)`. Override for non-linear objectives. """ function objective_value(::AbstractBenchmark, θ::AbstractArray, y::AbstractArray) return dot(θ, y) @@ -139,7 +181,8 @@ end """ $TYPEDSIGNATURES -Default behaviour of `compute_gap` for a benchmark problem where `features`, `solutions` and `costs` are all defined. +Default implementation of [`compute_gap`](@ref): average relative optimality gap over `dataset`. +Requires samples with `x`, `θ`, and `y` fields. Override for custom evaluation logic. """ function compute_gap( bench::AbstractBenchmark, @@ -166,11 +209,43 @@ end """ $TYPEDEF -Abstract type interface for stochastic benchmark problems. -This type should be used for benchmarks that involve single stage stochastic optimization problems. 
+Abstract type interface for single-stage stochastic benchmark problems.
+
+A stochastic benchmark separates the problem into an **instance** (the
+context known before the scenario is revealed) and a **random scenario** (the uncertain
+part). Decisions are made from the instance alone. Scenarios are used to generate
+anticipative targets and compute objective values.
+
+# Required methods (exogenous benchmarks, `{true}` only)
+- [`generate_instance`](@ref)`(bench, rng)`: returns a [`DataSample`](@ref) with instance
+  and features but **no scenario**. Scenarios are added later by [`generate_dataset`](@ref)
+  via [`generate_scenario`](@ref).
+- [`generate_scenario`](@ref)`(bench, rng; kwargs...)`: draws a random scenario.
+  Instance and context fields are passed as keyword arguments spread from `sample.context`.
+
+# Optional methods
+- [`generate_anticipative_solver`](@ref)`(bench)`: returns a callable
+  `(scenario; kwargs...) -> y` that computes the anticipative solution per scenario.
+- [`generate_parametric_anticipative_solver`](@ref)`(bench)`: returns a callable
+  `(θ, scenario; kwargs...) -> y` for the parametric anticipative subproblem
+  `argmin_{y ∈ Y} c(y, scenario) + θᵀy`.
+
+# Dataset generation (exogenous only)
+[`generate_dataset`](@ref) is specialised for `AbstractStochasticBenchmark{true}` and
+supports the three standard dataset structures via `nb_scenarios`:
 
-It follows the same interface as [`AbstractBenchmark`](@ref), with the addition of the following methods:
-- TODO
+| Setting | Call |
+|---------|------|
+| 1 instance with K scenarios | `generate_dataset(bench, 1; nb_scenarios=K)` |
+| N instances with 1 scenario | `generate_dataset(bench, N)` (default) |
+| N instances with K scenarios | `generate_dataset(bench, N; nb_scenarios=K)` |
+
+By default (no `target_policy`), each [`DataSample`](@ref) has `context` holding the
+instance fields (spread as solver kwargs) and `extra=(; scenario)` holding one scenario. 
+ +Provide a `target_policy(sample, scenarios) -> Vector{DataSample}` to compute labels. +This covers both anticipative (K samples, one per scenario) and SAA (1 sample from all K +scenarios) labeling strategies. """ abstract type AbstractStochasticBenchmark{exogenous} <: AbstractBenchmark end @@ -178,59 +253,219 @@ is_exogenous(::AbstractStochasticBenchmark{exogenous}) where {exogenous} = exoge is_endogenous(::AbstractStochasticBenchmark{exogenous}) where {exogenous} = !exogenous """ - generate_scenario(::AbstractStochasticBenchmark{true}, instance; kwargs...) + generate_scenario(::AbstractStochasticBenchmark{true}, rng::AbstractRNG; kwargs...) -> scenario + +Draw a random scenario. Instance and context fields are passed as keyword arguments, +spread from `sample.context`: + + scenario = generate_scenario(bench, rng; sample.context...) """ function generate_scenario end """ - generate_anticipative_solution(::AbstractStochasticBenchmark{true}, instance, scenario; kwargs...) + generate_anticipative_solver(::AbstractStochasticBenchmark{true}) -> callable + +Return a callable that computes the anticipative solution for a given scenario. +The instance and other solver-relevant fields are spread from the sample context. + +- For [`AbstractStochasticBenchmark`](@ref): returns `(scenario; context...) -> y`. +- For [`AbstractDynamicBenchmark`](@ref): returns + `(scenario; context...) -> Vector{DataSample}` — a full training trajectory. + + solver = generate_anticipative_solver(bench) + y = solver(scenario; sample.context...) # stochastic + trajectory = solver(scenario; sample.context...) # dynamic +""" +function generate_anticipative_solver end + +""" + generate_parametric_anticipative_solver(::AbstractStochasticBenchmark{true}) -> callable + +**Optional.** Return a callable `(θ, scenario; kwargs...) 
-> y` that solves the +parametric anticipative subproblem: + + argmin_{y ∈ Y(instance)} c(y, scenario) + θᵀy +""" +function generate_parametric_anticipative_solver end + +""" + generate_anticipative_solution(::AbstractStochasticBenchmark, instance, scenario; kwargs...) + +!!! warning "Deprecated" + Use [`generate_anticipative_solver`](@ref) instead, which returns a callable + `(scenario; kwargs...) -> y` consistent with the [`generate_maximizer`](@ref) + convention. """ function generate_anticipative_solution end +""" +$TYPEDSIGNATURES + +Default [`generate_sample`](@ref) for exogenous stochastic benchmarks. + +Calls [`generate_instance`](@ref), draws `nb_scenarios` scenarios via +[`generate_scenario`](@ref), then: +- Without `target_policy`: returns K unlabeled samples, each with one scenario in + `extra=(; scenario=ξ)`. +- With `target_policy`: calls `target_policy(sample, scenarios)` and returns the result. + +`target_policy(sample, scenarios) -> Vector{DataSample}` enables anticipative labeling +(K samples, one per scenario) or SAA (1 sample aggregating all K scenarios). +""" +function generate_sample( + bench::AbstractStochasticBenchmark{true}, + rng; + target_policy=nothing, + nb_scenarios::Int=1, + kwargs..., +) + sample = generate_instance(bench, rng; kwargs...) + scenarios = [generate_scenario(bench, rng; sample.context...) for _ in 1:nb_scenarios] + if isnothing(target_policy) + return [ + DataSample(; x=sample.x, θ=sample.θ, sample.context..., extra=(; scenario=ξ)) + for ξ in scenarios + ] + else + return target_policy(sample, scenarios) + end +end + +""" +$TYPEDSIGNATURES + +Specialised [`generate_dataset`](@ref) for exogenous stochastic benchmarks. + +Generates `nb_instances` problem instances, each with `nb_scenarios` independent +scenario draws. The scenario→sample mapping is controlled by the `target_policy`: +- Without `target_policy` (default): K scenarios produce K unlabeled samples (1:1). 
+- With `target_policy(sample, scenarios) -> Vector{DataSample}`: enables anticipative + labeling (K labeled samples) or SAA (1 sample aggregating all K scenarios). + +# Keyword arguments +- `nb_scenarios::Int = 1`: scenarios per instance (K). +- `target_policy`: when provided, called as `target_policy(sample, scenarios)` to + compute labels. Defaults to `nothing` (unlabeled samples). +- `seed`: passed to `MersenneTwister` when `rng` is not provided. +- `rng`: random number generator; overrides `seed` when provided. +- `kwargs...`: forwarded to [`generate_sample`](@ref). +""" +function generate_dataset( + bench::AbstractStochasticBenchmark{true}, + nb_instances::Int; + target_policy=nothing, + nb_scenarios::Int=1, + seed=nothing, + rng=MersenneTwister(seed), + kwargs..., +) + Random.seed!(rng, seed) + samples = DataSample[] + for _ in 1:nb_instances + new_samples = generate_sample(bench, rng; target_policy, nb_scenarios, kwargs...) + append!(samples, new_samples) + end + return samples +end + """ $TYPEDEF -Abstract type interface for dynamic benchmark problems. -This type should be used for benchmarks that involve multi-stage stochastic optimization problems. +Abstract type interface for multi-stage stochastic (dynamic) benchmark problems. + +Extends [`AbstractStochasticBenchmark`](@ref). The `{exogenous}` parameter retains its +meaning (whether uncertainty is independent of decisions). -It follows the same interface as [`AbstractStochasticBenchmark`](@ref), with the addition of the following methods: -TODO +# Primary entry point +- [`generate_environments`](@ref)`(bench, n; rng)`: mandatory (or implement + [`generate_environment`](@ref)`(bench, rng)`). The count-based default calls + [`generate_environment`](@ref) once per environment. + +# Additional optional methods +- [`generate_environment`](@ref)`(bench, rng)`: initialize a single rollout environment. 
+ Implement this instead of overriding [`generate_environments`](@ref) when environments + can be drawn independently. +- [`generate_baseline_policies`](@ref)`(bench)`: returns named baseline callables of + signature `(env) -> Vector{DataSample}` (full trajectory rollout). +- [`generate_dataset`](@ref)`(bench, environments; target_policy, ...)`: generates + training-ready [`DataSample`](@ref)s by calling `target_policy(env)` for each environment. + Requires `target_policy` as a mandatory keyword argument. """ abstract type AbstractDynamicBenchmark{exogenous} <: AbstractStochasticBenchmark{exogenous} end """ - generate_environment(::AbstractDynamicBenchmark, instance, rng::AbstractRNG; kwargs...) + generate_environment(::AbstractDynamicBenchmark, rng::AbstractRNG; kwargs...) -Initialize an environment for the given dynamic benchmark instance. +Initialize a single environment for the given dynamic benchmark. +Primary implementation target for the count-based [`generate_environments`](@ref) default. +Override [`generate_environments`](@ref) directly when environments cannot be drawn +independently (e.g. loading from files). """ function generate_environment end """ $TYPEDSIGNATURES -Default behaviour of `generate_environment` applied to a data sample. -Uses the info field of the sample as the instance. +Generate `n` environments for the given dynamic benchmark. +Primary entry point for dynamic training algorithms. +Override when environments cannot be drawn independently (e.g. loading from files). """ -function generate_environment( - bench::AbstractDynamicBenchmark, sample::DataSample, rng::AbstractRNG; kwargs... +function generate_environments( + bench::AbstractDynamicBenchmark, + n::Int; + seed=nothing, + rng=MersenneTwister(seed), + kwargs..., ) - return generate_environment(bench, sample.instance, rng; kwargs...) + Random.seed!(rng, seed) + return [generate_environment(bench, rng; kwargs...) 
for _ in 1:n] end """ $TYPEDSIGNATURES -Generate a vector of environments for the given dynamic benchmark and dataset. +Generate a training dataset from pre-built environments for an exogenous dynamic benchmark. + +For each environment, calls `target_policy(env)` to obtain a training trajectory +(`Vector{DataSample}`). The trajectories are concatenated into a flat dataset. + +`target_policy` is a **required** keyword argument. Use [`generate_baseline_policies`](@ref) +to obtain standard baseline callables (e.g. the anticipative solver). + +# Keyword arguments +- `target_policy`: **required** callable `(env) -> Vector{DataSample}`. +- `seed`: passed to `MersenneTwister` when `rng` is not provided. +- `rng`: random number generator. """ -function generate_environments( - bench::AbstractDynamicBenchmark, - dataset::AbstractArray; +function generate_dataset( + bench::AbstractDynamicBenchmark{true}, + environments::AbstractVector; + target_policy, seed=nothing, rng=MersenneTwister(seed), kwargs..., ) Random.seed!(rng, seed) - return map(dataset) do sample - generate_environment(bench, sample, rng; kwargs...) + samples = DataSample[] + for env in environments + trajectory = target_policy(env) + append!(samples, trajectory) end + return samples +end + +""" +$TYPEDSIGNATURES + +Convenience wrapper for exogenous dynamic benchmarks: generates `n` environments +via [`generate_environments`](@ref), then calls +[`generate_dataset`](@ref)`(bench, environments; target_policy, ...)`. + +`target_policy` is a **required** keyword argument. +""" +function generate_dataset( + bench::AbstractDynamicBenchmark{true}, n::Int; target_policy, seed=nothing, kwargs... +) + environments = generate_environments(bench, n; seed) + return generate_dataset(bench, environments; target_policy, seed, kwargs...) 
end diff --git a/test/dynamic_assortment.jl b/test/dynamic_assortment.jl index 1504421..a1a136e 100644 --- a/test/dynamic_assortment.jl +++ b/test/dynamic_assortment.jl @@ -276,11 +276,10 @@ end b = DynamicAssortmentBenchmark(; N=5, d=2, K=3, max_steps=20) # Generate test data - dataset = generate_dataset(b, 10; seed=0) - environments = generate_environments(b, dataset) + environments = generate_environments(b, 10; seed=0) # Get policies - policies = generate_policies(b) + policies = generate_baseline_policies(b) expert = policies[1] greedy = policies[2] @@ -311,6 +310,28 @@ end @test sum(greedy_action) == DAP.assortment_size(env) end +@testset "DynamicAssortment - generate_dataset with environments (exogenous)" begin + b = DynamicAssortmentBenchmark(; N=4, d=2, K=2, max_steps=10, exogenous=true) + envs = generate_environments(b, 4; seed=0) + policies = generate_baseline_policies(b) + expert = policies[1] + + # target_policy: env -> Vector{DataSample} (full trajectory) + target_policy = env -> evaluate_policy!(expert, env)[2] + + # vector-of-environments overload + dataset = generate_dataset(b, envs; target_policy=target_policy) + @test dataset isa Vector{DataSample} + @test !isempty(dataset) + @test all(!isnothing(s.x) for s in dataset) + @test all(!isnothing(s.y) for s in dataset) + + # count-based wrapper + dataset2 = generate_dataset(b, 3; seed=7, target_policy=target_policy) + @test dataset2 isa Vector{DataSample} + @test !isempty(dataset2) +end + @testset "DynamicAssortment - Model and Maximizer Integration" begin b = DynamicAssortmentBenchmark(; N=4, d=3, K=2) @@ -323,11 +344,10 @@ end sample = generate_sample(b, MersenneTwister(42)) @test hasfield(typeof(sample), :context) - dataset = generate_dataset(b, 3; seed=42) - environments = generate_environments(b, dataset) + environments = generate_environments(b, 3; seed=42) # Evaluate policy to get data samples - policies = generate_policies(b) + policies = generate_baseline_policies(b) _, data_samples = 
evaluate_policy!(policies[1], environments) # Test model-maximizer pipeline diff --git a/test/dynamic_vsp.jl b/test/dynamic_vsp.jl index 39a00aa..8564a2e 100644 --- a/test/dynamic_vsp.jl +++ b/test/dynamic_vsp.jl @@ -8,13 +8,12 @@ @test is_exogenous(b) @test !is_endogenous(b) - dataset = generate_dataset(b, 10) - environments = generate_environments(b, dataset; seed=0) + environments = generate_environments(b, 10; seed=0) env = environments[1] get_seed(env) - policies = generate_policies(b) + policies = generate_baseline_policies(b) lazy = policies[1] greedy = policies[2] @@ -26,8 +25,7 @@ @test mean(r_lazy) <= mean(r_greedy) env = environments[1] - instance = dataset[1].instance - scenario = generate_scenario(b, instance) + scenario = env.scenario v, y = generate_anticipative_solution(b, env, scenario; nb_epochs=2, reset_env=true) maximizer = generate_maximizer(b) @@ -37,8 +35,7 @@ θ = model(x) y = maximizer(θ; instance) - dataset2 = generate_dataset(b2, 10) - environments2 = generate_environments(b2, dataset2; seed=0) + environments2 = generate_environments(b2, 10; seed=0) env2 = environments2[1] x2, instance2 = observe(env2) model2 = generate_statistical_model(b2) @@ -54,3 +51,32 @@ @test isapprox(cost, anticipative_value; atol=1e-5) @test isapprox(cost, cost2; atol=1e-5) end + +@testset "DVSP - generate_dataset with environments" begin + using DecisionFocusedLearningBenchmarks.DynamicVehicleScheduling + + b = DynamicVehicleSchedulingBenchmark(; two_dimensional_features=true) + envs = generate_environments(b, 5; seed=0) + policies = generate_baseline_policies(b) + lazy = policies[1] + + # target_policy takes env -> Vector{DataSample} (full trajectory) + target_policy = env -> evaluate_policy!(lazy, env)[2] + + # vector-of-environments overload + dataset = generate_dataset(b, envs; target_policy=target_policy) + @test dataset isa Vector{DataSample} + @test !isempty(dataset) + @test all(!isnothing(s.x) for s in dataset) + @test all(!isnothing(s.y) for s in 
dataset) + + # count-based wrapper + dataset2 = generate_dataset(b, 3; seed=1, target_policy=target_policy) + @test dataset2 isa Vector{DataSample} + @test !isempty(dataset2) + + # seed keyword is forwarded: same seed → same dataset + dataset3a = generate_dataset(b, 3; seed=42, target_policy=target_policy) + dataset3b = generate_dataset(b, 3; seed=42, target_policy=target_policy) + @test length(dataset3a) == length(dataset3b) +end diff --git a/test/dynamic_vsp_plots.jl b/test/dynamic_vsp_plots.jl index 1fc822b..345e823 100644 --- a/test/dynamic_vsp_plots.jl +++ b/test/dynamic_vsp_plots.jl @@ -4,22 +4,20 @@ # Create test benchmark and data (similar to scripts/a.jl) b = DynamicVehicleSchedulingBenchmark(; two_dimensional_features=true) - dataset = generate_dataset(b, 3) - environments = generate_environments(b, dataset; seed=0) + environments = generate_environments(b, 3; seed=0) env = environments[1] # Test basic plotting functions fig1 = DVSP.plot_instance(env) @test fig1 isa Plots.Plot - instance = dataset[1].instance - scenario = generate_scenario(b, instance; seed=0) + scenario = env.scenario v, y = generate_anticipative_solution(b, env, scenario; nb_epochs=3, reset_env=true) fig2 = DVSP.plot_epochs(y) @test fig2 isa Plots.Plot - policies = generate_policies(b) + policies = generate_baseline_policies(b) lazy = policies[1] _, d = evaluate_policy!(lazy, env) fig3 = DVSP.plot_routes(d[1].instance, d[1].y) diff --git a/test/maintenance.jl b/test/maintenance.jl index 32f90ed..a2a9983 100644 --- a/test/maintenance.jl +++ b/test/maintenance.jl @@ -165,11 +165,10 @@ end b = MaintenanceBenchmark() # Generate test data - dataset = generate_dataset(b, 10; seed=0) - environments = generate_environments(b, dataset) + environments = generate_environments(b, 10; seed=0) # Get policies - policies = generate_policies(b) + policies = generate_baseline_policies(b) greedy = policies[1] @test greedy.name == "Greedy" @@ -200,11 +199,10 @@ end sample = generate_sample(b, 
MersenneTwister(42)) @test hasfield(typeof(sample), :context) - dataset = generate_dataset(b, 3; seed=42) - environments = generate_environments(b, dataset) + environments = generate_environments(b, 3; seed=42) # Evaluate policy to get data samples - policies = generate_policies(b) + policies = generate_baseline_policies(b) _, data_samples = evaluate_policy!(policies[1], environments) # Test model-maximizer pipeline diff --git a/test/vsp.jl b/test/vsp.jl index 0d6f5d2..2493d3e 100644 --- a/test/vsp.jl +++ b/test/vsp.jl @@ -8,16 +8,29 @@ b = StochasticVehicleSchedulingBenchmark(; nb_tasks=25, nb_scenarios=10) N = 5 - dataset = generate_dataset(b, N; seed=0, rng=StableRNG(0)) - mip_dataset = generate_dataset(b, N; seed=0, rng=StableRNG(0), algorithm=compact_mip) + + # Helper to build a target_policy that wraps a given algorithm + function make_svs_target_policy(algorithm) + return sample -> + DataSample(; sample.context..., x=sample.x, y=algorithm(sample.instance)) + end + + col_gen_policy = make_svs_target_policy(column_generation_algorithm) + mip_policy = make_svs_target_policy(compact_mip) + mipl_policy = make_svs_target_policy(compact_linearized_mip) + local_search_policy = make_svs_target_policy(local_search) + deterministic_policy = make_svs_target_policy(deterministic_mip) + + dataset = generate_dataset(b, N; seed=0, rng=StableRNG(0), target_policy=col_gen_policy) + mip_dataset = generate_dataset(b, N; seed=0, rng=StableRNG(0), target_policy=mip_policy) mipl_dataset = generate_dataset( - b, N; seed=0, rng=StableRNG(0), algorithm=compact_linearized_mip + b, N; seed=0, rng=StableRNG(0), target_policy=mipl_policy ) local_search_dataset = generate_dataset( - b, N; seed=0, rng=StableRNG(0), algorithm=local_search + b, N; seed=0, rng=StableRNG(0), target_policy=local_search_policy ) deterministic_dataset = generate_dataset( - b, N; seed=0, rng=StableRNG(0), algorithm=deterministic_mip + b, N; seed=0, rng=StableRNG(0), target_policy=deterministic_policy ) @test 
length(dataset) == N