**DecisionFocusedLearningBenchmarks.jl** provides a collection of benchmark problems for evaluating decision-focused learning algorithms. The package offers:

- **Collection of benchmark problems** spanning diverse applications
- **Common tools** for creating datasets, statistical models, and optimization algorithms
- **Generic interface** for building custom benchmarks
- Compatibility with [InferOpt.jl](https://github.com/JuliaDecisionFocusedLearning/InferOpt.jl) and the whole [JuliaDecisionFocusedLearning](https://github.com/JuliaDecisionFocusedLearning) ecosystem
This guide covers everything you need to work with existing benchmarks in DecisionFocusedLearningBenchmarks.jl: generating datasets, assembling DFL pipeline components, applying algorithms, and evaluating results.

---

## What is a benchmark?

A benchmark bundles a problem family (an instance generator, a combinatorial solver, and a statistical model architecture) into a single object. It provides everything needed to run a Decision-Focused Learning experiment out of the box, without having to create each component from scratch.

Three abstract types cover the main settings:

- **`AbstractBenchmark`**: static problems (one instance, one decision)
- **`AbstractStochasticBenchmark{exogenous}`**: stochastic problems (the type parameter indicates whether uncertainty is exogenous)

- **`generate_statistical_model`**: returns an untrained neural network that maps input features `x` to cost parameters `θ`.
- **`generate_maximizer`**: returns a callable `(θ; context...) -> y` that solves the combinatorial problem given cost parameters.
- **`generate_dataset`**: returns labeled training data as a `Vector{DataSample}`.
At inference time these two pieces compose naturally as an end-to-end policy:

```julia
θ = model(sample.x)                  # predict cost parameters
y = maximizer(θ; sample.context...)  # solve the optimization problem
```

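As a concrete sketch, both pieces can be obtained from a benchmark via the `generate_*` functions listed above. The snippet below uses `ArgmaxBenchmark`, which appears later in this guide; treat it as an illustration of the interface rather than a verbatim transcript of any specific benchmark's behavior.

```julia
using DecisionFocusedLearningBenchmarks

bench = ArgmaxBenchmark()
model = generate_statistical_model(bench)  # untrained predictor: x -> θ
maximizer = generate_maximizer(bench)      # solver: (θ; context...) -> y

sample = generate_dataset(bench, 1; seed=0)[1]
θ = model(sample.x)                  # predict cost parameters
y = maximizer(θ; sample.context...)  # solve the optimization problem
```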
---

All data in the package is represented as [`DataSample`](@ref) objects.

| Field | Type | Description |
|---|---|---|
| `x` | any | Input features (fed to the statistical model) |
| `θ` | any | Intermediate cost parameters |
| `y` | any | Output decision / solution |
| `context` | `NamedTuple` | Solver kwargs spread into `maximizer(θ; sample.context...)` |
| `extra` | `NamedTuple` | Non-solver data (scenario, reward, step, …), never passed to the solver |

Not all fields are populated in every sample, depending on the setting. For convenience, named entries inside `context` and `extra` can be accessed directly on the sample via property forwarding:

```julia
sample.instance   # looks up :instance in context first, then in extra
sample.scenario   # looks up :scenario in context first, then in extra
```

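For illustration only, a sample carrying both kinds of named entries might be built as follows. The keyword constructor shown here is an assumption based on the field table above, not a documented guarantee.

```julia
using DecisionFocusedLearningBenchmarks

# Hypothetical sample: solver kwargs live in `context`, bookkeeping in `extra`.
sample = DataSample(; x=randn(5), context=(; instance=42), extra=(; scenario=7))

sample.instance  # 42, resolved from `context`
sample.scenario  # 7, resolved from `extra`
```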
---

## Benchmark type specifics

### Static benchmarks

For static benchmarks (`<:AbstractBenchmark`), `generate_dataset` may compute a default ground-truth label `y` if the benchmark implements it:

```julia
bench = ArgmaxBenchmark()
dataset = generate_dataset(bench, 100; seed=0)  # Vector{DataSample} with x, y, …
```

You can override the labels by providing a `target_policy`:
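A minimal sketch of what this could look like, under the assumption that `target_policy` is a callable producing a decision from the cost parameters; the exact signature here is illustrative, not a documented contract.

```julia
using DecisionFocusedLearningBenchmarks

# Illustrative policy: one-hot encoding of the argmax (assumed signature).
one_hot_argmax(θ) = [i == argmax(θ) ? 1.0 : 0.0 for i in eachindex(θ)]

bench = ArgmaxBenchmark()
dataset = generate_dataset(bench, 100; seed=0, target_policy=one_hot_argmax)
```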

For `AbstractStochasticBenchmark{true}` benchmarks, the default call returns *unlabeled* samples; each sample carries one scenario in `sample.extra.scenario`:

```julia
bench = StochasticVehicleSchedulingBenchmark()
dataset = generate_dataset(bench, 50; seed=0)  # unlabeled; one scenario per sample
```

Dynamic benchmarks use a two-step workflow:

```julia
bench = DynamicVehicleSchedulingBenchmark()

# Step 1: create environments (reusable across experiments)
envs = generate_environments(bench, 10; seed=0)

# Step 2: roll out a policy to collect training trajectories
policy = generate_baseline_policies(bench)[1]  # e.g. lazy policy
```