diff --git a/docs/make.jl b/docs/make.jl index 6b79219..4a1ec1b 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -26,9 +26,10 @@ makedocs(; sitename="DecisionFocusedLearningBenchmarks.jl", format=Documenter.HTML(; size_threshold=typemax(Int)), pages=[ - "Home" => [ - "Getting started" => "index.md", - "Understanding Benchmark Interfaces" => "benchmark_interfaces.md", + "Home" => "index.md", + "Guides" => [ + "Using benchmarks" => "using_benchmarks.md", + "Creating custom benchmarks" => "custom_benchmarks.md", ], "Tutorials" => include_tutorial ? md_tutorial_files : [], "Benchmark problems list" => benchmark_files, diff --git a/docs/src/benchmark_interfaces.md b/docs/src/benchmark_interfaces.md deleted file mode 100644 index 7c9e457..0000000 --- a/docs/src/benchmark_interfaces.md +++ /dev/null @@ -1,153 +0,0 @@ -# Understanding Benchmark Interface - -This guide explains how benchmarks work through the common interface of DecisionFocusedLearningBenchmarks.jl. -Understanding this interface is essential for using existing benchmarks and implementing new ones. - -## Core Concepts - -### DataSample Structure - -All benchmarks work with [`DataSample`](@ref) objects that encapsulate the data needed for decision-focused learning: - -```julia -@kwdef struct DataSample{I,F,S,C} - x::F = nothing # Input features of the policy - θ::C = nothing # Intermediate cost/utility parameters - y::S = nothing # Output solution - info::I = nothing # Additional data information (e.g., problem instance) -end -``` - -The `DataSample` provides flexibility, not all fields need to be populated depending on the benchmark type and use. 
- -### Benchmark Type Hierarchy - -The package defines a hierarchy of three abstract types: - -``` -AbstractBenchmark -└── AbstractStochasticBenchmark{exogenous} - └── AbstractDynamicBenchmark{exogenous} -``` - -- **`AbstractBenchmark`**: static, single-stage optimization problems -- **`AbstractStochasticBenchmark{exogenous}`**: stochastic, single stage optimization problems -- **`AbstractDynamicBenchmark{exogenous}`**: multi-stage sequential decision-making problems - -The `{exogenous}` type parameter indicates whether uncertainty distribution comes from external sources (`true`) or is influenced by decisions (`false`), which affects available methods. - -## Common Interface Methods - -### Data Generation - -Every benchmark must implement a data generation method: - -```julia -# Generate a single sample -generate_sample(benchmark::AbstractBenchmark, rng::AbstractRNG; kwargs...) -> DataSample -``` -This method should output a single `DataSample` given a random number generator and optional parameters as keyword arguments. - -If needed, benchmarks can instead override the [`generate_dataset`](@ref) method to directly create an entire dataset of size `size`: -```julia -generate_dataset(benchmark::AbstractBenchmark, size::Int; kwargs...) -> Vector{DataSample} -``` - -The default `generate_dataset` implementation calls `generate_sample` repeatedly, but benchmarks can override this for custom dataset generation logic. - -### DFL Policy Components - -Benchmarks provide the building blocks for decision-focused learning policies: - -```julia -# Create a statistical model (e.g., a neural network) -generate_statistical_model(benchmark::AbstractBenchmark; kwargs...) - -# Create an optimization maximizer/solver -generate_maximizer(benchmark::AbstractBenchmark; kwargs...) -``` - -The statistical model typically maps features `x` to cost parameters `θ`. 
-The maximizer solves optimization problems given cost parameters `θ` (and potentially additional problem dependent keyword arguments), returning decision `y`. - -### Benchmark Policies - -Benchmarks can provide baseline policies for comparison and evaluation: - -```julia -# Get baseline policies for comparison -generate_policies(benchmark::AbstractBenchmark) -> Tuple{Policy} -``` -This returns a tuple of `Policy` objects representing different benchmark-specific policies. -A `Policy` is just a function with a name and description: -```julia -struct Policy{F} - name::String - description::String - policy_function::F -end -``` - -Policies can be evaluated across multiple instances/environments using: -```julia -evaluate_policy!(policy::Policy, instances; kwargs...) -> (rewards, data_samples) -``` - -### Evaluation Methods - -Optional methods for analysis and visualization: - -```julia -# Visualize data samples -plot_data(benchmark::AbstractBenchmark, sample::DataSample; kwargs...) -plot_instance(benchmark::AbstractBenchmark, instance; kwargs...) -plot_solution(benchmark::AbstractBenchmark, sample::DataSample, solution; kwargs...) - -# Compute optimality gap -compute_gap(benchmark::AbstractBenchmark, dataset, model, maximizer) -> Float64 - -# Evaluate objective value -objective_value(benchmark::AbstractBenchmark, sample::DataSample, solution) -``` - -## Benchmark-Specific Interfaces - -### Static Benchmarks - -Static benchmarks follow the basic interface above. - -### Stochastic Benchmarks - -Exogenous stochastic benchmarks add methods for scenario generation and anticipative solutions: - -```julia -# Generate uncertainty scenarios (for exogenous benchmarks) -generate_scenario(benchmark::AbstractStochasticBenchmark{true}, instance; kwargs...) - -# Solve anticipative optimization problem for given scenario -generate_anticipative_solution(benchmark::AbstractStochasticBenchmark{true}, - instance, scenario; kwargs...) 
-``` - -### Dynamic Benchmarks - -In order to model sequential decision-making, dynamic benchmarks additionally work with environments. -For this, they implement methods to create environments from instances or datasets: -```julia -# Create environment for sequential decision-making -generate_environment(benchmark::AbstractDynamicBenchmark, instance, rng; kwargs...) -> <:AbstractEnvironment - -# Generate multiple environments -generate_environments(benchmark::AbstractDynamicBenchmark, dataset; kwargs...) -> Vector{<:AbstractEnvironment} -``` -Similarly to `generate_dataset` and `generate_sample`, one only needs to implement `generate_environment`, as `generate_environments` has a default implementation that calls it repeatedly. - -The [`AbstractEnvironment`](@ref) interface is defined as follows: -```julia -# Environment methods -get_seed(env::AbstractEnvironment) # Get current RNG seed -reset!(env::AbstractEnvironment; reset_rng::Bool, seed=get_seed(env)) # Reset to initial state -observe(env::AbstractEnvironment) -> (obs, info) # Get current observation -step!(env::AbstractEnvironment, action) -> reward # Take action, get reward -is_terminated(env::AbstractEnvironment) -> Bool # Check if episode ended -``` diff --git a/docs/src/custom_benchmarks.md b/docs/src/custom_benchmarks.md new file mode 100644 index 0000000..9c95b8b --- /dev/null +++ b/docs/src/custom_benchmarks.md @@ -0,0 +1,277 @@ +# Creating Custom Benchmarks + +This guide explains how to implement new benchmarks in +DecisionFocusedLearningBenchmarks.jl. It is aimed at developers who want to add +problems to the benchmark suite or integrate their own domains. + +--- + +## Type hierarchy + +``` +AbstractBenchmark +└── AbstractStochasticBenchmark{exogenous} + └── AbstractDynamicBenchmark{exogenous} +``` + +| Type | Use case | +|------|----------| +| `AbstractBenchmark` | Static, single-stage optimization (e.g. 
shortest path, portfolio) | +| `AbstractStochasticBenchmark{true}` | Single-stage with exogenous uncertainty (scenarios drawn independently of decisions) | +| `AbstractStochasticBenchmark{false}` | Single-stage with endogenous uncertainty (not yet used) | +| `AbstractDynamicBenchmark{true}` | Multi-stage sequential decisions with exogenous uncertainty | +| `AbstractDynamicBenchmark{false}` | Multi-stage sequential decisions with endogenous uncertainty | + +--- + +## Implementation strategies + +There are three strategies for data generation. Pick the one that best fits your +benchmark: + +| Strategy | Method to implement | When to use | +|----------|---------------------|-------------| +| **1** | `generate_instance(bench, rng; kwargs...) -> DataSample` | Samples are independent; `y=nothing` at generation time | +| **2** | `generate_sample(bench, rng; kwargs...) -> DataSample` | Samples are independent; you want to compute `y` inside `generate_sample` | +| **3** | `generate_dataset(bench, N; kwargs...) -> Vector{DataSample}` | Samples are not independent (e.g. loaded from shared files) | + +The default `generate_sample` calls `generate_instance` and then applies +`target_policy` to the returned sample. `generate_dataset` calls `generate_sample` +repeatedly and applies `target_policy` to each result. + +--- + +## `AbstractBenchmark`: required methods + +### Data generation (choose one strategy) + +```julia +# Strategy 1: recommended for most static benchmarks +generate_instance(bench::MyBenchmark, rng::AbstractRNG; kwargs...) -> DataSample + +# Strategy 2: when you want to compute y inside generate_sample +generate_sample(bench::MyBenchmark, rng::AbstractRNG; kwargs...) -> DataSample + +# Strategy 3: when samples are not independent +generate_dataset(bench::MyBenchmark, N::Int; kwargs...) 
-> Vector{DataSample} +``` + +### Pipeline components (required) + +```julia +generate_statistical_model(bench::MyBenchmark; seed=nothing) +# Returns an untrained Flux model mapping x -> θ + +generate_maximizer(bench::MyBenchmark) +# Returns a callable (θ; context...) -> y +``` + +### Optional methods + +```julia +is_minimization_problem(bench::MyBenchmark) -> Bool # default: false (maximization) +objective_value(bench::MyBenchmark, sample::DataSample, y) -> Real +compute_gap(bench::MyBenchmark, dataset, model, maximizer) -> Float64 +plot_data(bench::MyBenchmark, sample::DataSample; kwargs...) +plot_instance(bench::MyBenchmark, instance; kwargs...) +plot_solution(bench::MyBenchmark, sample::DataSample, y; kwargs...) +generate_baseline_policies(bench::MyBenchmark) -> collection of callables +``` + +--- + +## `AbstractStochasticBenchmark{true}`: additional methods + +For stochastic benchmarks with exogenous uncertainty, implement: + +```julia +# Instance + features, no scenario (y = nothing) +generate_instance(bench::MyStochasticBenchmark, rng::AbstractRNG; kwargs...) -> DataSample + +# Draw one scenario given the instance encoded in context +generate_scenario(bench::MyStochasticBenchmark, rng::AbstractRNG; context...) -> scenario +# Note: sample.context is spread as kwargs when called by the framework +``` + +The framework `generate_sample` calls `generate_instance`, draws `nb_scenarios` +scenarios via `generate_scenario`, then: +- If `target_policy` is provided: calls `target_policy(sample, scenarios) -> Vector{DataSample}`. +- Otherwise: returns unlabeled samples with `extra=(; scenario=ξ)` for each scenario. + +#### Anticipative solver (optional) + +```julia +generate_anticipative_solver(bench::MyStochasticBenchmark) +# Returns a callable: (scenario; context...) 
-> y +``` + +#### `DataSample` conventions + +- `context`: solver kwargs (instance data, graph, capacities, …) +- `extra`: scenario: **never** passed to the maximizer + +```julia +DataSample(; x=features, y=nothing, + instance=my_instance, # goes into context + extra=(; scenario=ξ)) +``` + +--- + +## `AbstractDynamicBenchmark`: additional methods + +Dynamic benchmarks extend stochastic ones with an environment-based rollout interface. + +### Environment generation + +```julia +# Strategy A: generate one environment at a time (default implementation of +# generate_environments calls this repeatedly) +generate_environment(bench::MyDynamicBenchmark, rng::AbstractRNG; kwargs...) -> AbstractEnvironment + +# Strategy B: override when environments are not independent (e.g. loaded from files) +generate_environments(bench::MyDynamicBenchmark, n::Int; rng, kwargs...) -> Vector{<:AbstractEnvironment} +``` + +### `AbstractEnvironment` interface + +Your environment type must implement: + +```julia +get_seed(env::MyEnv) # Return the RNG seed used at creation +reset!(env::MyEnv; reset_rng::Bool, seed=get_seed(env)) # Reset to initial state +observe(env::MyEnv) -> (observation, info) # Current observation +step!(env::MyEnv, action) -> reward # Apply action, advance state +is_terminated(env::MyEnv) -> Bool # True when episode has ended +``` + +### Baseline policies (required for `generate_dataset`) + +```julia +generate_baseline_policies(bench::MyDynamicBenchmark) +# Returns named callables: (env) -> Vector{DataSample} +# Each callable performs a full episode rollout and returns the trajectory. +``` + +`generate_dataset` for dynamic benchmarks **requires** a `target_policy` kwarg, +there is no default. The `target_policy` must be a callable `(env) -> Vector{DataSample}`. + +### `DataSample` conventions + +- `context`: solver-relevant state (observation fields, graph, etc.) 
+- `extra`: reward, step counter, history (never passed to the maximizer) + +```julia +DataSample(; x=features, y=action, + instance=current_state, # goes into context + extra=(; reward=r, step=t)) +``` + +--- + +## `DataSample` construction guide + +| Benchmark category | `context` fields | `extra` fields | +|--------------------|-----------------|----------------| +| Static | instance, graph, capacities, … | — | +| Stochastic | instance (not scenario) | `scenario` | +| Dynamic | solver-relevant state / observation | `reward`, `step`, `history`, … | + +Any named argument that is not `x`, `θ`, `y`, `context`, or `extra` is treated as a `context` field: + +```julia +# Equivalent forms: +DataSample(; x=feat, y=sol, instance=inst) +DataSample(; x=feat, y=sol, context=(; instance=inst)) + +# With extra: +DataSample(; x=feat, y=nothing, instance=inst, extra=(; scenario=ξ)) +``` + +Keys must not appear in both `context` and `extra`, the constructor raises an error. + +--- + +## Small examples + +### Static benchmark + +```julia +using DecisionFocusedLearningBenchmarks +const DFLBenchmarks = DecisionFocusedLearningBenchmarks + +struct MyStaticBenchmark <: AbstractBenchmark end + +function DFLBenchmarks.generate_instance(bench::MyStaticBenchmark, rng::AbstractRNG; kwargs...) + instance = build_my_instance(rng) + x = compute_features(instance) + return DataSample(; x=x, instance=instance) # y = nothing +end + + +DFLBenchmarks.generate_statistical_model(bench::MyStaticBenchmark; seed=nothing) = + Chain(Dense(10 => 32, relu), Dense(32 => 5)) + +DFLBenchmarks.generate_maximizer(bench::MyStaticBenchmark) = + (θ; instance, kwargs...) -> solve_my_problem(θ, instance) +``` + +### Stochastic benchmark + +```julia + +struct MyStochasticBenchmark <: AbstractStochasticBenchmark{true} end + +function DFLBenchmarks.generate_instance(bench::MyStochasticBenchmark, rng::AbstractRNG; kwargs...) 
+ instance = build_my_instance(rng) + x = compute_features(instance) + return DataSample(; x=x, instance=instance) +end + +function DFLBenchmarks.generate_scenario(bench::MyStochasticBenchmark, rng::AbstractRNG; instance, kwargs...) + return sample_scenario(instance, rng) +end + +DFLBenchmarks.generate_anticipative_solver(bench::MyStochasticBenchmark) = + (scenario; instance, kwargs...) -> solve_with_scenario(instance, scenario) +``` + +### Dynamic benchmark + +```julia +struct MyDynamicBenchmark <: AbstractDynamicBenchmark{true} end + +mutable struct MyEnv <: AbstractEnvironment + const instance::MyInstance + const seed::Int + state::MyState +end + +DFLBenchmarks.get_seed(env::MyEnv) = env.seed +DFLBenchmarks.reset!(env::MyEnv; reset_rng=true, seed=env.seed) = (env.state = initial_state(env.instance)) +DFLBenchmarks.observe(env::MyEnv) = (env.state, nothing) +DFLBenchmarks.step!(env::MyEnv, action) = apply_action!(env.state, action) +DFLBenchmarks.is_terminated(env::MyEnv) = env.state.done + +function DFLBenchmarks.generate_environment(bench::MyDynamicBenchmark, rng::AbstractRNG; kwargs...) 
+ inst = build_my_instance(rng) + seed = rand(rng, Int) + return MyEnv(inst, seed, initial_state(inst)) +end + +function DFLBenchmarks.generate_baseline_policies(bench::MyDynamicBenchmark) + greedy = function(env) + samples = DataSample[] + reset!(env) + while !is_terminated(env) + obs, _ = observe(env) + x = compute_features(obs) + y = greedy_action(obs) + r = step!(env, y) + push!(samples, DataSample(; x=x, y=y, instance=obs, extra=(; reward=r))) + end + return samples + end + return (; greedy) +end +``` diff --git a/docs/src/using_benchmarks.md b/docs/src/using_benchmarks.md new file mode 100644 index 0000000..a1e95dd --- /dev/null +++ b/docs/src/using_benchmarks.md @@ -0,0 +1,173 @@ +# Using Benchmarks + +This guide covers everything you need to work with existing benchmarks in +DecisionFocusedLearningBenchmarks.jl: generating datasets, assembling DFL pipeline +components, and evaluating results. + +--- + +## DataSample at a glance + +All data in the package is represented as [`DataSample`](@ref) objects. + +| Field | Type | Description | +|-------|------|-------------| +| `x` | any | Input features (fed to the statistical model) | +| `θ` | any | Intermediate cost parameters | +| `y` | any | Output decision / solution | +| `context` | `NamedTuple` | Solver kwargs — spread into `maximizer(θ; sample.context...)` | +| `extra` | `NamedTuple` | Non-solver data (scenario, reward, step, …) — never passed to the solver | + +Not all fields are populated in every sample. 
For convenience, named entries inside +`context` and `extra` can be accessed directly on the sample via property forwarding: + +```julia +sample.instance # looks up :instance in context first, then in extra +sample.scenario # looks up :scenario in context first, then in extra +``` + +--- + +## Generating datasets for training + +### Static benchmarks + +For static benchmarks (`<:AbstractBenchmark`) the framework already computes the +ground-truth label `y`: + +```julia +bench = ArgmaxBenchmark() +dataset = generate_dataset(bench, 100; seed=0) # Vector{DataSample} with x, y, context +``` + +You can override the labels by providing a `target_policy`: + +```julia +my_policy = sample -> DataSample(; sample.context..., x=sample.x, + y=my_algorithm(sample.instance)) +dataset = generate_dataset(bench, 100; seed=0, target_policy=my_policy) +``` + +### Stochastic benchmarks (exogenous) + +For `AbstractStochasticBenchmark{true}` benchmarks the default call returns +*unlabeled* samples, each sample carries one scenario in `sample.extra.scenario`: + +```julia +bench = StochasticVehicleSchedulingBenchmark() +dataset = generate_dataset(bench, 20; seed=0) # y = nothing +``` + +Request multiple scenarios per instance with `nb_scenarios`: + +```julia +dataset = generate_dataset(bench, 20; seed=0, nb_scenarios=5) +# returns 20 × 5 = 100 samples +``` + +To compute labels, wrap your algorithm as a `target_policy`: + +```julia +anticipative = generate_anticipative_solver(bench) # (scenario; kwargs...) 
-> y + +policy = (sample, scenarios) -> [ + DataSample(; sample.context..., x=sample.x, + y=anticipative(ξ; sample.context...)) + for ξ in scenarios +] +labeled = generate_dataset(bench, 20; seed=0, nb_scenarios=5, target_policy=policy) +``` + +### Dynamic benchmarks + +Dynamic benchmarks use a two-step workflow: + +```julia +bench = DynamicVehicleSchedulingBenchmark() + +# Step 1 — create environments (reusable across experiments) +envs = generate_environments(bench, 10; seed=0) + +# Step 2 — roll out a policy to collect training trajectories +policy = generate_baseline_policies(bench)[1] # e.g. lazy policy +dataset = generate_dataset(bench, envs; target_policy=policy) +# dataset is a flat Vector{DataSample} of all steps across all trajectories +``` + +`target_policy` is **required** for dynamic benchmarks (there is no default label). +It must be a callable `(env) -> Vector{DataSample}` that performs a full episode +rollout and returns the resulting trajectory. + +### Seed / RNG control + +All `generate_dataset` and `generate_environments` calls accept either `seed` +(creates an internal `MersenneTwister`) or `rng` for full control: + +```julia +using Random +rng = MersenneTwister(42) +dataset = generate_dataset(bench, 50; rng=rng) +``` + +--- + +## DFL pipeline components + +```julia +model = generate_statistical_model(bench; seed=0) # untrained Flux model +maximizer = generate_maximizer(bench) # combinatorial oracle +``` + +These two pieces compose naturally: + +```julia +θ = model(sample.x) # predict cost parameters +y = maximizer(θ; sample.context...) 
# solve the optimization problem
+```
+
+---
+
+## Evaluation
+
+```julia
+# Average relative optimality gap across a dataset
+gap = compute_gap(bench, dataset, model, maximizer)
+```
+
+```julia
+# Objective value for a single decision
+obj = objective_value(bench, sample, y)
+```
+
+---
+
+## Baseline policies
+
+`generate_baseline_policies` returns a collection of named callables that can serve as
+reference points or as `target_policy` arguments:
+
+```julia
+policies = generate_baseline_policies(bench)
+pol = policies[1] # e.g. greedy, lazy, or anticipative policy
+```
+
+- **Static / stochastic:** `pol(sample) -> DataSample`
+- **Dynamic:** `pol(env) -> Vector{DataSample}` (full episode trajectory)
+
+For dynamic benchmarks you can evaluate a policy over multiple episodes:
+
+```julia
+rewards, samples = evaluate_policy!(pol, envs, n_episodes)
+```
+
+---
+
+## Visualization
+
+Where implemented, benchmarks provide benchmark-specific plotting helpers:
+
+```julia
+plot_data(bench, sample) # overview of a data sample
+plot_instance(bench, instance) # raw problem instance
+plot_solution(bench, sample, y) # overlay solution on instance
+```
diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl
index 2f1c320..6561b7a 100644
--- a/src/DecisionFocusedLearningBenchmarks.jl
+++ b/src/DecisionFocusedLearningBenchmarks.jl
@@ -67,9 +67,10 @@ export AbstractEnvironment, get_seed, is_terminated, observe, reset!, step!
 
 export Policy, evaluate_policy!
-export generate_sample, generate_dataset, generate_environments, generate_environment +export generate_instance, + generate_sample, generate_dataset, generate_environments, generate_environment export generate_scenario -export generate_policies +export generate_baseline_policies export generate_statistical_model export generate_maximizer export generate_anticipative_solution diff --git a/src/DynamicAssortment/DynamicAssortment.jl b/src/DynamicAssortment/DynamicAssortment.jl index c943dba..df0e64e 100644 --- a/src/DynamicAssortment/DynamicAssortment.jl +++ b/src/DynamicAssortment/DynamicAssortment.jl @@ -110,12 +110,13 @@ end """ $TYPEDSIGNATURES -Creates an [`Environment`](@ref) from an [`Instance`](@ref) of the dynamic assortment benchmark. -The seed of the environment is randomly generated using the provided random number generator. +Creates an [`Environment`](@ref) for the dynamic assortment benchmark. +The instance and seed are randomly generated using the provided random number generator. """ function Utils.generate_environment( - ::DynamicAssortmentBenchmark, instance::Instance, rng::AbstractRNG; kwargs... + b::DynamicAssortmentBenchmark, rng::AbstractRNG; kwargs... 
) + instance = Instance(b, rng) seed = rand(rng, 1:typemax(Int)) return Environment(instance; seed) end @@ -127,7 +128,7 @@ Returns two policies for the dynamic assortment benchmark: - `Greedy`: selects the assortment containing items with the highest prices - `Expert`: selects the assortment with the highest expected revenue (through brute-force enumeration) """ -function Utils.generate_policies(::DynamicAssortmentBenchmark) +function Utils.generate_baseline_policies(::DynamicAssortmentBenchmark) greedy = Policy( "Greedy", "policy that selects the assortment with items with the highest prices", diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl index fb0ea7e..1eba500 100644 --- a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl +++ b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl @@ -18,6 +18,25 @@ using Random: Random, AbstractRNG, MersenneTwister, seed!, randperm using Requires: @require using Statistics: mean, quantile +""" +$TYPEDEF + +Abstract type for dynamic vehicle scheduling benchmarks. + +# Fields +$TYPEDFIELDS +""" +@kwdef struct DynamicVehicleSchedulingBenchmark <: AbstractDynamicBenchmark{true} + "maximum number of customers entering the system per epoch" + max_requests_per_epoch::Int = 10 + "time between decision and dispatch of a vehicle" + Δ_dispatch::Float64 = 1.0 + "duration of an epoch" + epoch_duration::Float64 = 1.0 + "whether to use two-dimensional features" + two_dimensional_features::Bool = false +end + include("utils.jl") # static vsp stuff @@ -38,46 +57,35 @@ include("anticipative_solver.jl") include("features.jl") include("policy.jl") -""" -$TYPEDEF - -Abstract type for dynamic vehicle scheduling benchmarks. 
- -# Fields -$TYPEDFIELDS -""" -@kwdef struct DynamicVehicleSchedulingBenchmark <: AbstractDynamicBenchmark{true} - "maximum number of customers entering the system per epoch" - max_requests_per_epoch::Int = 10 - "time between decision and dispatch of a vehicle" - Δ_dispatch::Float64 = 1.0 - "duration of an epoch" - epoch_duration::Float64 = 1.0 - "whether to use two-dimensional features" - two_dimensional_features::Bool = false -end - """ $TYPEDSIGNATURES -Generate a dataset for the dynamic vehicle scheduling benchmark. -Returns a vector of [`DataSample`](@ref) objects, each containing an [`Instance`](@ref). -The dataset is generated from pre-existing DVRPTW files. +Generate environments for the dynamic vehicle scheduling benchmark. +Reads from pre-existing DVRPTW files and creates [`DVSPEnv`](@ref) environments. """ -function Utils.generate_dataset(b::DynamicVehicleSchedulingBenchmark, dataset_size::Int=1) +function Utils.generate_environments( + b::DynamicVehicleSchedulingBenchmark, + n::Int; + seed=nothing, + rng=MersenneTwister(seed), + kwargs..., +) (; max_requests_per_epoch, Δ_dispatch, epoch_duration, two_dimensional_features) = b files = readdir(datadep"dvrptw"; join=true) - dataset_size = min(dataset_size, length(files)) + n = min(n, length(files)) return [ - DataSample(; - instance=Instance( + generate_environment( + b, + Instance( read_vsp_instance(files[i]); max_requests_per_epoch, Δ_dispatch, epoch_duration, two_dimensional_features, ), - ) for i in 1:dataset_size + rng; + kwargs..., + ) for i in 1:n ] end @@ -87,7 +95,7 @@ $TYPEDSIGNATURES Creates an environment from an [`Instance`](@ref) of the dynamic vehicle scheduling benchmark. The seed of the environment is randomly generated using the provided random number generator. """ -function Utils.generate_environment( +function generate_environment( ::DynamicVehicleSchedulingBenchmark, instance::Instance, rng::AbstractRNG; kwargs... 
) seed = rand(rng, 1:typemax(Int)) @@ -107,16 +115,6 @@ end """ $TYPEDSIGNATURES -Generate a scenario for the dynamic vehicle scheduling benchmark. -This is a wrapper around the generic scenario generation function. -""" -function Utils.generate_scenario(b::DynamicVehicleSchedulingBenchmark, args...; kwargs...) - return Utils.generate_scenario(args...; kwargs...) -end - -""" -$TYPEDSIGNATURES - Generate an anticipative solution for the dynamic vehicle scheduling benchmark. The solution is computed using the anticipative solver with the benchmark's feature configuration. """ @@ -131,12 +129,27 @@ end """ $TYPEDSIGNATURES +Return the anticipative solver for the dynamic vehicle scheduling benchmark. +The callable takes a scenario and solver kwargs (including `instance`) and returns a +training trajectory as a `Vector{DataSample}`. +""" +function Utils.generate_anticipative_solver(::DynamicVehicleSchedulingBenchmark) + return (scenario; instance, kwargs...) -> begin + env = DVSPEnv(instance, scenario) + _, trajectory = anticipative_solver(env; reset_env=false, kwargs...) + return trajectory + end +end + +""" +$TYPEDSIGNATURES + Generate baseline policies for the dynamic vehicle scheduling benchmark. 
Returns a tuple containing: - `lazy`: A policy that dispatches vehicles only when they are ready - `greedy`: A policy that dispatches vehicles to the nearest customer """ -function Utils.generate_policies(b::DynamicVehicleSchedulingBenchmark) +function Utils.generate_baseline_policies(::DynamicVehicleSchedulingBenchmark) lazy = Policy( "Lazy", "Lazy policy that dispatches vehicles only when they are ready.", diff --git a/src/DynamicVehicleScheduling/environment.jl b/src/DynamicVehicleScheduling/environment.jl index 339bd78..a7f4c17 100644 --- a/src/DynamicVehicleScheduling/environment.jl +++ b/src/DynamicVehicleScheduling/environment.jl @@ -23,6 +23,17 @@ function DVSPEnv(instance::Instance; seed=nothing) return DVSPEnv(instance, initial_state, scenario, rng, seed) end +""" +$TYPEDSIGNATURES + +Constructor for [`DVSPEnv`](@ref) from a pre-existing scenario. +""" +function DVSPEnv(instance::Instance, scenario::Scenario; seed=nothing) + rng = MersenneTwister(seed) + initial_state = DVSPState(instance; scenario[1]...) + return DVSPEnv(instance, initial_state, scenario, rng, seed) +end + currrent_epoch(env::DVSPEnv) = current_epoch(env.state) epoch_duration(env::DVSPEnv) = epoch_duration(env.instance) last_epoch(env::DVSPEnv) = last_epoch(env.instance) diff --git a/src/DynamicVehicleScheduling/scenario.jl b/src/DynamicVehicleScheduling/scenario.jl index 4f7746e..eb189e8 100644 --- a/src/DynamicVehicleScheduling/scenario.jl +++ b/src/DynamicVehicleScheduling/scenario.jl @@ -1,4 +1,3 @@ - struct Scenario "indices of the new requests in each epoch" indices::Vector{Vector{Int}} @@ -51,6 +50,8 @@ function Utils.generate_scenario( return Scenario(new_indices, new_service_time, new_start_time) end -function Utils.generate_scenario(sample::DataSample; kwargs...) - return Utils.generate_scenario(sample.instance; kwargs...) +function Utils.generate_scenario( + ::DynamicVehicleSchedulingBenchmark, rng::AbstractRNG; instance, kwargs... 
+) + return generate_scenario(instance; rng) end diff --git a/src/FixedSizeShortestPath/FixedSizeShortestPath.jl b/src/FixedSizeShortestPath/FixedSizeShortestPath.jl index 3a350e5..ee0586a 100644 --- a/src/FixedSizeShortestPath/FixedSizeShortestPath.jl +++ b/src/FixedSizeShortestPath/FixedSizeShortestPath.jl @@ -133,7 +133,10 @@ $TYPEDSIGNATURES Initialize a linear model for `bench` using `Flux`. """ -function Utils.generate_statistical_model(bench::FixedSizeShortestPathBenchmark) +function Utils.generate_statistical_model( + bench::FixedSizeShortestPathBenchmark; seed=nothing +) + Random.seed!(seed) (; p, graph) = bench return Chain(Dense(p, ne(graph))) end diff --git a/src/Maintenance/Maintenance.jl b/src/Maintenance/Maintenance.jl index 5dc0580..64e5ec5 100644 --- a/src/Maintenance/Maintenance.jl +++ b/src/Maintenance/Maintenance.jl @@ -114,12 +114,11 @@ end """ $TYPEDSIGNATURES -Creates an [`Environment`](@ref) from an [`Instance`](@ref) of the maintenance benchmark. -The seed of the environment is randomly generated using the provided random number generator. +Creates an [`Environment`](@ref) for the maintenance benchmark. +The instance and seed are randomly generated using the provided random number generator. """ -function Utils.generate_environment( - ::MaintenanceBenchmark, instance::Instance, rng::AbstractRNG; kwargs... -) +function Utils.generate_environment(b::MaintenanceBenchmark, rng::AbstractRNG; kwargs...) 
+ instance = Instance(b, rng) seed = rand(rng, 1:typemax(Int)) return Environment(instance; seed) end @@ -130,7 +129,7 @@ $TYPEDSIGNATURES Returns two policies for the dynamic assortment benchmark: - `Greedy`: maintains components when they are in the last state before failure, up to the maintenance capacity """ -function Utils.generate_policies(::MaintenanceBenchmark) +function Utils.generate_baseline_policies(::MaintenanceBenchmark) greedy = Policy( "Greedy", "policy that maintains components when they are in the last state before failure, up to the maintenance capacity", diff --git a/src/PortfolioOptimization/PortfolioOptimization.jl b/src/PortfolioOptimization/PortfolioOptimization.jl index f79f488..37631eb 100644 --- a/src/PortfolioOptimization/PortfolioOptimization.jl +++ b/src/PortfolioOptimization/PortfolioOptimization.jl @@ -7,7 +7,7 @@ using Flux: Chain, Dense using Ipopt: Ipopt using JuMP: @variable, @objective, @constraint, optimize!, value, Model, set_silent using LinearAlgebra: I -using Random: AbstractRNG, MersenneTwister +using Random: Random, AbstractRNG, MersenneTwister """ $TYPEDEF @@ -107,7 +107,10 @@ $TYPEDSIGNATURES Initialize a linear model for `bench` using `Flux`. """ -function Utils.generate_statistical_model(bench::PortfolioOptimizationBenchmark) +function Utils.generate_statistical_model( + bench::PortfolioOptimizationBenchmark; seed=nothing +) + Random.seed!(seed) (; p, d) = bench return Dense(p, d) end diff --git a/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl b/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl index b9f2099..f8ba775 100644 --- a/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl +++ b/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl @@ -73,32 +73,29 @@ end """ $TYPEDSIGNATURES -Generate a sample for the given `StochasticVehicleSchedulingBenchmark`. -If you want to not add label solutions in the sample, set `compute_solutions=false`. 
-By default, they will be computed using column generation. -Note that computing solutions can be time-consuming, especially for large instances. -You can also use instead `compact_mip` or `compact_linearized_mip` as the algorithm to compute solutions. -If you want to provide a custom algorithm to compute solutions, you can pass it as the `algorithm` keyword argument. -If `algorithm` takes keyword arguments, you can pass them as well directly in `kwargs...`. -If `store_city=false`, the coordinates and unnecessary information about instances will not be stored in the sample. -""" -function Utils.generate_sample( +Generate an unlabeled instance for the given `StochasticVehicleSchedulingBenchmark`. +Returns a [`DataSample`](@ref) with features `x` and `instance` set, but `y=nothing`. + +To obtain labeled samples, pass a `target_policy` to [`generate_dataset`](@ref): + +```julia +policy = sample -> DataSample(; sample.context..., x=sample.x, + y=column_generation_algorithm(sample.instance)) +dataset = generate_dataset(benchmark, N; target_policy=policy) +``` + +If `store_city=false`, coordinates and city information are not stored in the instance. +""" +function Utils.generate_instance( benchmark::StochasticVehicleSchedulingBenchmark, rng::AbstractRNG; store_city=true, - compute_solutions=true, - algorithm=column_generation_algorithm, kwargs..., ) (; nb_tasks, nb_scenarios) = benchmark instance = Instance(; nb_tasks, nb_scenarios, rng, store_city) x = get_features(instance) - y_true = if compute_solutions - algorithm(instance; kwargs...) - else - nothing - end - return DataSample(; x, instance, y=y_true) + return DataSample(; x, instance) end """ diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl index 0989c85..89a6c67 100644 --- a/src/Utils/Utils.jl +++ b/src/Utils/Utils.jl @@ -27,11 +27,11 @@ export TopKMaximizer export AbstractEnvironment, get_seed, is_terminated, observe, reset!, step! 
export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark -export generate_sample, generate_dataset +export generate_instance, generate_sample, generate_dataset export generate_statistical_model, generate_maximizer export generate_scenario export generate_environment, generate_environments -export generate_policies +export generate_baseline_policies export generate_anticipative_solution export plot_data, compute_gap diff --git a/src/Utils/interface.jl b/src/Utils/interface.jl index 6d23f12..23d6eb5 100644 --- a/src/Utils/interface.jl +++ b/src/Utils/interface.jl @@ -3,69 +3,111 @@ $TYPEDEF Abstract type interface for benchmark problems. -The following methods are mandatory for benchmarks: -- [`generate_dataset`](@ref) or [`generate_sample`](@ref) +# Mandatory methods to implement for any benchmark: +Choose one of three primary implementation strategies: +- Implement [`generate_instance`](@ref) (returns a [`DataSample`](@ref) with `y=nothing`). + The default [`generate_sample`](@ref) then applies `target_policy` if provided. +- Override [`generate_sample`](@ref) directly when the sample requires custom logic. In this case, + [`generate_dataset`](@ref) applies `target_policy` to the result after the call returns. +- Override [`generate_dataset`](@ref) directly when samples cannot be drawn independently. 
+ +Also implement: - [`generate_statistical_model`](@ref) - [`generate_maximizer`](@ref) -The following methods are optional: -- [`plot_data`](@ref) -- [`objective_value`](@ref) -- [`compute_gap`](@ref) +# Optional methods (defaults provided) +- [`is_minimization_problem`](@ref): defaults to `true` +- [`objective_value`](@ref): defaults to `dot(θ, y)` +- [`compute_gap`](@ref): default implementation provided; override for custom evaluation + +# Optional methods (no default) +- [`plot_data`](@ref), [`plot_instance`](@ref), [`plot_solution`](@ref) +- [`generate_baseline_policies`](@ref) """ abstract type AbstractBenchmark end """ - generate_sample(::AbstractBenchmark, rng::AbstractRNG; kwargs...) -> DataSample + generate_instance(::AbstractBenchmark, rng::AbstractRNG; kwargs...) -> DataSample -Generate a single [`DataSample`](@ref) for given benchmark. -This is a low-level function that is used by [`generate_dataset`](@ref) to create -a dataset of samples. It is not mandatory to implement this method, but it is -recommended for benchmarks that have a well-defined way to generate individual samples. -An alternative is to directly implement [`generate_dataset`](@ref) to create a dataset -without generating individual samples. +Generate a single unlabeled [`DataSample`](@ref) (with `y=nothing`) for the benchmark. """ -function generate_sample end +function generate_instance(bench::AbstractBenchmark, rng::AbstractRNG; kwargs...) + return error( + "`generate_instance` is not implemented for $(typeof(bench)). " * + "Implement `generate_instance(::$(typeof(bench)), rng; kwargs...) -> DataSample` " * + "or override `generate_sample` directly.", + ) +end """ - generate_dataset(::AbstractBenchmark, dataset_size::Int; kwargs...) -> Vector{<:DataSample} + generate_sample(::AbstractBenchmark, rng::AbstractRNG; target_policy=nothing, kwargs...) -> DataSample + +Generate a single [`DataSample`](@ref) for the benchmark. 
+ +**Framework default** (when [`generate_instance`](@ref) is implemented): +Calls [`generate_instance`](@ref), then applies `target_policy(sample)` if provided. + +Override directly (instead of implementing [`generate_instance`](@ref)) when the sample +requires custom logic. In this case, [`generate_dataset`](@ref) applies `target_policy` +after the call returns. +""" +function generate_sample(bench::AbstractBenchmark, rng; target_policy=nothing, kwargs...) + sample = generate_instance(bench, rng; kwargs...) + return isnothing(target_policy) ? sample : target_policy(sample) +end + +""" + generate_dataset(::AbstractBenchmark, dataset_size::Int; target_policy=nothing, kwargs...) -> Vector{<:DataSample} Generate a `Vector` of [`DataSample`](@ref) of length `dataset_size` for given benchmark. Content of the dataset can be visualized using [`plot_data`](@ref), when it applies. -By default, it uses [`generate_sample`](@ref) to create each sample in the dataset, and passes any keyword arguments to it. +By default, it uses [`generate_sample`](@ref) to create each sample in the dataset, and passes any +keyword arguments to it. If `target_policy` is provided, it is applied to each sample after +[`generate_sample`](@ref) returns. """ function generate_dataset( bench::AbstractBenchmark, dataset_size::Int; + target_policy=nothing, seed=nothing, rng=MersenneTwister(seed), kwargs..., ) Random.seed!(rng, seed) - return [generate_sample(bench, rng; kwargs...) for _ in 1:dataset_size] + return [ + begin + sample = generate_sample(bench, rng; kwargs...) + isnothing(target_policy) ? sample : target_policy(sample) + end for _ in 1:dataset_size + ] end """ generate_maximizer(::AbstractBenchmark; kwargs...) -Generates a maximizer function. -Returns a callable f: (θ; kwargs...) -> y, where θ is a cost array and y is a solution. +Returns a callable `f(θ; kwargs...) -> y`, solving a maximization problem. 
""" function generate_maximizer end """ - generate_statistical_model(::AbstractBenchmark; kwargs...) + generate_statistical_model(::AbstractBenchmark, seed=nothing; kwargs...) -Initializes and return an untrained statistical model of the CO-ML pipeline. -It's usually a Flux model, that takes a feature matrix x as input, and returns a cost array θ as output. +Returns an untrained statistical model (usually a Flux neural network) that maps a +feature matrix `x` to an output array `θ`. The `seed` parameter controls initialization +randomness for reproducibility. """ function generate_statistical_model end """ - generate_policies(::AbstractBenchmark) -> Vector{Policy} + generate_baseline_policies(::AbstractBenchmark) -> NamedTuple or Tuple + +Return named baseline policies for the benchmark. Each policy is a callable. + +- For static/stochastic benchmarks: signature `(sample) -> DataSample`. +- For dynamic benchmarks: signature `(env) -> Vector{DataSample}` (full trajectory). """ -function generate_policies end +function generate_baseline_policies end """ plot_data(::AbstractBenchmark, ::DataSample; kwargs...) @@ -99,7 +141,7 @@ function compute_gap end """ $TYPEDSIGNATURES -Default behaviour of `objective_value`. +Compute `dot(θ, y)`. Override for non-linear objectives. """ function objective_value(::AbstractBenchmark, θ::AbstractArray, y::AbstractArray) return dot(θ, y) @@ -139,7 +181,8 @@ end """ $TYPEDSIGNATURES -Default behaviour of `compute_gap` for a benchmark problem where `features`, `solutions` and `costs` are all defined. +Default implementation of [`compute_gap`](@ref): average relative optimality gap over `dataset`. +Requires samples with `x`, `θ`, and `y` fields. Override for custom evaluation logic. """ function compute_gap( bench::AbstractBenchmark, @@ -166,11 +209,43 @@ end """ $TYPEDEF -Abstract type interface for stochastic benchmark problems. -This type should be used for benchmarks that involve single stage stochastic optimization problems. 
+Abstract type interface for single-stage stochastic benchmark problems.
+
+A stochastic benchmark separates the problem into an **instance** (the
+context known before the scenario is revealed) and a **random scenario** (the uncertain
+part). Decisions are made from the instance alone. Scenarios are used to generate
+anticipative targets and compute objective values.
+
+# Required methods (exogenous benchmarks, `{true}` only)
+- [`generate_instance`](@ref)`(bench, rng)`: returns a [`DataSample`](@ref) with instance
+  and features but **no scenario**. Scenarios are added later by [`generate_dataset`](@ref)
+  via [`generate_scenario`](@ref).
+- [`generate_scenario`](@ref)`(bench, rng; kwargs...)`: draws a random scenario.
+  Instance and context fields are passed as keyword arguments spread from `sample.context`.
+
+# Optional methods
+- [`generate_anticipative_solver`](@ref)`(bench)`: returns a callable
+  `(scenario; kwargs...) -> y` that computes the anticipative solution per scenario.
+- [`generate_parametric_anticipative_solver`](@ref)`(bench)`: returns a callable
+  `(θ, scenario; kwargs...) -> y` for the parametric anticipative subproblem
+  `argmin_{y ∈ Y} c(y, scenario) + θᵀy`.
+
+# Dataset generation (exogenous only)
+[`generate_dataset`](@ref) is specialised for `AbstractStochasticBenchmark{true}` and
+supports the three standard dataset structures via `nb_scenarios`:
 
-It follows the same interface as [`AbstractBenchmark`](@ref), with the addition of the following methods:
-- TODO
+| Setting | Call |
+|---------|------|
+| 1 instance with K scenarios | `generate_dataset(bench, 1; nb_scenarios=K)` |
+| N instances with 1 scenario | `generate_dataset(bench, N)` (default) |
+| N instances with K scenarios | `generate_dataset(bench, N; nb_scenarios=K)` |
+
+By default (no `target_policy`), each [`DataSample`](@ref) has `context` holding the
+instance fields (spread as solver kwargs) and `extra=(; scenario)` holding one scenario. 
+ +Provide a `target_policy(sample, scenarios) -> Vector{DataSample}` to compute labels. +This covers both anticipative (K samples, one per scenario) and SAA (1 sample from all K +scenarios) labeling strategies. """ abstract type AbstractStochasticBenchmark{exogenous} <: AbstractBenchmark end @@ -178,59 +253,219 @@ is_exogenous(::AbstractStochasticBenchmark{exogenous}) where {exogenous} = exoge is_endogenous(::AbstractStochasticBenchmark{exogenous}) where {exogenous} = !exogenous """ - generate_scenario(::AbstractStochasticBenchmark{true}, instance; kwargs...) + generate_scenario(::AbstractStochasticBenchmark{true}, rng::AbstractRNG; kwargs...) -> scenario + +Draw a random scenario. Instance and context fields are passed as keyword arguments, +spread from `sample.context`: + + scenario = generate_scenario(bench, rng; sample.context...) """ function generate_scenario end """ - generate_anticipative_solution(::AbstractStochasticBenchmark{true}, instance, scenario; kwargs...) + generate_anticipative_solver(::AbstractStochasticBenchmark{true}) -> callable + +Return a callable that computes the anticipative solution for a given scenario. +The instance and other solver-relevant fields are spread from the sample context. + +- For [`AbstractStochasticBenchmark`](@ref): returns `(scenario; context...) -> y`. +- For [`AbstractDynamicBenchmark`](@ref): returns + `(scenario; context...) -> Vector{DataSample}` — a full training trajectory. + + solver = generate_anticipative_solver(bench) + y = solver(scenario; sample.context...) # stochastic + trajectory = solver(scenario; sample.context...) # dynamic +""" +function generate_anticipative_solver end + +""" + generate_parametric_anticipative_solver(::AbstractStochasticBenchmark{true}) -> callable + +**Optional.** Return a callable `(θ, scenario; kwargs...) 
-> y` that solves the +parametric anticipative subproblem: + + argmin_{y ∈ Y(instance)} c(y, scenario) + θᵀy +""" +function generate_parametric_anticipative_solver end + +""" + generate_anticipative_solution(::AbstractStochasticBenchmark, instance, scenario; kwargs...) + +!!! warning "Deprecated" + Use [`generate_anticipative_solver`](@ref) instead, which returns a callable + `(scenario; kwargs...) -> y` consistent with the [`generate_maximizer`](@ref) + convention. """ function generate_anticipative_solution end +""" +$TYPEDSIGNATURES + +Default [`generate_sample`](@ref) for exogenous stochastic benchmarks. + +Calls [`generate_instance`](@ref), draws `nb_scenarios` scenarios via +[`generate_scenario`](@ref), then: +- Without `target_policy`: returns K unlabeled samples, each with one scenario in + `extra=(; scenario=ξ)`. +- With `target_policy`: calls `target_policy(sample, scenarios)` and returns the result. + +`target_policy(sample, scenarios) -> Vector{DataSample}` enables anticipative labeling +(K samples, one per scenario) or SAA (1 sample aggregating all K scenarios). +""" +function generate_sample( + bench::AbstractStochasticBenchmark{true}, + rng; + target_policy=nothing, + nb_scenarios::Int=1, + kwargs..., +) + sample = generate_instance(bench, rng; kwargs...) + scenarios = [generate_scenario(bench, rng; sample.context...) for _ in 1:nb_scenarios] + if isnothing(target_policy) + return [ + DataSample(; x=sample.x, θ=sample.θ, sample.context..., extra=(; scenario=ξ)) + for ξ in scenarios + ] + else + return target_policy(sample, scenarios) + end +end + +""" +$TYPEDSIGNATURES + +Specialised [`generate_dataset`](@ref) for exogenous stochastic benchmarks. + +Generates `nb_instances` problem instances, each with `nb_scenarios` independent +scenario draws. The scenario→sample mapping is controlled by the `target_policy`: +- Without `target_policy` (default): K scenarios produce K unlabeled samples (1:1). 
+- With `target_policy(sample, scenarios) -> Vector{DataSample}`: enables anticipative + labeling (K labeled samples) or SAA (1 sample aggregating all K scenarios). + +# Keyword arguments +- `nb_scenarios::Int = 1`: scenarios per instance (K). +- `target_policy`: when provided, called as `target_policy(sample, scenarios)` to + compute labels. Defaults to `nothing` (unlabeled samples). +- `seed`: passed to `MersenneTwister` when `rng` is not provided. +- `rng`: random number generator; overrides `seed` when provided. +- `kwargs...`: forwarded to [`generate_sample`](@ref). +""" +function generate_dataset( + bench::AbstractStochasticBenchmark{true}, + nb_instances::Int; + target_policy=nothing, + nb_scenarios::Int=1, + seed=nothing, + rng=MersenneTwister(seed), + kwargs..., +) + Random.seed!(rng, seed) + samples = DataSample[] + for _ in 1:nb_instances + new_samples = generate_sample(bench, rng; target_policy, nb_scenarios, kwargs...) + append!(samples, new_samples) + end + return samples +end + """ $TYPEDEF -Abstract type interface for dynamic benchmark problems. -This type should be used for benchmarks that involve multi-stage stochastic optimization problems. +Abstract type interface for multi-stage stochastic (dynamic) benchmark problems. + +Extends [`AbstractStochasticBenchmark`](@ref). The `{exogenous}` parameter retains its +meaning (whether uncertainty is independent of decisions). -It follows the same interface as [`AbstractStochasticBenchmark`](@ref), with the addition of the following methods: -TODO +# Primary entry point +- [`generate_environments`](@ref)`(bench, n; rng)`: mandatory (or implement + [`generate_environment`](@ref)`(bench, rng)`). The count-based default calls + [`generate_environment`](@ref) once per environment. + +# Additional optional methods +- [`generate_environment`](@ref)`(bench, rng)`: initialize a single rollout environment. 
+ Implement this instead of overriding [`generate_environments`](@ref) when environments + can be drawn independently. +- [`generate_baseline_policies`](@ref)`(bench)`: returns named baseline callables of + signature `(env) -> Vector{DataSample}` (full trajectory rollout). +- [`generate_dataset`](@ref)`(bench, environments; target_policy, ...)`: generates + training-ready [`DataSample`](@ref)s by calling `target_policy(env)` for each environment. + Requires `target_policy` as a mandatory keyword argument. """ abstract type AbstractDynamicBenchmark{exogenous} <: AbstractStochasticBenchmark{exogenous} end """ - generate_environment(::AbstractDynamicBenchmark, instance, rng::AbstractRNG; kwargs...) + generate_environment(::AbstractDynamicBenchmark, rng::AbstractRNG; kwargs...) -Initialize an environment for the given dynamic benchmark instance. +Initialize a single environment for the given dynamic benchmark. +Primary implementation target for the count-based [`generate_environments`](@ref) default. +Override [`generate_environments`](@ref) directly when environments cannot be drawn +independently (e.g. loading from files). """ function generate_environment end """ $TYPEDSIGNATURES -Default behaviour of `generate_environment` applied to a data sample. -Uses the info field of the sample as the instance. +Generate `n` environments for the given dynamic benchmark. +Primary entry point for dynamic training algorithms. +Override when environments cannot be drawn independently (e.g. loading from files). """ -function generate_environment( - bench::AbstractDynamicBenchmark, sample::DataSample, rng::AbstractRNG; kwargs... +function generate_environments( + bench::AbstractDynamicBenchmark, + n::Int; + seed=nothing, + rng=MersenneTwister(seed), + kwargs..., ) - return generate_environment(bench, sample.instance, rng; kwargs...) + Random.seed!(rng, seed) + return [generate_environment(bench, rng; kwargs...) 
for _ in 1:n] end """ $TYPEDSIGNATURES -Generate a vector of environments for the given dynamic benchmark and dataset. +Generate a training dataset from pre-built environments for an exogenous dynamic benchmark. + +For each environment, calls `target_policy(env)` to obtain a training trajectory +(`Vector{DataSample}`). The trajectories are concatenated into a flat dataset. + +`target_policy` is a **required** keyword argument. Use [`generate_baseline_policies`](@ref) +to obtain standard baseline callables (e.g. the anticipative solver). + +# Keyword arguments +- `target_policy`: **required** callable `(env) -> Vector{DataSample}`. +- `seed`: passed to `MersenneTwister` when `rng` is not provided. +- `rng`: random number generator. """ -function generate_environments( - bench::AbstractDynamicBenchmark, - dataset::AbstractArray; +function generate_dataset( + bench::AbstractDynamicBenchmark{true}, + environments::AbstractVector; + target_policy, seed=nothing, rng=MersenneTwister(seed), kwargs..., ) Random.seed!(rng, seed) - return map(dataset) do sample - generate_environment(bench, sample, rng; kwargs...) + samples = DataSample[] + for env in environments + trajectory = target_policy(env) + append!(samples, trajectory) end + return samples +end + +""" +$TYPEDSIGNATURES + +Convenience wrapper for exogenous dynamic benchmarks: generates `n` environments +via [`generate_environments`](@ref), then calls +[`generate_dataset`](@ref)`(bench, environments; target_policy, ...)`. + +`target_policy` is a **required** keyword argument. +""" +function generate_dataset( + bench::AbstractDynamicBenchmark{true}, n::Int; target_policy, seed=nothing, kwargs... +) + environments = generate_environments(bench, n; seed) + return generate_dataset(bench, environments; target_policy, seed, kwargs...) 
end diff --git a/test/dynamic_assortment.jl b/test/dynamic_assortment.jl index 1504421..a1a136e 100644 --- a/test/dynamic_assortment.jl +++ b/test/dynamic_assortment.jl @@ -276,11 +276,10 @@ end b = DynamicAssortmentBenchmark(; N=5, d=2, K=3, max_steps=20) # Generate test data - dataset = generate_dataset(b, 10; seed=0) - environments = generate_environments(b, dataset) + environments = generate_environments(b, 10; seed=0) # Get policies - policies = generate_policies(b) + policies = generate_baseline_policies(b) expert = policies[1] greedy = policies[2] @@ -311,6 +310,28 @@ end @test sum(greedy_action) == DAP.assortment_size(env) end +@testset "DynamicAssortment - generate_dataset with environments (exogenous)" begin + b = DynamicAssortmentBenchmark(; N=4, d=2, K=2, max_steps=10, exogenous=true) + envs = generate_environments(b, 4; seed=0) + policies = generate_baseline_policies(b) + expert = policies[1] + + # target_policy: env -> Vector{DataSample} (full trajectory) + target_policy = env -> evaluate_policy!(expert, env)[2] + + # vector-of-environments overload + dataset = generate_dataset(b, envs; target_policy=target_policy) + @test dataset isa Vector{DataSample} + @test !isempty(dataset) + @test all(!isnothing(s.x) for s in dataset) + @test all(!isnothing(s.y) for s in dataset) + + # count-based wrapper + dataset2 = generate_dataset(b, 3; seed=7, target_policy=target_policy) + @test dataset2 isa Vector{DataSample} + @test !isempty(dataset2) +end + @testset "DynamicAssortment - Model and Maximizer Integration" begin b = DynamicAssortmentBenchmark(; N=4, d=3, K=2) @@ -323,11 +344,10 @@ end sample = generate_sample(b, MersenneTwister(42)) @test hasfield(typeof(sample), :context) - dataset = generate_dataset(b, 3; seed=42) - environments = generate_environments(b, dataset) + environments = generate_environments(b, 3; seed=42) # Evaluate policy to get data samples - policies = generate_policies(b) + policies = generate_baseline_policies(b) _, data_samples = 
evaluate_policy!(policies[1], environments) # Test model-maximizer pipeline diff --git a/test/dynamic_vsp.jl b/test/dynamic_vsp.jl index 39a00aa..8564a2e 100644 --- a/test/dynamic_vsp.jl +++ b/test/dynamic_vsp.jl @@ -8,13 +8,12 @@ @test is_exogenous(b) @test !is_endogenous(b) - dataset = generate_dataset(b, 10) - environments = generate_environments(b, dataset; seed=0) + environments = generate_environments(b, 10; seed=0) env = environments[1] get_seed(env) - policies = generate_policies(b) + policies = generate_baseline_policies(b) lazy = policies[1] greedy = policies[2] @@ -26,8 +25,7 @@ @test mean(r_lazy) <= mean(r_greedy) env = environments[1] - instance = dataset[1].instance - scenario = generate_scenario(b, instance) + scenario = env.scenario v, y = generate_anticipative_solution(b, env, scenario; nb_epochs=2, reset_env=true) maximizer = generate_maximizer(b) @@ -37,8 +35,7 @@ θ = model(x) y = maximizer(θ; instance) - dataset2 = generate_dataset(b2, 10) - environments2 = generate_environments(b2, dataset2; seed=0) + environments2 = generate_environments(b2, 10; seed=0) env2 = environments2[1] x2, instance2 = observe(env2) model2 = generate_statistical_model(b2) @@ -54,3 +51,32 @@ @test isapprox(cost, anticipative_value; atol=1e-5) @test isapprox(cost, cost2; atol=1e-5) end + +@testset "DVSP - generate_dataset with environments" begin + using DecisionFocusedLearningBenchmarks.DynamicVehicleScheduling + + b = DynamicVehicleSchedulingBenchmark(; two_dimensional_features=true) + envs = generate_environments(b, 5; seed=0) + policies = generate_baseline_policies(b) + lazy = policies[1] + + # target_policy takes env -> Vector{DataSample} (full trajectory) + target_policy = env -> evaluate_policy!(lazy, env)[2] + + # vector-of-environments overload + dataset = generate_dataset(b, envs; target_policy=target_policy) + @test dataset isa Vector{DataSample} + @test !isempty(dataset) + @test all(!isnothing(s.x) for s in dataset) + @test all(!isnothing(s.y) for s in 
dataset) + + # count-based wrapper + dataset2 = generate_dataset(b, 3; seed=1, target_policy=target_policy) + @test dataset2 isa Vector{DataSample} + @test !isempty(dataset2) + + # seed keyword is forwarded: same seed → same dataset + dataset3a = generate_dataset(b, 3; seed=42, target_policy=target_policy) + dataset3b = generate_dataset(b, 3; seed=42, target_policy=target_policy) + @test length(dataset3a) == length(dataset3b) +end diff --git a/test/dynamic_vsp_plots.jl b/test/dynamic_vsp_plots.jl index 1fc822b..345e823 100644 --- a/test/dynamic_vsp_plots.jl +++ b/test/dynamic_vsp_plots.jl @@ -4,22 +4,20 @@ # Create test benchmark and data (similar to scripts/a.jl) b = DynamicVehicleSchedulingBenchmark(; two_dimensional_features=true) - dataset = generate_dataset(b, 3) - environments = generate_environments(b, dataset; seed=0) + environments = generate_environments(b, 3; seed=0) env = environments[1] # Test basic plotting functions fig1 = DVSP.plot_instance(env) @test fig1 isa Plots.Plot - instance = dataset[1].instance - scenario = generate_scenario(b, instance; seed=0) + scenario = env.scenario v, y = generate_anticipative_solution(b, env, scenario; nb_epochs=3, reset_env=true) fig2 = DVSP.plot_epochs(y) @test fig2 isa Plots.Plot - policies = generate_policies(b) + policies = generate_baseline_policies(b) lazy = policies[1] _, d = evaluate_policy!(lazy, env) fig3 = DVSP.plot_routes(d[1].instance, d[1].y) diff --git a/test/maintenance.jl b/test/maintenance.jl index 32f90ed..a2a9983 100644 --- a/test/maintenance.jl +++ b/test/maintenance.jl @@ -165,11 +165,10 @@ end b = MaintenanceBenchmark() # Generate test data - dataset = generate_dataset(b, 10; seed=0) - environments = generate_environments(b, dataset) + environments = generate_environments(b, 10; seed=0) # Get policies - policies = generate_policies(b) + policies = generate_baseline_policies(b) greedy = policies[1] @test greedy.name == "Greedy" @@ -200,11 +199,10 @@ end sample = generate_sample(b, 
MersenneTwister(42)) @test hasfield(typeof(sample), :context) - dataset = generate_dataset(b, 3; seed=42) - environments = generate_environments(b, dataset) + environments = generate_environments(b, 3; seed=42) # Evaluate policy to get data samples - policies = generate_policies(b) + policies = generate_baseline_policies(b) _, data_samples = evaluate_policy!(policies[1], environments) # Test model-maximizer pipeline diff --git a/test/vsp.jl b/test/vsp.jl index 0d6f5d2..2493d3e 100644 --- a/test/vsp.jl +++ b/test/vsp.jl @@ -8,16 +8,29 @@ b = StochasticVehicleSchedulingBenchmark(; nb_tasks=25, nb_scenarios=10) N = 5 - dataset = generate_dataset(b, N; seed=0, rng=StableRNG(0)) - mip_dataset = generate_dataset(b, N; seed=0, rng=StableRNG(0), algorithm=compact_mip) + + # Helper to build a target_policy that wraps a given algorithm + function make_svs_target_policy(algorithm) + return sample -> + DataSample(; sample.context..., x=sample.x, y=algorithm(sample.instance)) + end + + col_gen_policy = make_svs_target_policy(column_generation_algorithm) + mip_policy = make_svs_target_policy(compact_mip) + mipl_policy = make_svs_target_policy(compact_linearized_mip) + local_search_policy = make_svs_target_policy(local_search) + deterministic_policy = make_svs_target_policy(deterministic_mip) + + dataset = generate_dataset(b, N; seed=0, rng=StableRNG(0), target_policy=col_gen_policy) + mip_dataset = generate_dataset(b, N; seed=0, rng=StableRNG(0), target_policy=mip_policy) mipl_dataset = generate_dataset( - b, N; seed=0, rng=StableRNG(0), algorithm=compact_linearized_mip + b, N; seed=0, rng=StableRNG(0), target_policy=mipl_policy ) local_search_dataset = generate_dataset( - b, N; seed=0, rng=StableRNG(0), algorithm=local_search + b, N; seed=0, rng=StableRNG(0), target_policy=local_search_policy ) deterministic_dataset = generate_dataset( - b, N; seed=0, rng=StableRNG(0), algorithm=deterministic_mip + b, N; seed=0, rng=StableRNG(0), target_policy=deterministic_policy ) @test 
length(dataset) == N