diff --git a/docs/src/custom_benchmarks.md b/docs/src/custom_benchmarks.md index f4a4e7f..968d9e6 100644 --- a/docs/src/custom_benchmarks.md +++ b/docs/src/custom_benchmarks.md @@ -10,13 +10,14 @@ problems to the benchmark suite or integrate their own domains. ``` AbstractBenchmark -└── AbstractStochasticBenchmark{exogenous} - └── AbstractDynamicBenchmark{exogenous} +├── AbstractStaticBenchmark +├── AbstractStochasticBenchmark{exogenous} +└── AbstractDynamicBenchmark{exogenous} ``` | Type | Use case | |------|----------| -| `AbstractBenchmark` | Static, single-stage optimization (e.g. shortest path, portfolio) | +| `AbstractStaticBenchmark` | Static, single-stage optimization (e.g. shortest path, portfolio) | | `AbstractStochasticBenchmark{true}` | Single-stage with exogenous uncertainty (scenarios drawn independently of decisions) | | `AbstractStochasticBenchmark{false}` | Single-stage with endogenous uncertainty | | `AbstractDynamicBenchmark{true}` | Multi-stage sequential decisions with exogenous uncertainty | @@ -41,7 +42,7 @@ repeatedly and applies `target_policy` to each result. --- -## `AbstractBenchmark`: required methods +## `AbstractStaticBenchmark`: required methods ### Data generation (choose one strategy) @@ -70,7 +71,7 @@ generate_maximizer(bench::MyBenchmark) ```julia generate_baseline_policies(bench::MyBenchmark) -> collection of callables -is_minimization_problem(bench::MyBenchmark) -> Bool # default: false (maximization) +is_minimization_problem(bench::MyBenchmark) -> Bool # default: true (minimization) objective_value(bench::MyBenchmark, sample::DataSample, y) -> Real compute_gap(bench::MyBenchmark, dataset, model, maximizer) -> Float64 has_visualization(bench::MyBenchmark) -> Bool # default: false; return true when plot methods are implemented/available @@ -80,7 +81,7 @@ plot_solution(bench::MyBenchmark, sample::DataSample; kwargs...) 
--- -## `AbstractStochasticBenchmark{true}`: additional methods +## `AbstractStochasticBenchmark{true}` For stochastic benchmarks with exogenous uncertainty, implement: @@ -113,7 +114,7 @@ DataSample(; x=features, y=nothing, --- -## `AbstractDynamicBenchmark`: additional methods +## `AbstractDynamicBenchmark` Dynamic benchmarks extend stochastic ones with an environment-based rollout interface. @@ -148,6 +149,15 @@ generate_baseline_policies(bench::MyDynamicBenchmark) # Each callable performs a full episode rollout and returns the trajectory. ``` +### Anticipative solver (optional) + +```julia +generate_anticipative_solver(bench::MyDynamicBenchmark) +# Returns a callable: (env; reset_env=true, kwargs...) -> Vector{DataSample} +# reset_env=true → reset environment before solving +# reset_env=false → solve from current state +``` + ### Optional visualization methods ```julia diff --git a/docs/src/index.md b/docs/src/index.md index ac486e9..4294eb6 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -43,7 +43,7 @@ Where: The package organizes benchmarks into three main categories based on their problem structure: -### Static Benchmarks (`AbstractBenchmark`) +### Static Benchmarks (`AbstractStaticBenchmark`) Single-stage optimization problems with no randomness involved: - [`ArgmaxBenchmark`](@ref): argmax toy problem - [`Argmax2DBenchmark`](@ref): 2D argmax toy problem diff --git a/docs/src/using_benchmarks.md b/docs/src/using_benchmarks.md index bd64437..d9ab6fc 100644 --- a/docs/src/using_benchmarks.md +++ b/docs/src/using_benchmarks.md @@ -8,7 +8,7 @@ This guide covers everything you need to work with existing benchmarks in Decisi A benchmark bundles a problem family (an instance generator, a combinatorial solver, and a statistical model architecture) into a single object. It provides everything needed to run a Decision-Focused Learning experiment out of the box, without having to create each component from scratch. 
Three abstract types cover the main settings: -- **`AbstractBenchmark`**: static problems (one instance, one decision) +- **`AbstractStaticBenchmark`**: static problems (one instance, one decision) - **`AbstractStochasticBenchmark{exogenous}`**: stochastic problems (type parameter indicates whether uncertainty is exogenous) - **`AbstractDynamicBenchmark{exogenous}`**: sequential / multi-stage problems @@ -65,7 +65,7 @@ sample.scenario # looks up :scenario in context first, then in extra ### Static benchmarks -For static benchmarks (`<:AbstractBenchmark`), `generate_dataset` may compute a default ground-truth label `y` if the benchmark implements it: +For static benchmarks (`<:AbstractStaticBenchmark`), `generate_dataset` may compute a default ground-truth label `y` if the benchmark implements it: ```julia bench = ArgmaxBenchmark() @@ -150,7 +150,8 @@ dataset = generate_dataset(bench, 50; rng=rng) gap = compute_gap(bench, dataset, model, maximizer) ``` -# Objective value for a single decision +Objective value for a single decision: + ```julia obj = objective_value(bench, sample, y) ``` diff --git a/src/Argmax/Argmax.jl b/src/Argmax/Argmax.jl index 6775f9c..fa9a306 100644 --- a/src/Argmax/Argmax.jl +++ b/src/Argmax/Argmax.jl @@ -15,7 +15,7 @@ Basic benchmark problem with an argmax as the CO algorithm. # Fields $TYPEDFIELDS """ -struct ArgmaxBenchmark{E} <: AbstractBenchmark +struct ArgmaxBenchmark{E} <: AbstractStaticBenchmark "instances dimension, total number of classes" instance_dim::Int "number of features" diff --git a/src/Argmax2D/Argmax2D.jl b/src/Argmax2D/Argmax2D.jl index 2bae9f5..1cc1bf0 100644 --- a/src/Argmax2D/Argmax2D.jl +++ b/src/Argmax2D/Argmax2D.jl @@ -16,7 +16,7 @@ Argmax benchmark on a 2d polytope. 
# Fields $TYPEDFIELDS """ -struct Argmax2DBenchmark{E,R} <: AbstractBenchmark +struct Argmax2DBenchmark{E,R} <: AbstractStaticBenchmark "number of features" nb_features::Int "true mapping between features and costs" diff --git a/src/DynamicAssortment/DynamicAssortment.jl b/src/DynamicAssortment/DynamicAssortment.jl index 8c372ca..f187a47 100644 --- a/src/DynamicAssortment/DynamicAssortment.jl +++ b/src/DynamicAssortment/DynamicAssortment.jl @@ -78,17 +78,6 @@ include("policies.jl") """ $TYPEDSIGNATURES -Outputs a data sample containing an [`Instance`](@ref). -""" -function Utils.generate_sample( - b::DynamicAssortmentBenchmark, rng::AbstractRNG=MersenneTwister(0) -) - return DataSample(; instance=Instance(b, rng)) -end - -""" -$TYPEDSIGNATURES - Generates a statistical model for the dynamic assortment benchmark. The model is a small neural network with one hidden layer of size 5 and no activation function. """ diff --git a/src/FixedSizeShortestPath/FixedSizeShortestPath.jl b/src/FixedSizeShortestPath/FixedSizeShortestPath.jl index 700ff86..86d5bdb 100644 --- a/src/FixedSizeShortestPath/FixedSizeShortestPath.jl +++ b/src/FixedSizeShortestPath/FixedSizeShortestPath.jl @@ -21,7 +21,7 @@ Data is generated using the process described in: DataSample - -Generate a single unlabeled [`DataSample`](@ref) (with `y=nothing`) for the benchmark. -""" -function generate_instance(bench::AbstractBenchmark, rng::AbstractRNG; kwargs...) - return error( - "`generate_instance` is not implemented for $(typeof(bench)). " * - "Implement `generate_instance(::$(typeof(bench)), rng; kwargs...) -> DataSample` " * - "or override `generate_sample` directly.", - ) -end - -""" - generate_sample(::AbstractBenchmark, rng::AbstractRNG; kwargs...) -> DataSample - -Generate a single [`DataSample`](@ref) for the benchmark. - -**Default** (when [`generate_instance`](@ref) is implemented): -Calls [`generate_instance`](@ref) and returns the result directly. 
- -Override this method when sample generation requires custom logic. Labeling via -`target_policy` is always applied by [`generate_dataset`](@ref) after this call returns. - -!!! note - This is an internal hook called by [`generate_dataset`](@ref). Prefer calling - [`generate_dataset`](@ref) rather than this method directly. For stochastic - benchmarks, implement [`generate_instance`](@ref), [`generate_context`](@ref), - and [`generate_scenario`](@ref) instead of overriding this method. -""" -function generate_sample(bench::AbstractBenchmark, rng; kwargs...) - return generate_instance(bench, rng; kwargs...) -end - -""" - generate_dataset(::AbstractBenchmark, dataset_size::Int; target_policy=nothing, kwargs...) -> Vector{<:DataSample} - -Generate a `Vector` of [`DataSample`](@ref) of length `dataset_size` for given benchmark. -Content of the dataset can be visualized using [`plot_solution`](@ref), when it applies. - -By default, it uses [`generate_sample`](@ref) to create each sample in the dataset, and passes any -keyword arguments to it. `target_policy` is applied if provided, it is called on each sample -after [`generate_sample`](@ref) returns. -""" -function generate_dataset( - bench::AbstractBenchmark, - dataset_size::Int; - target_policy=nothing, - seed=nothing, - rng=MersenneTwister(seed), - kwargs..., -) - return [ - begin - sample = generate_sample(bench, rng; kwargs...) - isnothing(target_policy) ? sample : target_policy(sample) - end for _ in 1:dataset_size - ] -end - -""" - generate_maximizer(::AbstractBenchmark; kwargs...) - -Returns a callable `f(θ; kwargs...) -> y`, solving a maximization problem. -""" -function generate_maximizer(bench::AbstractBenchmark; kwargs...) - return error( - "`generate_maximizer` is not implemented for $(typeof(bench)). " * - "Implement `generate_maximizer(::$(typeof(bench)); kwargs...) -> f(θ; kwargs...) -> y`.", - ) -end - -""" - generate_statistical_model(::AbstractBenchmark, seed=nothing; kwargs...) 
- -Returns an untrained statistical model (usually a Flux neural network) that maps a -feature matrix `x` to an output array `θ`. The `seed` parameter controls initialization -randomness for reproducibility. -""" -function generate_statistical_model(bench::AbstractBenchmark, seed=nothing; kwargs...) - return error( - "`generate_statistical_model` is not implemented for $(typeof(bench)). " * - "Implement `generate_statistical_model(::$(typeof(bench)), seed=nothing; kwargs...) -> model`.", - ) -end - -""" - generate_baseline_policies(::AbstractBenchmark) -> NamedTuple or Tuple - -Return named baseline policies for the benchmark. Each policy is a callable. - -- For static/stochastic benchmarks: signature `(sample) -> DataSample`. -- For dynamic benchmarks: signature `(env) -> Vector{DataSample}` (full trajectory). -""" -function generate_baseline_policies end - -""" - has_visualization(::AbstractBenchmark) -> Bool - -Return `true` if `plot_instance` and `plot_solution` are implemented for this benchmark -(requires `Plots` to be loaded). Default is `false`. -""" -has_visualization(::AbstractBenchmark) = false - -""" - plot_instance(bench::AbstractBenchmark, sample::DataSample; kwargs...) - -Plot the problem instance (no solution). Only available when `Plots` is loaded. -""" -function plot_instance end - -""" - plot_solution(bench::AbstractBenchmark, sample::DataSample; kwargs...) - -Plot the instance with `sample.y` overlaid. Only available when `Plots` is loaded. -""" -function plot_solution end - -""" -$TYPEDSIGNATURES - -Compute the objective value of given solution `y` for a specific benchmark. -Must be implemented by each concrete benchmark type. For stochastic benchmarks, -an additional `scenario` argument is required. -""" -function objective_value end - -""" -$TYPEDSIGNATURES - -Compute the objective value of the target in the sample (needs to exist). 
-""" -function objective_value( - bench::AbstractBenchmark, sample::DataSample{CTX,EX,F,S,C} -) where {CTX,EX,F,S<:AbstractArray,C} - return objective_value(bench, sample, sample.y) -end - -""" -$TYPEDSIGNATURES - -Check if the benchmark is a minimization problem. - -Defaults to `true`. **Maximization benchmarks must override this method**, forgetting to do -so will cause `compute_gap` to compute the gap with the wrong sign without any error or warning. -""" -function is_minimization_problem(::AbstractBenchmark) - return true -end - -""" -$TYPEDSIGNATURES - -Default implementation of [`compute_gap`](@ref): average relative optimality gap over `dataset`. -Requires labeled samples (`y ≠ nothing`), `x`, and `context` fields. -Override for custom evaluation logic. -""" -function compute_gap( - bench::AbstractBenchmark, - dataset::AbstractVector{<:DataSample{<:Any,<:Any,<:Any,<:AbstractArray}}, - statistical_model, - maximizer, - op=mean, -) - check = is_minimization_problem(bench) - - return op( - map(dataset) do sample - target_obj = objective_value(bench, sample) - x = sample.x - θ = statistical_model(x) - y = maximizer(θ; sample.context...) - obj = objective_value(bench, sample, y) - Δ = check ? obj - target_obj : target_obj - obj - return Δ / abs(target_obj) - end, - ) -end - -""" -$TYPEDEF - -Abstract type interface for single-stage stochastic benchmark problems. - -A stochastic benchmark separates the problem into an **instance** (the -context known before the scenario is revealed) and a **random scenario** (the uncertain -part). Decisions are taken by seeing only the instance. Scenarios are used to generate -anticipative targets and compute objective values. - -# Required methods ([`ExogenousStochasticBenchmark`](@ref) only) -- [`generate_instance`](@ref)`(bench, rng)`: returns a [`DataSample`](@ref) with the - problem instance (solver kwargs) and, if not overriding [`generate_context`](@ref), - the ML features `x`. 
Scenarios are added later by [`generate_dataset`](@ref) via - [`generate_scenario`](@ref). When [`generate_context`](@ref) is overridden, `x` may - be absent here and constructed there instead. -- [`generate_scenario`](@ref)`(bench, rng; kwargs...)`: draws a random scenario. - Solver kwargs are spread from `ctx.context`. - -# Optional methods -- [`generate_context`](@ref)`(bench, rng, instance_sample)`: enriches the instance with - observable context (default: identity). Override for contextual stochastic benchmarks. -- [`generate_anticipative_solver`](@ref)`(bench)`: returns a callable - `(scenario; kwargs...) -> y` that computes the anticipative solution per scenario. -- [`generate_parametric_anticipative_solver`](@ref)`(bench)`: returns a callable - `(θ, scenario; kwargs...) -> y` for the parametric anticipative subproblem - `argmin_{y ∈ Y} c(y, scenario) + θᵀy`. - -# Dataset generation (exogenous only) -[`generate_dataset`](@ref) is specialised for [`ExogenousStochasticBenchmark`](@ref) and -supports all three standard structures via `nb_scenarios` and `contexts_per_instance`: - -| Setting | Call | -|---------|------| -| 1 instance with K scenarios | `generate_dataset(bench, 1; nb_scenarios=K)` | -| N instances with 1 scenario | `generate_dataset(bench, N)` (default) | -| N instances with K scenarios | `generate_dataset(bench, N; nb_scenarios=K)` | -| N instances with M contexts × K scenarios | `generate_dataset(bench, N; contexts_per_instance=M, nb_scenarios=K)` | - -By default (no `target_policy`), each [`DataSample`](@ref) has `context` holding -the solver kwargs and `extra=(; scenario)` holding one scenario. - -Provide a `target_policy(ctx_sample, scenarios) -> Vector{DataSample}` -to compute labels. This covers both anticipative (K samples, one per scenario) and SAA -(1 sample from all K scenarios) labeling strategies. 
-""" -abstract type AbstractStochasticBenchmark{exogenous} <: AbstractBenchmark end - -is_exogenous(::AbstractStochasticBenchmark{exogenous}) where {exogenous} = exogenous -is_endogenous(::AbstractStochasticBenchmark{exogenous}) where {exogenous} = !exogenous - -"Alias for [`AbstractStochasticBenchmark`](@ref)`{true}`. Uncertainty is independent of decisions." -const ExogenousStochasticBenchmark = AbstractStochasticBenchmark{true} - -"Alias for [`AbstractStochasticBenchmark`](@ref)`{false}`. Uncertainty depends on decisions." -const EndogenousStochasticBenchmark = AbstractStochasticBenchmark{false} - -""" - generate_scenario(::ExogenousStochasticBenchmark, rng::AbstractRNG; kwargs...) -> scenario - -Draw a random scenario. Solver kwargs are passed as keyword arguments spread from -`sample.context`: - - ξ = generate_scenario(bench, rng; ctx.context...) -""" -function generate_scenario end - -""" - generate_context(bench::AbstractStochasticBenchmark, rng, instance_sample::DataSample) - -> DataSample - -Enrich `instance_sample` with observable context drawn from `rng`. -Returns a new `DataSample` extending the instance: `.x` is the final ML feature vector -(possibly augmented with context features) and `.extra` holds any latent context fields -needed by [`generate_scenario`](@ref). - -**Default**: returns `instance_sample` unchanged — no context augmentation. -Non-contextual benchmarks (e.g. SVS) use this default. - -**Override** to add per-sample context features: -```julia -function generate_context(bench::MyBench, rng, instance_sample::DataSample) - x_raw = randn(rng, Float32, bench.d) - return DataSample(; - x=vcat(instance_sample.x, x_raw), - instance_sample.context..., - x_raw, - ) -end -``` -Fields in `.context` are spread into [`generate_scenario`](@ref) as kwargs. 
-""" -function generate_context(::AbstractStochasticBenchmark, rng, instance_sample::DataSample) - return instance_sample -end - -""" - generate_anticipative_solver(::AbstractBenchmark) -> callable - -Return a callable that computes the anticipative (oracle) solution. -The calling convention differs by benchmark category: - -**Stochastic benchmarks** ([`AbstractStochasticBenchmark`](@ref)): -Returns `(scenario; context...) -> y`. -Called once per scenario to obtain the optimal label. - -**Dynamic benchmarks** ([`AbstractDynamicBenchmark`](@ref)): -Returns `(env; reset_env=true, kwargs...) -> Vector{DataSample}`, a full trajectory. -`reset_env=true` resets the environment before solving (used for initial dataset building); -`reset_env=false` starts from the current environment state (used inside DAgger rollouts). -""" -function generate_anticipative_solver end - -""" - generate_parametric_anticipative_solver(::ExogenousStochasticBenchmark) -> callable - -**Optional.** Return a callable `(θ, scenario; kwargs...) -> y` that solves the -parametric anticipative subproblem: - - argmin_{y ∈ Y(instance)} c(y, scenario) + θᵀy -""" -function generate_parametric_anticipative_solver end - -""" -$TYPEDSIGNATURES - -Default [`generate_sample`](@ref) for exogenous stochastic benchmarks. - -Calls [`generate_instance`](@ref), then [`generate_context`](@ref) (default: identity), -draws scenarios via [`generate_scenario`](@ref), then: -- Without `target_policy`: returns M×K unlabeled samples (`contexts_per_instance` contexts × - `nb_scenarios` scenarios each), each with one scenario in `extra=(; scenario=ξ)`. -- With `target_policy`: calls `target_policy(ctx_sample, scenarios)` - per context and returns the result. - -`target_policy(ctx_sample, scenarios) -> Vector{DataSample}` enables -anticipative labeling (K samples, one per scenario) or SAA (1 sample aggregating all K -scenarios). - -!!! 
note - This is an internal override of [`generate_sample`](@ref) for the stochastic pipeline, - called by [`generate_dataset`](@ref). New stochastic benchmarks should implement - [`generate_instance`](@ref), [`generate_context`](@ref), and [`generate_scenario`](@ref) - rather than overriding this method. Note that the return type is `Vector{DataSample}` - (one per context × scenario combination), unlike the base method which returns a - single `DataSample`. -""" -function generate_sample( - bench::ExogenousStochasticBenchmark, - rng; - target_policy=nothing, - nb_scenarios::Int=1, - contexts_per_instance::Int=1, - kwargs..., -) - instance_sample = generate_instance(bench, rng; kwargs...) - return reduce( - vcat, - ( - let ctx = generate_context(bench, rng, instance_sample) - if isnothing(target_policy) - [ - DataSample(; - x=ctx.x, - θ=ctx.θ, - ctx.context..., - extra=(; - ctx.extra..., - scenario=generate_scenario(bench, rng; ctx.context...), - ), - ) for _ in 1:nb_scenarios - ] - else - scenarios = [ - generate_scenario(bench, rng; ctx.context...) for - _ in 1:nb_scenarios - ] - target_policy(ctx, scenarios) - end - end for _ in 1:contexts_per_instance - ), - ) -end - -""" -$TYPEDSIGNATURES - -Specialised [`generate_dataset`](@ref) for exogenous stochastic benchmarks. - -Generates `nb_instances` problem instances, each with `contexts_per_instance` context draws -and `nb_scenarios` scenario draws per context. The scenario→sample mapping is controlled -by the `target_policy`: -- Without `target_policy` (default): M contexts × K scenarios produce M×K unlabeled - samples per instance. -- With `target_policy(ctx_sample, scenarios) -> Vector{DataSample}`: - enables anticipative labeling (K labeled samples) or SAA (1 sample aggregating all K - scenarios). - -# Keyword arguments -- `nb_scenarios::Int = 1`: scenarios per context (K). -- `contexts_per_instance::Int = 1`: context draws per instance (M). 
-- `target_policy`: when provided, called as - `target_policy(ctx_sample, scenarios)` to compute labels. - Defaults to `nothing` (unlabeled samples). -- `seed`: passed to `MersenneTwister` when `rng` is not provided. -- `rng`: random number generator; overrides `seed` when provided. -- `kwargs...`: forwarded to [`generate_sample`](@ref). -""" -function generate_dataset( - bench::ExogenousStochasticBenchmark, - nb_instances::Int; - target_policy=nothing, - nb_scenarios::Int=1, - contexts_per_instance::Int=1, - seed=nothing, - rng=MersenneTwister(seed), - kwargs..., -) - return reduce( - vcat, - ( - generate_sample( - bench, rng; target_policy, nb_scenarios, contexts_per_instance, kwargs... - ) for _ in 1:nb_instances - ), - ) -end - -""" -$TYPEDEF - -Transforms an [`ExogenousStochasticBenchmark`](@ref) into a static benchmark via -Sample Average Approximation (SAA). - -For each (instance, context) pair, draws `nb_scenarios` fixed scenarios. These are stored -in the sample and used for feature computation, target labeling (via `target_policy`), -and gap evaluation. - -!!! note - `SampleAverageApproximation <: AbstractBenchmark`, not `AbstractStochasticBenchmark`. - This is intentional: after wrapping, the scenarios are fixed at dataset-generation time - and the benchmark behaves as a static problem. Functions dispatching on - `AbstractStochasticBenchmark` (e.g. `is_exogenous`) will not match SAA instances. - -# Fields -$TYPEDFIELDS -""" -struct SampleAverageApproximation{B<:ExogenousStochasticBenchmark} <: AbstractBenchmark - "inner stochastic benchmark" - benchmark::B - "number of scenarios to draw per (instance, context) pair" - nb_scenarios::Int -end - -function is_minimization_problem(saa::SampleAverageApproximation) - return is_minimization_problem(saa.benchmark) -end - -function generate_maximizer(saa::SampleAverageApproximation; kwargs...) - return generate_maximizer(saa.benchmark; kwargs...) 
-end - -function generate_statistical_model(saa::SampleAverageApproximation; kwargs...) - return generate_statistical_model(saa.benchmark; kwargs...) -end - -function generate_sample( - saa::SampleAverageApproximation, rng; target_policy=nothing, kwargs... -) - inner = saa.benchmark - instance_sample = generate_instance(inner, rng; kwargs...) - ctx = generate_context(inner, rng, instance_sample) - scenarios = [ - generate_scenario(inner, rng; ctx.context...) for _ in 1:(saa.nb_scenarios) - ] - if isnothing(target_policy) - return [DataSample(; x=ctx.x, ctx.context..., extra=(; ctx.extra..., scenarios))] - else - return target_policy(ctx, scenarios) - end -end - -""" -$TYPEDSIGNATURES - -Specialised [`generate_dataset`](@ref) for [`SampleAverageApproximation`](@ref). - -- Without `target_policy`: returns one static [`DataSample`](@ref) per instance, with - `nb_scenarios` stored in `extra.scenarios`. -- With `target_policy(ctx_sample, scenarios) -> Vector{DataSample}`: - labels each instance using all stored scenarios (same signature as - [`ExogenousStochasticBenchmark`](@ref) policies). -""" -function generate_dataset( - saa::SampleAverageApproximation, - nb_instances::Int; - target_policy=nothing, - seed=nothing, - rng=MersenneTwister(seed), - kwargs..., -) - return reduce( - vcat, (generate_sample(saa, rng; target_policy, kwargs...) for _ in 1:nb_instances) - ) -end - -""" -$TYPEDSIGNATURES - -Evaluate a decision `y` against stored scenarios (average over scenarios). -""" -function objective_value( - saa::SampleAverageApproximation, sample::DataSample, y::AbstractArray -) - return mean( - objective_value(saa.benchmark, sample, y, ξ) for ξ in sample.extra.scenarios - ) -end - -""" -$TYPEDSIGNATURES - -Evaluate the target solution in the sample against stored scenarios. 
-""" -function objective_value( - saa::SampleAverageApproximation, sample::DataSample{CTX,EX,F,S,C} -) where {CTX,EX,F,S<:AbstractArray,C} - return objective_value(saa, sample, sample.y) -end - -""" -$TYPEDEF - -Abstract type interface for multi-stage stochastic (dynamic) benchmark problems. - -Extends [`AbstractStochasticBenchmark`](@ref). The `{exogenous}` parameter retains its -meaning (whether uncertainty is independent of decisions). - -# Primary entry point -- [`generate_environments`](@ref)`(bench, n; rng)`: mandatory (or implement - [`generate_environment`](@ref)`(bench, rng)`). The count-based default calls - [`generate_environment`](@ref) once per environment. - -# Additional optional methods -- [`generate_environment`](@ref)`(bench, rng)`: initialize a single rollout environment. - Must return an [`AbstractEnvironment`](@ref) (see `environment.jl` for the full protocol: - [`reset!`](@ref), [`observe`](@ref), [`step!`](@ref), [`is_terminated`](@ref)). - Implement this instead of overriding [`generate_environments`](@ref) when environments - can be drawn independently. -- [`generate_baseline_policies`](@ref)`(bench)`: returns named baseline callables of - signature `(env) -> Vector{DataSample}` (full trajectory rollout). -- [`generate_dataset`](@ref)`(bench, environments; target_policy, ...)`: generates - training-ready [`DataSample`](@ref)s by calling `target_policy(env)` for each environment. - Requires `target_policy` as a mandatory keyword argument. - -# Optional visualization methods (require `Plots` to be loaded) -- [`plot_trajectory`](@ref)`(bench, traj)`: plot a full episode as a grid of subplots. -- [`animate_trajectory`](@ref)`(bench, traj)`: animate a full episode. -""" -abstract type AbstractDynamicBenchmark{exogenous} <: AbstractStochasticBenchmark{exogenous} end - -""" -$TYPEDSIGNATURES - -Intercepts accidental calls to `generate_sample` on dynamic benchmarks and throws a -descriptive error pointing at the correct entry point. 
-""" -function generate_sample(bench::AbstractDynamicBenchmark, rng; kwargs...) - return error( - "`generate_sample` is not supported for dynamic benchmarks ($(typeof(bench))). " * - "Use `generate_environments` and " * - "`generate_dataset(bench, environments; target_policy=...)` instead.", - ) -end - -"Alias for [`AbstractDynamicBenchmark`](@ref)`{true}`. Uncertainty is independent of decisions." -const ExogenousDynamicBenchmark = AbstractDynamicBenchmark{true} - -"Alias for [`AbstractDynamicBenchmark`](@ref)`{false}`. Uncertainty depends on decisions." -const EndogenousDynamicBenchmark = AbstractDynamicBenchmark{false} - -""" - generate_environment(::AbstractDynamicBenchmark, rng::AbstractRNG; kwargs...) -> AbstractEnvironment - -Initialize a single environment for the given dynamic benchmark. -Primary implementation target for the count-based [`generate_environments`](@ref) default. -Override [`generate_environments`](@ref) directly when environments cannot be drawn -independently (e.g. loading from files). -""" -function generate_environment end - -""" -$TYPEDSIGNATURES - -Generate `n` environments for the given dynamic benchmark. -Primary entry point for dynamic training algorithms. -Override when environments cannot be drawn independently (e.g. loading from files). -""" -function generate_environments( - bench::AbstractDynamicBenchmark, - n::Int; - seed=nothing, - rng=MersenneTwister(seed), - kwargs..., -) - return [generate_environment(bench, rng; kwargs...) for _ in 1:n] -end - -""" -$TYPEDSIGNATURES - -Generate a training dataset from pre-built environments for an exogenous dynamic benchmark. - -For each environment, calls `target_policy(env)` to obtain a training trajectory -(`Vector{DataSample}`). The trajectories are concatenated into a flat dataset. - -`target_policy` is a **required** keyword argument. Use [`generate_baseline_policies`](@ref) -to obtain standard baseline callables (e.g. the anticipative solver). 
- -# Keyword arguments -- `target_policy`: **required** callable `(env) -> Vector{DataSample}`. -- `seed`: passed to `MersenneTwister` when `rng` is not provided. -- `rng`: random number generator. -""" -function generate_dataset( - bench::ExogenousDynamicBenchmark, environments::AbstractVector; target_policy, kwargs... -) - return reduce(vcat, (target_policy(env) for env in environments)) -end - -""" -$TYPEDSIGNATURES - -Convenience wrapper for exogenous dynamic benchmarks: generates `n` environments -via [`generate_environments`](@ref), then calls -[`generate_dataset`](@ref)`(bench, environments; target_policy, ...)`. - -`target_policy` is a **required** keyword argument. -""" -function generate_dataset( - bench::ExogenousDynamicBenchmark, n::Int; target_policy, seed=nothing, kwargs... -) - environments = generate_environments(bench, n; seed) - return generate_dataset(bench, environments; target_policy, seed, kwargs...) -end - -""" - plot_trajectory(bench::AbstractDynamicBenchmark, trajectory::Vector{<:DataSample}; kwargs...) - -Plot a full dynamic episode as a grid of state/decision subplots. -Only available when `Plots` is loaded. -""" -function plot_trajectory end - -""" - animate_trajectory(bench::AbstractDynamicBenchmark, trajectory::Vector{<:DataSample}; kwargs...) - -Animate a full dynamic episode. Returns a `Plots.Animation` object -(save with `gif(result, filename)`). Only available when `Plots` is loaded. -""" -function animate_trajectory end diff --git a/src/Utils/interface/abstract_benchmark.jl b/src/Utils/interface/abstract_benchmark.jl new file mode 100644 index 0000000..bedf527 --- /dev/null +++ b/src/Utils/interface/abstract_benchmark.jl @@ -0,0 +1,144 @@ +""" +$TYPEDEF + +Abstract root type for all benchmark problems. +""" +abstract type AbstractBenchmark end + +""" + generate_instance(::AbstractBenchmark, rng::AbstractRNG; kwargs...) -> DataSample + +Generate a single unlabeled [`DataSample`](@ref) (with `y=nothing`) for the benchmark. 
+""" +function generate_instance(bench::AbstractBenchmark, rng::AbstractRNG; kwargs...) + return error( + "`generate_instance` is not implemented for $(typeof(bench)). " * + "Implement `generate_instance(::$(typeof(bench)), rng; kwargs...) -> DataSample`. " * + "For static benchmarks, you may also override `generate_sample` directly instead.", + ) +end + +""" + generate_maximizer(::AbstractBenchmark; kwargs...) + +Returns a callable `f(θ; kwargs...) -> y`, solving a maximization problem. +""" +function generate_maximizer(bench::AbstractBenchmark; kwargs...) + return error( + "`generate_maximizer` is not implemented for $(typeof(bench)). " * + "Implement `generate_maximizer(::$(typeof(bench)); kwargs...) -> f(θ; kwargs...) -> y`.", + ) +end + +""" + generate_statistical_model(::AbstractBenchmark, seed=nothing; kwargs...) + +Returns an untrained statistical model (usually a Flux neural network) that maps a +feature matrix `x` to an output array `θ`. The `seed` parameter controls initialization +randomness for reproducibility. +""" +function generate_statistical_model(bench::AbstractBenchmark, seed=nothing; kwargs...) + return error( + "`generate_statistical_model` is not implemented for $(typeof(bench)). " * + "Implement `generate_statistical_model(::$(typeof(bench)), seed=nothing; kwargs...) -> model`.", + ) +end + +""" + generate_baseline_policies(::AbstractBenchmark) -> NamedTuple or Tuple + +Return named baseline policies for the benchmark. Each policy is a callable. +The calling convention matches the `target_policy` signature for the benchmark category: + +- **Static:** `(sample) -> DataSample` +- **Stochastic:** `(ctx_sample, scenarios) -> Vector{DataSample}` +- **Dynamic:** `(env) -> Vector{DataSample}` (full trajectory rollout) +""" +function generate_baseline_policies end + +""" + has_visualization(::AbstractBenchmark) -> Bool + +Return `true` if `plot_instance` and `plot_solution` are implemented for this benchmark +(requires `Plots` to be loaded). 
Default is `false`. +""" +has_visualization(::AbstractBenchmark) = false + +""" + plot_instance(bench::AbstractBenchmark, sample::DataSample; kwargs...) + +Plot the problem instance (no solution). Only available when `Plots` is loaded. +""" +function plot_instance end + +""" + plot_solution(bench::AbstractBenchmark, sample::DataSample; kwargs...) + +Plot the instance with `sample.y` overlaid. Only available when `Plots` is loaded. +""" +function plot_solution end + +""" + objective_value(bench::AbstractBenchmark, sample::DataSample, y) -> Real + +Compute the objective value of solution `y` for the benchmark instance encoded in `sample`. +Must be implemented by each concrete [`AbstractStaticBenchmark`](@ref). + +For stochastic benchmarks, implement the 4-arg form instead (see +[`ExogenousStochasticBenchmark`](@ref)): + + objective_value(bench, sample, y, scenario) -> Real +""" +function objective_value end + +""" +$TYPEDSIGNATURES + +Compute the objective value of the target solution `sample.y`, which must exist (i.e. the sample is labeled). +""" +function objective_value( + bench::AbstractBenchmark, sample::DataSample{CTX,EX,F,S,C} +) where {CTX,EX,F,S<:AbstractArray,C} + return objective_value(bench, sample, sample.y) +end + +""" +$TYPEDSIGNATURES + +Check if the benchmark is a minimization problem. + +Defaults to `true`. **Maximization benchmarks must override this method**; forgetting to do +so will cause `compute_gap` to compute the gap with the wrong sign without any error or warning. +""" +function is_minimization_problem(::AbstractBenchmark) + return true +end + +""" +$TYPEDSIGNATURES + +Default implementation of [`compute_gap`](@ref): average relative optimality gap over `dataset`. +Requires labeled samples (`y ≠ nothing`), `x`, and `context` fields. +Override for custom evaluation logic.
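As a usage sketch (all names hypothetical; assumes a concrete benchmark whose `generate_*` methods are implemented and a dataset labeled via some `target_policy`):

```julia
bench = MyBenchmark()  # hypothetical concrete AbstractStaticBenchmark
dataset = generate_dataset(bench, 100; seed=0, target_policy=my_labeler)
model = generate_statistical_model(bench)
maximizer = generate_maximizer(bench)
# Mean relative optimality gap of the (untrained) model over the dataset.
gap = compute_gap(bench, dataset, model, maximizer)
```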
+""" +function compute_gap( + bench::AbstractBenchmark, + dataset::AbstractVector{<:DataSample{<:Any,<:Any,<:Any,<:AbstractArray}}, + statistical_model, + maximizer, + op=mean, +) + check = is_minimization_problem(bench) + + return op( + map(dataset) do sample + target_obj = objective_value(bench, sample) + x = sample.x + θ = statistical_model(x) + y = maximizer(θ; sample.context...) + obj = objective_value(bench, sample, y) + Δ = check ? obj - target_obj : target_obj - obj + return Δ / abs(target_obj) + end, + ) +end diff --git a/src/Utils/interface/dynamic_benchmark.jl b/src/Utils/interface/dynamic_benchmark.jl new file mode 100644 index 0000000..6b8c68d --- /dev/null +++ b/src/Utils/interface/dynamic_benchmark.jl @@ -0,0 +1,135 @@ +""" +$TYPEDEF + +Abstract type interface for multi-stage stochastic (dynamic) benchmark problems. +The `{exogenous}` parameter has the same meaning (whether uncertainty is independent +of decisions) as in [`AbstractStochasticBenchmark`](@ref). + +# Primary entry point +- [`generate_environments`](@ref)`(bench, n; rng)`: mandatory (or implement + [`generate_environment`](@ref)`(bench, rng)`). The count-based default calls + [`generate_environment`](@ref) once per environment. + +# Additional optional methods +- [`generate_environment`](@ref)`(bench, rng)`: initialize a single rollout environment. + Must return an [`AbstractEnvironment`](@ref) (see `environment.jl` for the full protocol: + [`reset!`](@ref), [`observe`](@ref), [`step!`](@ref), [`is_terminated`](@ref)). + Implement this instead of overriding [`generate_environments`](@ref) when environments + can be drawn independently. +- [`generate_baseline_policies`](@ref)`(bench)`: returns named baseline callables of + signature `(env) -> Vector{DataSample}` (full trajectory rollout). +- [`generate_anticipative_solver`](@ref)`(bench)`: returns a callable + `(env; reset_env=true, kwargs...) -> Vector{DataSample}` that runs the anticipative solver over a full episode. 
`reset_env=true` resets the environment + before solving. `reset_env=false` starts from the current state. +- [`generate_dataset`](@ref)`(bench, environments; target_policy, ...)`: generates + training-ready [`DataSample`](@ref)s by calling `target_policy(env)` for each environment. + Requires `target_policy` as a mandatory keyword argument. + +# Optional visualization methods (require `Plots` to be loaded) +- [`plot_trajectory`](@ref)`(bench, traj)`: plot a full episode as a grid of subplots. +- [`animate_trajectory`](@ref)`(bench, traj)`: animate a full episode. +""" +abstract type AbstractDynamicBenchmark{exogenous} <: AbstractBenchmark end + +is_exogenous(::AbstractDynamicBenchmark{exogenous}) where {exogenous} = exogenous +is_endogenous(::AbstractDynamicBenchmark{exogenous}) where {exogenous} = !exogenous + +""" +$TYPEDSIGNATURES + +Intercepts accidental calls to the default `compute_gap` on dynamic benchmarks and throws a +descriptive error. Dynamic benchmarks do not have a generic single-sample gap computation; +override `compute_gap` directly on the concrete type if needed. +""" +function compute_gap(bench::AbstractDynamicBenchmark, args...; kwargs...) + return error( + "`compute_gap` is not supported for dynamic benchmarks ($(typeof(bench))). " * + "Override `compute_gap` on the concrete type with trajectory-based evaluation logic.", + ) +end + +"Alias for [`AbstractDynamicBenchmark`](@ref)`{true}`. Uncertainty is independent of decisions." +const ExogenousDynamicBenchmark = AbstractDynamicBenchmark{true} + +"Alias for [`AbstractDynamicBenchmark`](@ref)`{false}`. Uncertainty depends on decisions." +const EndogenousDynamicBenchmark = AbstractDynamicBenchmark{false} + +""" + generate_environment(::AbstractDynamicBenchmark, rng::AbstractRNG; kwargs...) -> AbstractEnvironment + +Initialize a single environment for the given dynamic benchmark. +Primary implementation target for the count-based [`generate_environments`](@ref) default. 
+Override [`generate_environments`](@ref) directly when environments cannot be drawn +independently (e.g. loading from files). +""" +function generate_environment end + +""" +$TYPEDSIGNATURES + +Generate `n` environments for the given dynamic benchmark. +Primary entry point for dynamic training algorithms. +Override when environments cannot be drawn independently (e.g. loading from files). +""" +function generate_environments( + bench::AbstractDynamicBenchmark, + n::Int; + seed=nothing, + rng=MersenneTwister(seed), + kwargs..., +) + return [generate_environment(bench, rng; kwargs...) for _ in 1:n] +end + +""" +$TYPEDSIGNATURES + +Generate a training dataset from pre-built environments for an exogenous dynamic benchmark. + +For each environment, calls `target_policy(env)` to obtain a training trajectory +(`Vector{DataSample}`). The trajectories are concatenated into a flat dataset. + +`target_policy` is a **required** keyword argument. Use [`generate_baseline_policies`](@ref) +to obtain standard baseline callables (e.g. the anticipative solver). + +# Keyword arguments +- `target_policy`: **required** callable `(env) -> Vector{DataSample}`. +""" +function generate_dataset( + bench::ExogenousDynamicBenchmark, environments::AbstractVector; target_policy, kwargs... +) + isempty(environments) && return DataSample[] + return reduce(vcat, (target_policy(env) for env in environments)) +end + +""" +$TYPEDSIGNATURES + +Convenience wrapper for exogenous dynamic benchmarks: generates `n` environments +via [`generate_environments`](@ref), then calls +[`generate_dataset`](@ref)`(bench, environments; target_policy, ...)`. + +`target_policy` is a **required** keyword argument. +""" +function generate_dataset( + bench::ExogenousDynamicBenchmark, n::Int; target_policy, seed=nothing, kwargs... +) + environments = generate_environments(bench, n; seed) + return generate_dataset(bench, environments; target_policy, kwargs...) 
+end + +""" + plot_trajectory(bench::AbstractDynamicBenchmark, trajectory::Vector{<:DataSample}; kwargs...) + +Plot a full dynamic episode as a grid of state/decision subplots. +Only available when `Plots` is loaded. +""" +function plot_trajectory end + +""" + animate_trajectory(bench::AbstractDynamicBenchmark, trajectory::Vector{<:DataSample}; kwargs...) + +Animate a full dynamic episode. Returns a `Plots.Animation` object +(save with `gif(result, filename)`). Only available when `Plots` is loaded. +""" +function animate_trajectory end diff --git a/src/Utils/interface/static_benchmark.jl b/src/Utils/interface/static_benchmark.jl new file mode 100644 index 0000000..d809e05 --- /dev/null +++ b/src/Utils/interface/static_benchmark.jl @@ -0,0 +1,79 @@ +""" +$TYPEDEF + +Abstract type interface for static benchmark problems. + +# Mandatory methods to implement for any static benchmark +Choose one of three primary implementation strategies: +- Implement [`generate_instance`](@ref) (returns a [`DataSample`](@ref) with `y=nothing`). + The default [`generate_sample`](@ref) forwards the call directly; [`generate_dataset`](@ref) + applies `target_policy` afterwards if provided. +- Override [`generate_sample`](@ref) directly when the sample requires custom logic + that cannot be expressed via [`generate_instance`](@ref). This applies to static benchmarks + only; stochastic benchmarks should implement the finer-grained hooks instead + ([`generate_instance`](@ref), [`generate_context`](@ref), [`generate_scenario`](@ref)). + [`generate_dataset`](@ref) applies `target_policy` to the result after the call returns. +- Override [`generate_dataset`](@ref) directly when samples cannot be drawn independently.
+ +Also implement: +- [`generate_statistical_model`](@ref) +- [`generate_maximizer`](@ref) + +# Optional methods (defaults provided) +- [`is_minimization_problem`](@ref): defaults to `true` +- [`compute_gap`](@ref): default implementation provided; override for custom evaluation +- [`has_visualization`](@ref): defaults to `false` + +# Mandatory methods (no default) +- [`objective_value`](@ref)`(bench, sample, y)`: must be implemented by every static benchmark + +# Optional methods (no default, require `Plots` to be loaded) +- [`plot_instance`](@ref), [`plot_solution`](@ref) +- [`generate_baseline_policies`](@ref) +""" +abstract type AbstractStaticBenchmark <: AbstractBenchmark end + +""" + generate_sample(::AbstractStaticBenchmark, rng::AbstractRNG; kwargs...) -> DataSample + +Generate a single [`DataSample`](@ref) for the benchmark. + +**Default** (when [`generate_instance`](@ref) is implemented): +Calls [`generate_instance`](@ref) and returns the result directly. + +Override this method when sample generation requires custom logic. Labeling via +`target_policy` is always applied by [`generate_dataset`](@ref) after this call returns. + +!!! note + This is an internal hook called by [`generate_dataset`](@ref). Prefer calling + [`generate_dataset`](@ref) rather than this method directly. +""" +function generate_sample(bench::AbstractStaticBenchmark, rng; kwargs...) + return generate_instance(bench, rng; kwargs...) +end + +""" + generate_dataset(::AbstractStaticBenchmark, dataset_size::Int; target_policy=nothing, kwargs...) -> Vector{<:DataSample} + +Generate a `Vector` of [`DataSample`](@ref) of length `dataset_size` for the given benchmark. +Content of the dataset can be visualized using [`plot_solution`](@ref), when it applies. + +By default, it uses [`generate_sample`](@ref) to create each sample in the dataset, and passes any +keyword arguments to it. If provided, `target_policy` is called on each sample +after [`generate_sample`](@ref) returns.
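A brief sketch of both modes (`MyStaticBenchmark` and `exact_solver` are hypothetical names):

```julia
bench = MyStaticBenchmark()
# Unlabeled dataset: each sample has y == nothing.
unlabeled = generate_dataset(bench, 50; seed=0)
# Labeled dataset: `exact_solver(sample) -> DataSample` fills in `y`.
labeled = generate_dataset(bench, 50; seed=0, target_policy=exact_solver)
```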
+""" +function generate_dataset( + bench::AbstractStaticBenchmark, + dataset_size::Int; + target_policy=nothing, + seed=nothing, + rng=MersenneTwister(seed), + kwargs..., +) + return [ + begin + sample = generate_sample(bench, rng; kwargs...) + isnothing(target_policy) ? sample : target_policy(sample) + end for _ in 1:dataset_size + ] +end diff --git a/src/Utils/interface/stochastic_benchmark.jl b/src/Utils/interface/stochastic_benchmark.jl new file mode 100644 index 0000000..5b2c8f2 --- /dev/null +++ b/src/Utils/interface/stochastic_benchmark.jl @@ -0,0 +1,349 @@ +""" +$TYPEDEF + +Abstract type interface for single-stage stochastic benchmark problems. + +A stochastic benchmark separates the problem into an **instance** (the +context known before the scenario is revealed) and a **random scenario** (the uncertain +part). Decisions are taken by seeing only the instance. Scenarios are used to generate +anticipative targets and compute objective values. + +# Required methods ([`ExogenousStochasticBenchmark`](@ref) only) +- [`generate_instance`](@ref)`(bench, rng)`: returns a [`DataSample`](@ref) with the + problem instance (solver kwargs) and, if not overriding [`generate_context`](@ref), + the ML features `x`. Scenarios are added later by [`generate_dataset`](@ref) via + [`generate_scenario`](@ref). When [`generate_context`](@ref) is overridden, `x` may + be absent here and constructed there instead. +- [`generate_scenario`](@ref)`(bench, rng; kwargs...)`: draws a random scenario. + Solver kwargs are spread from `ctx.context`. + +# Optional methods +- [`generate_context`](@ref)`(bench, rng, instance_sample)`: enriches the instance with + observable context (default: identity). Override for contextual stochastic benchmarks. +- [`generate_anticipative_solver`](@ref)`(bench)`: returns a callable + `(scenario; kwargs...) -> y` that computes the anticipative solution per scenario. 
+- [`generate_parametric_anticipative_solver`](@ref)`(bench)`: returns a callable + `(θ, scenario; kwargs...) -> y` for the parametric anticipative subproblem + `argmin_{y ∈ Y} c(y, scenario) + θᵀy`. + +# Dataset generation (exogenous only) +[`generate_dataset`](@ref) is specialised for [`ExogenousStochasticBenchmark`](@ref) and +supports all three standard structures via `nb_scenarios` and `contexts_per_instance`: + +| Setting | Call | +|---------|------| +| 1 instance with K scenarios | `generate_dataset(bench, 1; nb_scenarios=K)` | +| N instances with 1 scenario | `generate_dataset(bench, N)` (default) | +| N instances with K scenarios | `generate_dataset(bench, N; nb_scenarios=K)` | +| N instances with M contexts × K scenarios | `generate_dataset(bench, N; contexts_per_instance=M, nb_scenarios=K)` | + +By default (no `target_policy`), each [`DataSample`](@ref) has `context` holding +the solver kwargs and `extra=(; scenario)` holding one scenario. + +Provide a `target_policy(ctx_sample, scenarios) -> Vector{DataSample}` +to compute labels. This covers both anticipative (K samples, one per scenario) and SAA +(1 sample from all K scenarios) labeling strategies. +""" +abstract type AbstractStochasticBenchmark{exogenous} <: AbstractBenchmark end + +is_exogenous(::AbstractStochasticBenchmark{exogenous}) where {exogenous} = exogenous +is_endogenous(::AbstractStochasticBenchmark{exogenous}) where {exogenous} = !exogenous + +"Alias for [`AbstractStochasticBenchmark`](@ref)`{true}`. Uncertainty is independent of decisions." +const ExogenousStochasticBenchmark = AbstractStochasticBenchmark{true} + +"Alias for [`AbstractStochasticBenchmark`](@ref)`{false}`. Uncertainty depends on decisions." +const EndogenousStochasticBenchmark = AbstractStochasticBenchmark{false} + +""" + generate_scenario(::ExogenousStochasticBenchmark, rng::AbstractRNG; kwargs...) -> scenario + +Draw a random scenario. 
Solver kwargs are passed as keyword arguments, spread from the +context sample's `context` field: + + ξ = generate_scenario(bench, rng; ctx.context...) +""" +function generate_scenario end + +""" + generate_context(bench::AbstractStochasticBenchmark, rng, instance_sample::DataSample) + -> DataSample + +Enrich `instance_sample` with observable context drawn from `rng`. +Returns a new `DataSample` extending the instance: `.x` is the final ML feature vector +(possibly augmented with context features). Any latent fields needed by +[`generate_scenario`](@ref) must go into `.context` (they are spread as kwargs via +`ctx.context...`), not into `.extra`. + +**Default**: returns `instance_sample` unchanged — no context augmentation. +Non-contextual benchmarks (e.g. SVS) use this default. + +**Override** to add per-sample context features: +```julia +function generate_context(bench::MyBench, rng, instance_sample::DataSample) + x_raw = randn(rng, Float32, bench.d) + return DataSample(; + x=vcat(instance_sample.x, x_raw), + instance_sample.context..., + x_raw, + ) +end +``` +Fields in `.context` are spread into [`generate_scenario`](@ref) as kwargs. +""" +function generate_context(::AbstractStochasticBenchmark, rng, instance_sample::DataSample) + return instance_sample +end + +""" + generate_anticipative_solver(::AbstractBenchmark) -> callable + +Return a callable that computes the anticipative (oracle) solution. +The calling convention differs by benchmark category: + +**Stochastic benchmarks** ([`AbstractStochasticBenchmark`](@ref)): +Returns `(scenario; context...) -> y`. +Called once per scenario to obtain the optimal label. + +**Dynamic benchmarks** ([`AbstractDynamicBenchmark`](@ref)): +Returns `(env; reset_env=true, kwargs...) -> Vector{DataSample}`, a full trajectory. +`reset_env=true` resets the environment before solving (used for initial dataset building); +`reset_env=false` starts from the current environment state (used inside DAgger rollouts).
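An illustrative sketch of both conventions (the benchmark objects, `ctx`, `ξ`, and `env` are hypothetical placeholders):

```julia
# Stochastic: solve one revealed scenario at a time.
solve = generate_anticipative_solver(stochastic_bench)
y_star = solve(ξ; ctx.context...)

# Dynamic: roll out a full episode from a fresh environment.
solve = generate_anticipative_solver(dynamic_bench)
trajectory = solve(env; reset_env=true)  # Vector{DataSample}
```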
+""" +function generate_anticipative_solver end + +""" + objective_value(::ExogenousStochasticBenchmark, sample::DataSample, y, scenario) -> Real + +Compute the objective value of solution `y` for a given `scenario`. +Must be implemented by each concrete [`ExogenousStochasticBenchmark`](@ref). + +This is the primary evaluation hook for stochastic benchmarks. The 2-arg fallback +`objective_value(bench, sample, y)` dispatches here using the scenario stored in +`sample.extra.scenario` (or averages over `sample.extra.scenarios`). +""" +function objective_value end + +""" + generate_parametric_anticipative_solver(::ExogenousStochasticBenchmark) -> callable + +**Optional.** Return a callable `(θ, scenario; kwargs...) -> y` that solves the +parametric anticipative subproblem: + + argmin_{y ∈ Y(instance)} c(y, scenario) + θᵀy +""" +function generate_parametric_anticipative_solver end + +""" +$TYPEDSIGNATURES + +Default [`generate_sample`](@ref) for exogenous stochastic benchmarks. + +Calls [`generate_instance`](@ref), then [`generate_context`](@ref) (default: identity), +draws scenarios via [`generate_scenario`](@ref), then: +- Without `target_policy`: returns M×K unlabeled samples (`contexts_per_instance` contexts × + `nb_scenarios` scenarios each), each with one scenario in `extra=(; scenario=ξ)`. +- With `target_policy`: calls `target_policy(ctx_sample, scenarios)` + per context and returns the result. + +`target_policy(ctx_sample, scenarios) -> Vector{DataSample}` enables +anticipative labeling (K samples, one per scenario) or SAA (1 sample aggregating all K +scenarios). + +!!! note + This is an internal override of [`generate_sample`](@ref) for the stochastic pipeline, + called by [`generate_dataset`](@ref). New stochastic benchmarks should implement + [`generate_instance`](@ref), [`generate_context`](@ref), and [`generate_scenario`](@ref) + rather than overriding this method. 
Note that the return type is `Vector{DataSample}` + (one per context × scenario combination), unlike the base method which returns a + single `DataSample`. +""" +function generate_sample( + bench::ExogenousStochasticBenchmark, + rng; + target_policy=nothing, + nb_scenarios::Int=1, + contexts_per_instance::Int=1, + kwargs..., +) + instance_sample = generate_instance(bench, rng; kwargs...) + return reduce( + vcat, + ( + let ctx = generate_context(bench, rng, instance_sample) + if isnothing(target_policy) + [ + DataSample(; + x=ctx.x, + θ=ctx.θ, + ctx.context..., + extra=(; + ctx.extra..., + scenario=generate_scenario(bench, rng; ctx.context...), + ), + ) for _ in 1:nb_scenarios + ] + else + scenarios = [ + generate_scenario(bench, rng; ctx.context...) for + _ in 1:nb_scenarios + ] + target_policy(ctx, scenarios) + end + end for _ in 1:contexts_per_instance + ), + ) +end + +""" +$TYPEDSIGNATURES + +Specialised [`generate_dataset`](@ref) for exogenous stochastic benchmarks. + +Generates `nb_instances` problem instances, each with `contexts_per_instance` context draws +and `nb_scenarios` scenario draws per context. The scenario→sample mapping is controlled +by the `target_policy`: +- Without `target_policy` (default): M contexts × K scenarios produce M×K unlabeled + samples per instance. +- With `target_policy(ctx_sample, scenarios) -> Vector{DataSample}`: + enables anticipative labeling (K labeled samples) or SAA (1 sample aggregating all K + scenarios). + +# Keyword arguments +- `nb_scenarios::Int = 1`: scenarios per context (K). +- `contexts_per_instance::Int = 1`: context draws per instance (M). +- `target_policy`: when provided, called as + `target_policy(ctx_sample, scenarios)` to compute labels. + Defaults to `nothing` (unlabeled samples). +- `seed`: passed to `MersenneTwister` when `rng` is not provided. +- `rng`: random number generator; overrides `seed` when provided. +- `kwargs...`: forwarded to [`generate_sample`](@ref). 
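These keywords combine as in the following sketch (`bench` and `anticipative_policy` are hypothetical names):

```julia
data = generate_dataset(bench, 100; seed=0)             # N instances, 1 scenario each
data = generate_dataset(bench, 1; nb_scenarios=50)      # 1 instance, K scenarios
data = generate_dataset(bench, 20; contexts_per_instance=5, nb_scenarios=10)
# Anticipative labeling: one labeled sample per scenario.
data = generate_dataset(bench, 100; nb_scenarios=10, target_policy=anticipative_policy)
```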
+""" +function generate_dataset( + bench::ExogenousStochasticBenchmark, + nb_instances::Int; + target_policy=nothing, + nb_scenarios::Int=1, + contexts_per_instance::Int=1, + seed=nothing, + rng=MersenneTwister(seed), + kwargs..., +) + nb_instances == 0 && return DataSample[] + return reduce( + vcat, + ( + generate_sample( + bench, rng; target_policy, nb_scenarios, contexts_per_instance, kwargs... + ) for _ in 1:nb_instances + ), + ) +end + +""" +$TYPEDEF + +Transforms an [`ExogenousStochasticBenchmark`](@ref) into a static benchmark via +Sample Average Approximation (SAA). + +For each (instance, context) pair, draws `nb_scenarios` fixed scenarios. These are stored +in the sample and used for feature computation, target labeling (via `target_policy`), +and gap evaluation. + +!!! note + `SampleAverageApproximation <: AbstractStaticBenchmark`, not `AbstractStochasticBenchmark`. + This is intentional: after wrapping, the scenarios are fixed at dataset-generation time + and the benchmark behaves as a static problem. Functions dispatching on + `AbstractStochasticBenchmark` (e.g. `is_exogenous`) will not match SAA instances. + +# Fields +$TYPEDFIELDS +""" +struct SampleAverageApproximation{B<:ExogenousStochasticBenchmark} <: + AbstractStaticBenchmark + "inner stochastic benchmark" + benchmark::B + "number of scenarios to draw per (instance, context) pair" + nb_scenarios::Int +end + +function is_minimization_problem(saa::SampleAverageApproximation) + return is_minimization_problem(saa.benchmark) +end + +function generate_maximizer(saa::SampleAverageApproximation; kwargs...) + return generate_maximizer(saa.benchmark; kwargs...) +end + +function generate_statistical_model(saa::SampleAverageApproximation; kwargs...) + return generate_statistical_model(saa.benchmark; kwargs...) +end + +function generate_sample( + saa::SampleAverageApproximation, rng; target_policy=nothing, kwargs... +) + inner = saa.benchmark + instance_sample = generate_instance(inner, rng; kwargs...) 
+ ctx = generate_context(inner, rng, instance_sample) + scenarios = [ + generate_scenario(inner, rng; ctx.context...) for _ in 1:(saa.nb_scenarios) + ] + if isnothing(target_policy) + return [ + DataSample(; + x=ctx.x, θ=ctx.θ, ctx.context..., extra=(; ctx.extra..., scenarios) + ), + ] + else + return target_policy(ctx, scenarios) + end +end + +""" +$TYPEDSIGNATURES + +Specialised [`generate_dataset`](@ref) for [`SampleAverageApproximation`](@ref). + +- Without `target_policy`: returns one static [`DataSample`](@ref) per instance, with + the `nb_scenarios` drawn scenarios stored in `extra.scenarios`. +- With `target_policy(ctx_sample, scenarios) -> Vector{DataSample}`: + labels each instance using all stored scenarios (same signature as + [`ExogenousStochasticBenchmark`](@ref) policies). +""" +function generate_dataset( + saa::SampleAverageApproximation, + nb_instances::Int; + target_policy=nothing, + seed=nothing, + rng=MersenneTwister(seed), + kwargs..., +) + nb_instances == 0 && return DataSample[] + return reduce( + vcat, (generate_sample(saa, rng; target_policy, kwargs...) for _ in 1:nb_instances) + ) +end + +""" +$TYPEDSIGNATURES + +Evaluate a decision `y` against stored scenarios (average over scenarios). +""" +function objective_value( + saa::SampleAverageApproximation, sample::DataSample, y::AbstractArray +) + return mean( + objective_value(saa.benchmark, sample, y, ξ) for ξ in sample.extra.scenarios + ) +end + +""" +$TYPEDSIGNATURES + +Evaluate the target solution in the sample against stored scenarios. +""" +function objective_value( + saa::SampleAverageApproximation, sample::DataSample{CTX,EX,F,S,C} +) where {CTX,EX,F,S<:AbstractArray,C} + return objective_value(saa, sample, sample.y) +end diff --git a/src/Warcraft/Warcraft.jl b/src/Warcraft/Warcraft.jl index 6be07f9..62a24e3 100644 --- a/src/Warcraft/Warcraft.jl +++ b/src/Warcraft/Warcraft.jl @@ -22,7 +22,7 @@ $TYPEDEF Benchmark for the Warcraft shortest path problem. Does not have any field.
""" -struct WarcraftBenchmark <: AbstractBenchmark end +struct WarcraftBenchmark <: AbstractStaticBenchmark end function Utils.objective_value(::WarcraftBenchmark, sample::DataSample, y::AbstractArray) return -dot(sample.θ, y) @@ -36,9 +36,17 @@ Downloads and decompresses the Warcraft dataset the first time it is called. !!! warning `dataset_size` is capped at 10000, i.e. the number of available samples in the dataset files. """ -function Utils.generate_dataset(::WarcraftBenchmark, dataset_size::Int=10) +function Utils.generate_dataset( + ::WarcraftBenchmark, + dataset_size::Int=10; + target_policy=nothing, + seed=nothing, + rng=MersenneTwister(seed), + kwargs..., +) decompressed_path = datadep"warcraft/data" - return create_dataset(decompressed_path, dataset_size) + dataset = create_dataset(decompressed_path, dataset_size) + return isnothing(target_policy) ? dataset : target_policy.(dataset) end """ diff --git a/test/dynamic_assortment.jl b/test/dynamic_assortment.jl index 3189f57..93c4f1e 100644 --- a/test/dynamic_assortment.jl +++ b/test/dynamic_assortment.jl @@ -340,10 +340,6 @@ end # Test maximizer generation maximizer = generate_maximizer(b) - # Test integration with sample data - sample = generate_sample(b, MersenneTwister(42)) - @test hasfield(typeof(sample), :context) - environments = generate_environments(b, 3; seed=42) # Evaluate policy to get data samples diff --git a/test/interface.jl b/test/interface.jl new file mode 100644 index 0000000..ead393e --- /dev/null +++ b/test/interface.jl @@ -0,0 +1,27 @@ +using DecisionFocusedLearningBenchmarks +using Random +using Test + +@testset "AbstractBenchmark interface" begin + struct DummyBenchmark <: AbstractStaticBenchmark end + b = DummyBenchmark() + rng = MersenneTwister(1234) + @test_throws ErrorException generate_instance(b, rng) + @test_throws ErrorException generate_maximizer(b) + @test_throws ErrorException generate_statistical_model(b; seed=0) + @test !has_visualization(b) + + function 
DecisionFocusedLearningBenchmarks.generate_instance( + ::DummyBenchmark, rng::AbstractRNG + ) + return DataSample(; x=rand(rng, 5)) + end + + dataset = generate_dataset(b, 10; seed=0) + @test length(dataset) == 10 + @test all(x -> length(x.x) == 5, dataset) + + struct DummyDynamicBenchmark <: AbstractDynamicBenchmark{true} end + db = DummyDynamicBenchmark() + @test_throws ErrorException compute_gap(db) +end diff --git a/test/runtests.jl b/test/runtests.jl index f1ba02d..575c875 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -8,6 +8,7 @@ using Random end include("utils.jl") + include("interface.jl") include("argmax.jl") include("argmax_2d.jl") diff --git a/test/vsp.jl b/test/vsp.jl index fd10c5e..87553a6 100644 --- a/test/vsp.jl +++ b/test/vsp.jl @@ -7,6 +7,8 @@ b = StochasticVehicleSchedulingBenchmark(; nb_tasks=25, nb_scenarios=10) + @test is_exogenous(b) + N = 2 K = 3