diff --git a/docs/src/api.md b/docs/src/api.md index 16d3658..0cff868 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -36,6 +36,18 @@ Modules = [DecisionFocusedLearningBenchmarks.Argmax] Public = false ``` +## Contextual Stochastic Argmax + +```@autodocs +Modules = [DecisionFocusedLearningBenchmarks.ContextualStochasticArgmax] +Private = false +``` + +```@autodocs +Modules = [DecisionFocusedLearningBenchmarks.ContextualStochasticArgmax] +Public = false +``` + ## Dynamic Vehicle Scheduling ```@autodocs diff --git a/docs/src/benchmarks/contextual_stochastic_argmax.md b/docs/src/benchmarks/contextual_stochastic_argmax.md new file mode 100644 index 0000000..59f588f --- /dev/null +++ b/docs/src/benchmarks/contextual_stochastic_argmax.md @@ -0,0 +1,37 @@ +# Contextual Stochastic Argmax + +[`ContextualStochasticArgmaxBenchmark`](@ref) is a minimalist contextual stochastic optimization benchmark problem. + +The decision maker selects one item out of ``n``. Item values are uncertain at decision time: they depend on a base utility plus a context-correlated perturbation revealed only after the decision is made. An observable context vector, correlated with the perturbation via a fixed linear map ``W``, allows the learner to anticipate the perturbation and pick the right item. + +## Problem Formulation + +**Instance**: ``c_{\text{base}} \sim \mathcal{U}[0,1]^n``, base values for ``n`` items. + +**Context**: ``x_{\text{raw}} \sim \mathcal{N}(0, I_d)``, a ``d``-dimensional signal correlated with item values. The feature vector passed to the model is ``x = [c_{\text{base}};\, x_{\text{raw}}] \in \mathbb{R}^{n+d}``. + +**Scenario**: the realized item values are +```math +\xi = c_{\text{base}} + W x_{\text{raw}} + \varepsilon, \quad \varepsilon \sim \mathcal{N}(0, \sigma^2 I_n) +``` +where ``W \in \mathbb{R}^{n \times d}`` is a fixed matrix unknown to the learner. + +**Decision**: ``y \in \{e_1, \ldots, e_n\}`` (one-hot vector selecting one item). 
+ +## Policies + +### DFL Policy + +```math +\xrightarrow[\text{Features}]{x} +\fbox{Neural network $\varphi_w$} +\xrightarrow[\text{Predicted values}]{\hat{\theta}} +\fbox{\texttt{one\_hot\_argmax}} +\xrightarrow[\text{Decision}]{y} +``` + +The neural network predicts item values ``\hat{\theta} \in \mathbb{R}^n`` from the feature vector ``x \in \mathbb{R}^{n+d}``. The default architecture is `Dense(n+d => n; bias=false)`, which can exactly recover the optimal linear predictor ``[I_n \mid W]``, so a well-trained model should reach near-zero gap. + +### SAA Policy + +``y_{\text{SAA}} = \operatorname{argmax}\bigl(\frac{1}{S}\sum_s \xi^{(s)}\bigr)`` — the exact SAA-optimal decision for linear argmax, accessible via `generate_baseline_policies(bench).saa`. diff --git a/docs/src/tutorials/warcraft_tutorial.jl b/docs/src/tutorials/warcraft_tutorial.jl index b801d7a..8b7b8d9 100644 --- a/docs/src/tutorials/warcraft_tutorial.jl +++ b/docs/src/tutorials/warcraft_tutorial.jl @@ -30,8 +30,8 @@ x = sample.x θ_true = sample.θ # `y` correspond to the optimal shortest path, encoded as a binary matrix: y_true = sample.y -# `context` is not used in this benchmark (no solver kwargs needed), so it is empty: -isempty(sample.context) +# `maximizer_kwargs` is not used in this benchmark (no solver kwargs needed), so it is empty: +isempty(sample.maximizer_kwargs) # For some benchmarks, we provide the following plotting method [`plot_solution`](@ref) to visualize the data: plot_solution(b, sample) diff --git a/ext/DFLBenchmarksPlotsExt.jl b/ext/DFLBenchmarksPlotsExt.jl index 0a5caae..23fe0d5 100644 --- a/ext/DFLBenchmarksPlotsExt.jl +++ b/ext/DFLBenchmarksPlotsExt.jl @@ -21,7 +21,9 @@ Reconstruct a new sample with `y` overridden and delegate to the 2-arg function plot_solution(bench::AbstractBenchmark, sample::DataSample, y; kwargs...) 
return plot_solution( bench, - DataSample(; sample.context..., x=sample.x, θ=sample.θ, y=y, extra=sample.extra); + DataSample(; + sample.maximizer_kwargs..., x=sample.x, θ=sample.θ, y=y, extra=sample.extra + ); kwargs..., ) end diff --git a/src/ContextualStochasticArgmax/ContextualStochasticArgmax.jl b/src/ContextualStochasticArgmax/ContextualStochasticArgmax.jl new file mode 100644 index 0000000..fb27286 --- /dev/null +++ b/src/ContextualStochasticArgmax/ContextualStochasticArgmax.jl @@ -0,0 +1,105 @@ +module ContextualStochasticArgmax + +using ..Utils +using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES +using Flux: Dense +using Random: Random, AbstractRNG, MersenneTwister +using Statistics: mean + +""" +$TYPEDEF + +Minimal contextual stochastic argmax benchmark. + +Per instance: `c_base ~ U[0,1]^n` (base utility, stored in `extra` of the instance sample). +Per context draw: `x_raw ~ N(0, I_d)` (observable context). Features: `x = [c_base; x_raw]`. +Per scenario: `ξ = c_base + W * x_raw + noise`, `noise ~ N(0, noise_std² I)`. +The learner sees `x` and must predict `θ̂` so that `argmax(θ̂)` ≈ `argmax(ξ)`. + +A linear model `Dense(n+d → n; bias=false)` can exactly recover `[I | W]`. 
+ +# Fields +$TYPEDFIELDS +""" +struct ContextualStochasticArgmaxBenchmark{M<:AbstractMatrix} <: + AbstractStochasticBenchmark{true} + "number of items (argmax dimension)" + n::Int + "number of context features" + d::Int + "fixed perturbation matrix W ∈ R^{n×d}, unknown to the learner" + W::M + "noise std for scenario draws" + noise_std::Float32 +end + +function ContextualStochasticArgmaxBenchmark(; + n::Int=10, d::Int=5, noise_std::Float32=0.1f0, seed=nothing +) + rng = MersenneTwister(seed) + W = randn(rng, Float32, n, d) + return ContextualStochasticArgmaxBenchmark(n, d, W, noise_std) +end + +Utils.is_minimization_problem(::ContextualStochasticArgmaxBenchmark) = false +Utils.generate_maximizer(::ContextualStochasticArgmaxBenchmark) = one_hot_argmax + +""" + generate_instance(::ContextualStochasticArgmaxBenchmark, rng) + +Draw `c_base ~ U[0,1]^n` and store it in `extra`. No solver kwargs are needed +(the maximizer is `one_hot_argmax`, which takes no kwargs). +""" +function Utils.generate_instance( + bench::ContextualStochasticArgmaxBenchmark, rng::AbstractRNG; kwargs... +) + c_base = rand(rng, Float32, bench.n) + return DataSample(; extra=(; c_base)) +end + +""" + generate_context(::ContextualStochasticArgmaxBenchmark, rng, instance_sample) + +Draw `x_raw ~ N(0, I_d)` and return a context sample with: +- `x = [c_base; x_raw]`: full feature vector seen by the ML model. +- `extra = (; c_base, x_raw)`: latents spread into [`generate_scenario`](@ref). +""" +function Utils.generate_context( + bench::ContextualStochasticArgmaxBenchmark, + rng::AbstractRNG, + instance_sample::DataSample, +) + c_base = instance_sample.c_base + x_raw = randn(rng, Float32, bench.d) + return DataSample(; x=vcat(c_base, x_raw), extra=(; x_raw, c_base)) +end + +""" + generate_scenario(::ContextualStochasticArgmaxBenchmark, rng; c_base, x_raw, kwargs...) + +Draw `ξ = c_base + W * x_raw + noise`, `noise ~ N(0, noise_std² I)`. +`c_base` and `x_raw` are spread from `ctx.extra` by the framework. 
+""" +function Utils.generate_scenario( + bench::ContextualStochasticArgmaxBenchmark, + rng::AbstractRNG; + c_base::AbstractVector, + x_raw::AbstractVector, + kwargs..., +) + θ_true = c_base + bench.W * x_raw + return θ_true + bench.noise_std * randn(rng, Float32, bench.n) +end + +function Utils.generate_statistical_model( + bench::ContextualStochasticArgmaxBenchmark; seed=nothing +) + Random.seed!(seed) + return Dense(bench.n + bench.d => bench.n; bias=false) +end + +include("policies.jl") + +export ContextualStochasticArgmaxBenchmark + +end diff --git a/src/ContextualStochasticArgmax/policies.jl b/src/ContextualStochasticArgmax/policies.jl new file mode 100644 index 0000000..76244ab --- /dev/null +++ b/src/ContextualStochasticArgmax/policies.jl @@ -0,0 +1,30 @@ +using Statistics: mean + +""" +$TYPEDSIGNATURES + +SAA baseline policy: returns `argmax(mean(scenarios))`. +For a linear argmax problem this is the exact SAA-optimal decision. +Returns a single labeled [`DataSample`](@ref) with `extra=(; scenarios)`. +""" +function csa_saa_policy(ctx_sample, scenarios) + y = one_hot_argmax(mean(scenarios)) + return [ + DataSample(; + ctx_sample.maximizer_kwargs..., + x=ctx_sample.x, + y=y, + extra=(; ctx_sample.extra..., scenarios), + ), + ] +end + +""" +$TYPEDSIGNATURES + +Return the named baseline policies for [`ContextualStochasticArgmaxBenchmark`](@ref). +Each policy has signature `(ctx_sample, scenarios) -> Vector{DataSample}`. 
+""" +function Utils.generate_baseline_policies(::ContextualStochasticArgmaxBenchmark) + return (; saa=Policy("SAA", "argmax of mean scenarios", csa_saa_policy)) +end diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl index f0b4f5b..6dc5db6 100644 --- a/src/DecisionFocusedLearningBenchmarks.jl +++ b/src/DecisionFocusedLearningBenchmarks.jl @@ -55,6 +55,7 @@ include("Warcraft/Warcraft.jl") include("FixedSizeShortestPath/FixedSizeShortestPath.jl") include("PortfolioOptimization/PortfolioOptimization.jl") include("StochasticVehicleScheduling/StochasticVehicleScheduling.jl") +include("ContextualStochasticArgmax/ContextualStochasticArgmax.jl") include("DynamicVehicleScheduling/DynamicVehicleScheduling.jl") include("DynamicAssortment/DynamicAssortment.jl") include("Maintenance/Maintenance.jl") @@ -71,8 +72,9 @@ export Policy, evaluate_policy! export generate_instance, generate_sample, generate_dataset, generate_environments, generate_environment -export generate_scenario +export generate_scenario, generate_context export generate_baseline_policies +export SampleAverageApproximation export generate_statistical_model export generate_maximizer export generate_anticipative_solver, generate_parametric_anticipative_solver @@ -91,6 +93,7 @@ using .Warcraft using .FixedSizeShortestPath using .PortfolioOptimization using .StochasticVehicleScheduling +using .ContextualStochasticArgmax using .DynamicVehicleScheduling using .DynamicAssortment using .Maintenance @@ -106,5 +109,6 @@ export StochasticVehicleSchedulingBenchmark export SubsetSelectionBenchmark export WarcraftBenchmark export MaintenanceBenchmark +export ContextualStochasticArgmaxBenchmark end # module DecisionFocusedLearningBenchmarks diff --git a/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl b/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl index 2b31618..ca28004 100644 --- a/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl +++ 
b/src/StochasticVehicleScheduling/StochasticVehicleScheduling.jl @@ -116,7 +116,7 @@ Returns a [`DataSample`](@ref) with features `x` and `instance` set, but `y=noth To obtain labeled samples, pass a `target_policy` to [`generate_dataset`](@ref): ```julia -policy = sample -> DataSample(; sample.context..., x=sample.x, +policy = sample -> DataSample(; sample.maximizer_kwargs..., x=sample.x, y=column_generation_algorithm(sample.instance)) dataset = generate_dataset(benchmark, N; target_policy=policy) ``` diff --git a/src/StochasticVehicleScheduling/policies.jl b/src/StochasticVehicleScheduling/policies.jl index d7de1f8..49d6607 100644 --- a/src/StochasticVehicleScheduling/policies.jl +++ b/src/StochasticVehicleScheduling/policies.jl @@ -5,10 +5,17 @@ SAA baseline policy: builds a stochastic instance from all K scenarios and solve via column generation. Returns a single labeled [`DataSample`](@ref) with `extra=(; scenarios)`. """ -function svs_saa_policy(sample, scenarios) - stochastic_inst = build_stochastic_instance(sample.instance, scenarios) +function svs_saa_policy(ctx_sample, scenarios) + stochastic_inst = build_stochastic_instance(ctx_sample.instance, scenarios) y = column_generation_algorithm(stochastic_inst) - return [DataSample(; sample.context..., x=sample.x, y, extra=(; scenarios))] + return [ + DataSample(; + ctx_sample.maximizer_kwargs..., + x=ctx_sample.x, + y, + extra=(; ctx_sample.extra..., scenarios), + ), + ] end """ @@ -17,9 +24,16 @@ $TYPEDSIGNATURES Deterministic baseline policy: solves the deterministic MIP (ignores scenario delays). Returns a single labeled [`DataSample`](@ref) with `extra=(; scenarios)`. 
""" -function svs_deterministic_policy(sample, scenarios; model_builder=highs_model) - y = deterministic_mip(sample.instance; model_builder) - return [DataSample(; sample.context..., x=sample.x, y, extra=(; scenarios))] +function svs_deterministic_policy(ctx_sample, scenarios; model_builder=highs_model) + y = deterministic_mip(ctx_sample.instance; model_builder) + return [ + DataSample(; + ctx_sample.maximizer_kwargs..., + x=ctx_sample.x, + y, + extra=(; ctx_sample.extra..., scenarios), + ), + ] end """ @@ -29,17 +43,46 @@ Local search baseline policy: builds a stochastic instance from all K scenarios solves via local search heuristic. Returns a single labeled [`DataSample`](@ref) with `extra=(; scenarios)`. """ -function svs_local_search_policy(sample, scenarios) - stochastic_inst = build_stochastic_instance(sample.instance, scenarios) +function svs_local_search_policy(ctx_sample, scenarios) + stochastic_inst = build_stochastic_instance(ctx_sample.instance, scenarios) y = local_search(stochastic_inst) - return [DataSample(; sample.context..., x=sample.x, y, extra=(; scenarios))] + return [ + DataSample(; + ctx_sample.maximizer_kwargs..., + x=ctx_sample.x, + y, + extra=(; ctx_sample.extra..., scenarios), + ), + ] +end + +""" +$TYPEDSIGNATURES + +Exact SAA MIP policy (linearized): solves the stochastic VSP exactly for the given +scenarios via [`compact_linearized_mip`](@ref). +Returns a single labeled [`DataSample`](@ref) with `extra=(; scenarios)`. + +Prefer this over [`svs_saa_policy`](@ref) when an exact solution is needed; requires +SCIP (default) or Gurobi. +""" +function svs_saa_mip_policy(ctx_sample, scenarios; model_builder=scip_model) + y = compact_linearized_mip(ctx_sample.instance, scenarios; model_builder) + return [ + DataSample(; + ctx_sample.maximizer_kwargs..., + x=ctx_sample.x, + y, + extra=(; ctx_sample.extra..., scenarios), + ), + ] end """ $TYPEDSIGNATURES Return the named baseline policies for [`StochasticVehicleSchedulingBenchmark`](@ref). 
-Each policy has signature `(sample, scenarios) -> Vector{DataSample}`. +Each policy has signature `(ctx_sample, scenarios) -> Vector{DataSample}`. """ function svs_generate_baseline_policies(::StochasticVehicleSchedulingBenchmark) return (; @@ -47,6 +90,11 @@ function svs_generate_baseline_policies(::StochasticVehicleSchedulingBenchmark) "Deterministic MIP", "Ignores delays", svs_deterministic_policy ), saa=Policy("SAA (col gen)", "Stochastic MIP over K scenarios", svs_saa_policy), + saa_mip=Policy( + "SAA (exact MIP)", + "Exact stochastic MIP over K scenarios via compact linearized formulation", + svs_saa_mip_policy, + ), local_search=Policy( "Local search", "Heuristic with K scenarios", svs_local_search_policy ), diff --git a/src/StochasticVehicleScheduling/solution/algorithms/mip.jl b/src/StochasticVehicleScheduling/solution/algorithms/mip.jl index 10b0b40..70a559e 100644 --- a/src/StochasticVehicleScheduling/solution/algorithms/mip.jl +++ b/src/StochasticVehicleScheduling/solution/algorithms/mip.jl @@ -84,6 +84,18 @@ end """ $TYPEDSIGNATURES +SAA variant: build stochastic instance from `scenarios` then solve via +[`compact_linearized_mip`](@ref). +""" +function compact_linearized_mip( + instance::Instance, scenarios::Vector{VSPScenario}; kwargs... +) + return compact_linearized_mip(build_stochastic_instance(instance, scenarios); kwargs...) +end + +""" +$TYPEDSIGNATURES + Returns the optimal solution of the Stochastic VSP instance, by solving the associated compact quadratic MIP. Note: If you have Gurobi, use `grb_model` as `model_builder` instead of `highs_model`. @@ -151,3 +163,13 @@ function compact_mip( sol = solution_from_JuMP_array(solution, graph) return sol.value end + +""" +$TYPEDSIGNATURES + +SAA variant: build stochastic instance from `scenarios` then solve via +[`compact_mip`](@ref). +""" +function compact_mip(instance::Instance, scenarios::Vector{VSPScenario}; kwargs...) 
+    return compact_mip(build_stochastic_instance(instance, scenarios); kwargs...)
+end
diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl
index 3711916..51e0834 100644
--- a/src/Utils/Utils.jl
+++ b/src/Utils/Utils.jl
@@ -22,7 +22,7 @@ include("model_builders.jl")
 export DataSample, Policy
 export evaluate_policy!
-export TopKMaximizer
+export TopKMaximizer, one_hot_argmax
 
 export AbstractEnvironment, get_seed, is_terminated, observe, reset!, step!
 
@@ -31,8 +31,9 @@ export ExogenousStochasticBenchmark,
     EndogenousStochasticBenchmark, ExogenousDynamicBenchmark, EndogenousDynamicBenchmark
 export generate_instance, generate_sample, generate_dataset
 export generate_statistical_model, generate_maximizer
-export generate_scenario
+export generate_scenario, generate_context
 export generate_environment, generate_environments
+export SampleAverageApproximation
 export generate_baseline_policies
 export generate_anticipative_solver, generate_parametric_anticipative_solver
diff --git a/src/Utils/data_sample.jl b/src/Utils/data_sample.jl
index ccdf2c0..a22963f 100644
--- a/src/Utils/data_sample.jl
+++ b/src/Utils/data_sample.jl
@@ -4,8 +4,8 @@ $TYPEDEF
 Data sample data structure. Its main purpose is to store datasets generated by the
 benchmarks. It has 3 main (optional) fields: features `x`, cost parameters `θ`, and
 solution `y`.
-Additionally, it has a `context` field (solver kwargs, spread into the maximizer as
-`maximizer(θ; sample.context...)`) and an `extra` field (non-solver data, never passed
+Additionally, it has a `maximizer_kwargs` field (solver kwargs, spread into the maximizer as
+`maximizer(θ; sample.maximizer_kwargs...)`) and an `extra` field (non-solver data, never passed
 to the maximizer).
The separation prevents silent breakage from accidentally passing non-solver data @@ -15,8 +15,8 @@ The separation prevents silent breakage from accidentally passing non-solver dat $TYPEDFIELDS """ struct DataSample{ - CTX<:NamedTuple, - EX<:NamedTuple, + K<:NamedTuple, + E<:NamedTuple, F<:Union{AbstractArray,Nothing}, S<:Union{AbstractArray,Nothing}, C<:Union{AbstractArray,Nothing}, @@ -27,11 +27,11 @@ struct DataSample{ θ::C "output solution (optional)" y::S - "context information as solver kwargs, e.g. instance, graph, etc." - context::CTX + "solver kwargs, e.g. instance, graph, etc." + maximizer_kwargs::K "additional data, never passed to the maximizer, e.g. scenario, objective value, reward, step count, etc. Can be used for any purpose by the user, such as plotting utilities." - extra::EX + extra::E end """ @@ -39,35 +39,37 @@ $TYPEDSIGNATURES Constructor for `DataSample` with keyword arguments. -All keyword arguments beyond `x`, `θ`, `y`, and `extra` are collected into the `context` +All keyword arguments beyond `x`, `θ`, `y`, and `extra` are collected into the `maximizer_kwargs` field (solver kwargs). The `extra` keyword accepts a `NamedTuple` of non-solver data. -Fields in `context` and `extra` must be disjoint. An error is thrown if they overlap. +Fields in `maximizer_kwargs` and `extra` must be disjoint. An error is thrown if they overlap. Both can be accessed directly via property forwarding. 
# Examples ```julia -# Instance goes in context +# Instance goes in maximizer_kwargs d = DataSample(x=[1,2,3], θ=[4,5,6], y=[7,8,9], instance="my_instance") -d.instance # "my_instance" (from context) +d.instance # "my_instance" (from maximizer_kwargs) # Scenario goes in extra d = DataSample(x=x, y=y, instance=inst, extra=(; scenario=ξ)) d.scenario # ξ (from extra) -# State goes in context, reward in extra +# State goes in maximizer_kwargs, reward in extra d = DataSample(x=x, y=y, instance=state, extra=(; reward=-1.5)) -d.instance # state (from context) +d.instance # state (from maximizer_kwargs) d.reward # -1.5 (from extra) ``` """ function DataSample(; x=nothing, θ=nothing, y=nothing, extra=NamedTuple(), kwargs...) - context = (; kwargs...) - overlap = intersect(keys(context), keys(extra)) + maximizer_kwargs = (; kwargs...) + overlap = intersect(keys(maximizer_kwargs), keys(extra)) if !isempty(overlap) - error("Keys $(collect(overlap)) appear in both context and extra of DataSample") + error( + "Keys $(collect(overlap)) appear in both maximizer_kwargs and extra of DataSample", + ) end - return DataSample(x, θ, y, context, extra) + return DataSample(x, θ, y, maximizer_kwargs, extra) end """ @@ -75,14 +77,14 @@ $TYPEDSIGNATURES Extended property access for `DataSample`. -Allows accessing `context` and `extra` fields directly as properties. -`context` is searched first; if the key is not found there, `extra` is searched. +Allows accessing `maximizer_kwargs` and `extra` fields directly as properties. +`maximizer_kwargs` is searched first; if the key is not found there, `extra` is searched. 
""" function Base.getproperty(d::DataSample, name::Symbol) - if name in (:x, :θ, :y, :context, :extra) + if name in (:x, :θ, :y, :maximizer_kwargs, :extra) return getfield(d, name) else - ctx = getfield(d, :context) + ctx = getfield(d, :maximizer_kwargs) if haskey(ctx, name) return getproperty(ctx, name) end @@ -94,12 +96,12 @@ end $TYPEDSIGNATURES Return all property names of a `DataSample`, including both struct fields and forwarded -fields from `context` and `extra`. +fields from `maximizer_kwargs` and `extra`. This enables tab completion for all available properties. """ function Base.propertynames(d::DataSample, private::Bool=false) - ctx_names = propertynames(getfield(d, :context), private) + ctx_names = propertynames(getfield(d, :maximizer_kwargs), private) extra_names = propertynames(getfield(d, :extra), private) return (fieldnames(DataSample)..., ctx_names..., extra_names...) end @@ -126,7 +128,7 @@ function Base.show(io::IO, d::DataSample) y_str = sprint(show, d.y; context=io_limited) push!(fields, "y_true=$y_str") end - for (key, value) in pairs(d.context) + for (key, value) in pairs(d.maximizer_kwargs) value_str = sprint(show, value; context=io_limited) push!(fields, "$key=$value_str") end @@ -154,8 +156,8 @@ Transform the features in the dataset. """ function StatsBase.transform(t, dataset::AbstractVector{<:DataSample}) return map(dataset) do d - (; context, extra, x, θ, y) = d - DataSample(StatsBase.transform(t, x), θ, y, context, extra) + (; maximizer_kwargs, extra, x, θ, y) = d + DataSample(StatsBase.transform(t, x), θ, y, maximizer_kwargs, extra) end end @@ -177,8 +179,8 @@ Reconstruct the features in the dataset. 
""" function StatsBase.reconstruct(t, dataset::AbstractVector{<:DataSample}) return map(dataset) do d - (; context, extra, x, θ, y) = d - DataSample(StatsBase.reconstruct(t, x), θ, y, context, extra) + (; maximizer_kwargs, extra, x, θ, y) = d + DataSample(StatsBase.reconstruct(t, x), θ, y, maximizer_kwargs, extra) end end diff --git a/src/Utils/interface.jl b/src/Utils/interface.jl index efc9c26..206ca4f 100644 --- a/src/Utils/interface.jl +++ b/src/Utils/interface.jl @@ -198,7 +198,7 @@ function compute_gap( target_obj = objective_value(bench, sample) x = sample.x θ = statistical_model(x) - y = maximizer(θ; sample.context...) + y = maximizer(θ; sample.maximizer_kwargs...) obj = objective_value(bench, sample, y) Δ = check ? obj - target_obj : target_obj - obj return Δ / abs(target_obj) @@ -221,9 +221,11 @@ anticipative targets and compute objective values. and features but **no scenario**. Scenarios are added later by [`generate_dataset`](@ref) via [`generate_scenario`](@ref). - [`generate_scenario`](@ref)`(bench, rng; kwargs...)`: draws a random scenario. - Instance and context fields are passed as keyword arguments spread from `sample.context`. + Solver kwargs are spread from `sample.maximizer_kwargs`; context latents from `ctx.extra`. # Optional methods +- [`generate_context`](@ref)`(bench, rng, instance_sample)`: enriches the instance with + observable context (default: identity). Override for contextual stochastic benchmarks. - [`generate_anticipative_solver`](@ref)`(bench)`: returns a callable `(scenario; kwargs...) -> y` that computes the anticipative solution per scenario. - [`generate_parametric_anticipative_solver`](@ref)`(bench)`: returns a callable @@ -232,20 +234,21 @@ anticipative targets and compute objective values. 
# Dataset generation (exogenous only) [`generate_dataset`](@ref) is specialised for [`ExogenousStochasticBenchmark`](@ref) and -supports all three standard structures via `nb_scenarios`: +supports all three standard structures via `nb_scenarios` and `nb_contexts`: | Setting | Call | |---------|------| | 1 instance with K scenarios | `generate_dataset(bench, 1; nb_scenarios=K)` | | N instances with 1 scenario | `generate_dataset(bench, N)` (default) | | N instances with K scenarios | `generate_dataset(bench, N; nb_scenarios=K)` | +| N instances with M contexts × K scenarios | `generate_dataset(bench, N; nb_contexts=M, nb_scenarios=K)` | -By default (no `target_policy`), each [`DataSample`](@ref) has `context` holding the -instance (solver kwargs) and `extra=(; scenario)` holding one scenario. +By default (no `target_policy`), each [`DataSample`](@ref) has `maximizer_kwargs` holding +the solver kwargs and `extra=(; scenario)` holding one scenario. -Provide a `target_policy(sample, scenarios) -> Vector{DataSample}` to compute labels. -This covers both anticipative (K samples, one per scenario) and SAA (1 sample from all K -scenarios) labeling strategies. +Provide a `target_policy(ctx_sample, scenarios) -> Vector{DataSample}` +to compute labels. This covers both anticipative (K samples, one per scenario) and SAA +(1 sample from all K scenarios) labeling strategies. """ abstract type AbstractStochasticBenchmark{exogenous} <: AbstractBenchmark end @@ -261,13 +264,42 @@ const EndogenousStochasticBenchmark = AbstractStochasticBenchmark{false} """ generate_scenario(::ExogenousStochasticBenchmark, rng::AbstractRNG; kwargs...) -> scenario -Draw a random scenario. Instance and context fields are passed as keyword arguments, -spread from `sample.context`: +Draw a random scenario. 
Solver kwargs are passed as keyword arguments spread from +`sample.maximizer_kwargs`, and context latents (if any) are spread from `ctx.extra`: - scenario = generate_scenario(bench, rng; sample.context...) + ξ = generate_scenario(bench, rng; ctx.extra..., ctx.maximizer_kwargs...) """ function generate_scenario end +""" + generate_context(bench::AbstractStochasticBenchmark, rng, instance_sample::DataSample) + -> DataSample + +Enrich `instance_sample` with observable context drawn from `rng`. +Returns a new `DataSample` extending the instance: `.x` is the final ML feature vector +(possibly augmented with context features) and `.extra` holds any latent context fields +needed by [`generate_scenario`](@ref). + +**Default**: returns `instance_sample` unchanged — no context augmentation. +Non-contextual benchmarks (e.g. SVS) use this default. + +**Override** to add per-sample context features: +```julia +function generate_context(bench::MyBench, rng, instance_sample::DataSample) + x_raw = randn(rng, Float32, bench.d) + return DataSample(; + x=vcat(instance_sample.x, x_raw), + instance_sample.maximizer_kwargs..., + extra=(; x_raw), + ) +end +``` +Fields in `.extra` are spread into [`generate_scenario`](@ref) as kwargs. +""" +function generate_context(::AbstractStochasticBenchmark, rng, instance_sample::DataSample) + return instance_sample +end + """ generate_anticipative_solver(::AbstractBenchmark) -> callable @@ -296,32 +328,51 @@ $TYPEDSIGNATURES Default [`generate_sample`](@ref) for exogenous stochastic benchmarks. -Calls [`generate_instance`](@ref), draws `nb_scenarios` scenarios via -[`generate_scenario`](@ref), then: -- Without `target_policy`: returns K unlabeled samples, each with one scenario in - `extra=(; scenario=ξ)`. -- With `target_policy`: calls `target_policy(sample, scenarios)` and returns the result. 
+Calls [`generate_instance`](@ref), then [`generate_context`](@ref) (default: identity), +draws scenarios via [`generate_scenario`](@ref), then: +- Without `target_policy`: returns M×K unlabeled samples (`nb_contexts` contexts × + `nb_scenarios` scenarios each), each with one scenario in `extra=(; scenario=ξ)`. +- With `target_policy`: calls `target_policy(ctx_sample, scenarios)` + per context and returns the result. -`target_policy(sample, scenarios) -> Vector{DataSample}` enables anticipative labeling -(K samples, one per scenario) or SAA (1 sample aggregating all K scenarios). +`target_policy(ctx_sample, scenarios) -> Vector{DataSample}` enables +anticipative labeling (K samples, one per scenario) or SAA (1 sample aggregating all K +scenarios). """ function generate_sample( bench::ExogenousStochasticBenchmark, rng; target_policy=nothing, nb_scenarios::Int=1, + nb_contexts::Int=1, kwargs..., ) - sample = generate_instance(bench, rng; kwargs...) - scenarios = [generate_scenario(bench, rng; sample.context...) for _ in 1:nb_scenarios] - if isnothing(target_policy) - return [ - DataSample(; x=sample.x, θ=sample.θ, sample.context..., extra=(; scenario=ξ)) - for ξ in scenarios - ] - else - return target_policy(sample, scenarios) + instance_sample = generate_instance(bench, rng; kwargs...) + result = DataSample[] + for _ in 1:nb_contexts + ctx = generate_context(bench, rng, instance_sample) + if isnothing(target_policy) + for _ in 1:nb_scenarios + ξ = generate_scenario(bench, rng; ctx.extra..., ctx.maximizer_kwargs...) + push!( + result, + DataSample(; + x=ctx.x, + θ=ctx.θ, + ctx.maximizer_kwargs..., + extra=(; ctx.extra..., scenario=ξ), + ), + ) + end + else + scenarios = [ + generate_scenario(bench, rng; ctx.extra..., ctx.maximizer_kwargs...) for + _ in 1:nb_scenarios + ] + append!(result, target_policy(ctx, scenarios)) + end end + return result end """ @@ -329,16 +380,21 @@ $TYPEDSIGNATURES Specialised [`generate_dataset`](@ref) for exogenous stochastic benchmarks. 
-Generates `nb_instances` problem instances, each with `nb_scenarios` independent -scenario draws. The scenario→sample mapping is controlled by the `target_policy`: -- Without `target_policy` (default): K scenarios produce K unlabeled samples (1:1). -- With `target_policy(sample, scenarios) -> Vector{DataSample}`: enables anticipative - labeling (K labeled samples) or SAA (1 sample aggregating all K scenarios). +Generates `nb_instances` problem instances, each with `nb_contexts` context draws +and `nb_scenarios` scenario draws per context. The scenario→sample mapping is controlled +by the `target_policy`: +- Without `target_policy` (default): M contexts × K scenarios produce M×K unlabeled + samples per instance. +- With `target_policy(ctx_sample, scenarios) -> Vector{DataSample}`: + enables anticipative labeling (K labeled samples) or SAA (1 sample aggregating all K + scenarios). # Keyword arguments -- `nb_scenarios::Int = 1`: scenarios per instance (K). -- `target_policy`: when provided, called as `target_policy(sample, scenarios)` to - compute labels. Defaults to `nothing` (unlabeled samples). +- `nb_scenarios::Int = 1`: scenarios per context (K). +- `nb_contexts::Int = 1`: context draws per instance (M). +- `target_policy`: when provided, called as + `target_policy(ctx_sample, scenarios)` to compute labels. + Defaults to `nothing` (unlabeled samples). - `seed`: passed to `MersenneTwister` when `rng` is not provided. - `rng`: random number generator; overrides `seed` when provided. - `kwargs...`: forwarded to [`generate_sample`](@ref). @@ -348,6 +404,7 @@ function generate_dataset( nb_instances::Int; target_policy=nothing, nb_scenarios::Int=1, + nb_contexts::Int=1, seed=nothing, rng=MersenneTwister(seed), kwargs..., @@ -355,7 +412,9 @@ function generate_dataset( Random.seed!(rng, seed) samples = DataSample[] for _ in 1:nb_instances - new_samples = generate_sample(bench, rng; target_policy, nb_scenarios, kwargs...) 
+ new_samples = generate_sample( + bench, rng; target_policy, nb_scenarios, nb_contexts, kwargs... + ) append!(samples, new_samples) end return samples @@ -364,6 +423,108 @@ end """ $TYPEDEF +Transforms an [`ExogenousStochasticBenchmark`](@ref) into a static benchmark via +Sample Average Approximation (SAA). + +For each (instance, context) pair, draws `nb_scenarios` fixed scenarios. These are stored +in the sample and used for feature computation, target labeling (via `target_policy`), +and gap evaluation. + +# Fields +$TYPEDFIELDS +""" +struct SampleAverageApproximation{B<:ExogenousStochasticBenchmark} <: AbstractBenchmark + "inner stochastic benchmark" + benchmark::B + "number of scenarios to draw per (instance, context) pair" + nb_scenarios::Int +end + +function is_minimization_problem(saa::SampleAverageApproximation) + return is_minimization_problem(saa.benchmark) +end + +function generate_maximizer(saa::SampleAverageApproximation; kwargs...) + return generate_maximizer(saa.benchmark; kwargs...) +end + +function generate_statistical_model(saa::SampleAverageApproximation; kwargs...) + return generate_statistical_model(saa.benchmark; kwargs...) +end + +function generate_sample( + saa::SampleAverageApproximation, rng; target_policy=nothing, kwargs... +) + inner = saa.benchmark + instance_sample = generate_instance(inner, rng; kwargs...) + ctx = generate_context(inner, rng, instance_sample) + scenarios = [ + generate_scenario(inner, rng; ctx.extra..., ctx.maximizer_kwargs...) for + _ in 1:(saa.nb_scenarios) + ] + if isnothing(target_policy) + return [ + DataSample(; + x=ctx.x, ctx.maximizer_kwargs..., extra=(; ctx.extra..., scenarios) + ), + ] + else + return target_policy(ctx, scenarios) + end +end + +""" +$TYPEDSIGNATURES + +Specialised [`generate_dataset`](@ref) for [`SampleAverageApproximation`](@ref). + +- Without `target_policy`: returns one static [`DataSample`](@ref) per instance, with + `nb_scenarios` stored in `extra.scenarios`. 
+- With `target_policy(ctx_sample, scenarios) -> Vector{DataSample}`:
+  labels each instance using all stored scenarios (same signature as
+  [`ExogenousStochasticBenchmark`](@ref) policies).
+"""
+function generate_dataset(
+    saa::SampleAverageApproximation,
+    nb_instances::Int;
+    target_policy=nothing,
+    seed=nothing,
+    rng=MersenneTwister(seed),
+    kwargs...,
+)
+    Random.seed!(rng, seed)
+    samples = DataSample[]
+    for _ in 1:nb_instances
+        append!(samples, generate_sample(saa, rng; target_policy, kwargs...))
+    end
+    return samples
+end
+
+"""
+$TYPEDSIGNATURES
+
+Evaluate a decision `y` against stored scenarios (average over scenarios).
+"""
+function objective_value(
+    saa::SampleAverageApproximation, sample::DataSample, y::AbstractArray
+)
+    return mean(objective_value(saa.benchmark, ξ, y) for ξ in sample.extra.scenarios)
+end
+
+"""
+$TYPEDSIGNATURES
+
+Evaluate the target solution in the sample against stored scenarios.
+"""
+function objective_value(
+    saa::SampleAverageApproximation, sample::DataSample{CTX,EX,F,S,C}
+) where {CTX,EX,F,S<:AbstractArray,C}
+    return objective_value(saa, sample, sample.y)
+end
+
+"""
+$TYPEDEF
+
 Abstract type interface for multi-stage stochastic (dynamic) benchmark problems.
 
 Extends [`AbstractStochasticBenchmark`](@ref). The `{exogenous}` parameter retains its
diff --git a/src/Utils/maximizers.jl b/src/Utils/maximizers.jl
index ee5ceea..59eb824 100644
--- a/src/Utils/maximizers.jl
+++ b/src/Utils/maximizers.jl
@@ -20,3 +20,14 @@ function (m::TopKMaximizer)(θ; kwargs...)
     res[solution] .= 1
     return res
 end
+
+"""
+$TYPEDSIGNATURES
+
+Return a one-hot encoding of the index of the maximum value in `z`.
+"""
+function one_hot_argmax(z::AbstractVector{R}; kwargs...)
where {R<:Real} + e = zeros(R, length(z)) + e[argmax(z)] = one(R) + return e +end diff --git a/test/argmax.jl b/test/argmax.jl index d772c4d..afde005 100644 --- a/test/argmax.jl +++ b/test/argmax.jl @@ -24,7 +24,7 @@ @test size(x) == (nb_features, instance_dim) @test length(θ_true) == instance_dim @test length(y_true) == instance_dim - @test isempty(sample.context) + @test isempty(sample.maximizer_kwargs) @test all(y_true .== maximizer(θ_true)) θ = model(x) diff --git a/test/contextual_stochastic_argmax.jl b/test/contextual_stochastic_argmax.jl new file mode 100644 index 0000000..7fe6061 --- /dev/null +++ b/test/contextual_stochastic_argmax.jl @@ -0,0 +1,80 @@ +@testset "ContextualStochasticArgmaxBenchmark" begin + using DecisionFocusedLearningBenchmarks + + b = ContextualStochasticArgmaxBenchmark(; n=5, d=3, seed=0) + + # Unlabeled: N instances × M contexts × K scenarios = N*M*K samples + dataset = generate_dataset(b, 10; nb_contexts=2, nb_scenarios=4) + @test length(dataset) == 80 + sample = first(dataset) + @test size(sample.x) == (8,) # n+d + @test sample.x ≈ vcat(sample.c_base, sample.extra.x_raw) # features = [c_base; x_raw] + @test sample.y === nothing + @test sample.scenario isa AbstractVector{Float32} && length(sample.scenario) == 5 + + # Maximizer and model + maximizer = generate_maximizer(b) + model = generate_statistical_model(b; seed=0) + @test sum(maximizer(sample.scenario)) ≈ 1.0 # one-hot + + # Test with anticipative target_policy + policy = + (ctx_sample, scenarios) -> [ + DataSample(; + ctx_sample.maximizer_kwargs..., + x=ctx_sample.x, + y=maximizer(s), + extra=(; ctx_sample.extra..., scenario=s), + ) for s in scenarios + ] + labeled = generate_dataset(b, 5; nb_scenarios=3, target_policy=policy) + @test length(labeled) == 15 + @test sum(first(labeled).y) ≈ 1.0 # one-hot label + + # Reproducibility + d1 = generate_dataset(b, 5; nb_scenarios=2, seed=42) + b2 = ContextualStochasticArgmaxBenchmark(; n=5, d=3, seed=0) + d2 = generate_dataset(b2, 5; 
nb_scenarios=2, seed=42) + @test first(d1).x ≈ first(d2).x +end + +@testset "csa_saa_policy" begin + using DecisionFocusedLearningBenchmarks + + b = ContextualStochasticArgmaxBenchmark(; n=5, d=3, seed=0) + policies = generate_baseline_policies(b) + + labeled = generate_dataset(b, 3; nb_scenarios=4, target_policy=policies.saa) + @test length(labeled) == 3 # one sample per context (SAA aggregates) + @test sum(first(labeled).y) ≈ 1.0 # one-hot label + @test length(first(labeled).extra.scenarios) == 4 # scenarios stored in extra +end + +@testset "SampleAverageApproximation wrapper on ContextualStochasticArgmax" begin + using DecisionFocusedLearningBenchmarks + using Statistics: mean + + inner = ContextualStochasticArgmaxBenchmark(; n=5, d=3, seed=0) + saa = SampleAverageApproximation(inner, 20) + + # Static instances: each sample has x and stored scenarios (no θ) + dataset = generate_dataset(saa, 10) + @test length(dataset) == 10 + sample = first(dataset) + @test size(sample.x) == (8,) + @test sample.θ === nothing + @test length(sample.extra.scenarios) == 20 # nb_scenarios + + # Label by SAA-optimal: y = argmax(mean(scenarios)) for linear objectives + maximizer = generate_maximizer(saa) + labeled = map(dataset) do s + y_saa = maximizer(mean(s.scenarios)) + DataSample(; s.maximizer_kwargs..., x=s.x, y=y_saa, extra=s.extra) + end + @test sum(first(labeled).y) ≈ 1.0 + + # compute_gap averages over stored scenarios via objective_value override + model = generate_statistical_model(saa; seed=0) + gap = compute_gap(saa, labeled, model, maximizer) + @test isfinite(gap) +end diff --git a/test/dynamic_assortment.jl b/test/dynamic_assortment.jl index a1a136e..a617170 100644 --- a/test/dynamic_assortment.jl +++ b/test/dynamic_assortment.jl @@ -342,7 +342,7 @@ end # Test integration with sample data sample = generate_sample(b, MersenneTwister(42)) - @test hasfield(typeof(sample), :context) + @test hasfield(typeof(sample), :maximizer_kwargs) environments = generate_environments(b, 
3; seed=42) diff --git a/test/fixed_size_shortest_path.jl b/test/fixed_size_shortest_path.jl index eacdd64..d8a547f 100644 --- a/test/fixed_size_shortest_path.jl +++ b/test/fixed_size_shortest_path.jl @@ -25,7 +25,7 @@ @test size(x) == (p,) @test length(θ_true) == A @test length(y_true) == A - @test isempty(sample.context) + @test isempty(sample.maximizer_kwargs) @test all(y_true .== maximizer(θ_true)) θ = model(x) @test length(θ) == length(θ_true) diff --git a/test/maintenance.jl b/test/maintenance.jl index a2a9983..80e648a 100644 --- a/test/maintenance.jl +++ b/test/maintenance.jl @@ -197,7 +197,7 @@ end # Test integration with sample data sample = generate_sample(b, MersenneTwister(42)) - @test hasfield(typeof(sample), :context) + @test hasfield(typeof(sample), :maximizer_kwargs) environments = generate_environments(b, 3; seed=42) diff --git a/test/portfolio_optimization.jl b/test/portfolio_optimization.jl index a722c5b..023214f 100644 --- a/test/portfolio_optimization.jl +++ b/test/portfolio_optimization.jl @@ -16,7 +16,7 @@ @test size(x) == (p,) @test length(θ_true) == d @test length(y_true) == d - @test isempty(sample.context) + @test isempty(sample.maximizer_kwargs) @test all(y_true .== maximizer(θ_true)) θ = model(x) diff --git a/test/ranking.jl b/test/ranking.jl index e8e5939..8045cc6 100644 --- a/test/ranking.jl +++ b/test/ranking.jl @@ -21,7 +21,7 @@ @test size(x) == (nb_features, instance_dim) @test length(θ_true) == instance_dim @test length(y_true) == instance_dim - @test isempty(sample.context) + @test isempty(sample.maximizer_kwargs) @test all(y_true .== maximizer(θ_true)) θ = model(x) diff --git a/test/runtests.jl b/test/runtests.jl index 00bea25..f1ba02d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -17,6 +17,7 @@ using Random include("maintenance.jl") include("warcraft.jl") include("vsp.jl") + include("contextual_stochastic_argmax.jl") include("portfolio_optimization.jl") @testset "Dynamic Vehicle Scheduling Problem" begin diff --git 
a/test/subset_selection.jl b/test/subset_selection.jl index ba0ff10..da6e677 100644 --- a/test/subset_selection.jl +++ b/test/subset_selection.jl @@ -23,7 +23,7 @@ @test size(x) == (n,) @test length(θ_true) == n @test length(y_true) == n - @test isempty(sample.context) + @test isempty(sample.maximizer_kwargs) @test all(y_true .== maximizer(θ_true)) # Features and true weights should be equal diff --git a/test/utils.jl b/test/utils.jl index 17d3373..9e92457 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -58,7 +58,7 @@ end @test occursin("y_true", s) @test occursin("instance=\"this is an instance\"", s) - @test propertynames(sample) == (:x, :θ, :y, :context, :extra, :instance) + @test propertynames(sample) == (:x, :θ, :y, :maximizer_kwargs, :extra, :instance) # Create a dataset for testing N = 5 @@ -81,7 +81,7 @@ end for i in 1:N @test dataset_zt[i].θ == dataset[i].θ @test dataset_zt[i].y == dataset[i].y - @test dataset_zt[i].context == dataset[i].context + @test dataset_zt[i].maximizer_kwargs == dataset[i].maximizer_kwargs end # Check that features are actually transformed @@ -97,7 +97,7 @@ end for i in 1:N @test dataset_copy[i].θ == dataset[i].θ @test dataset_copy[i].y == dataset[i].y - @test dataset_copy[i].context == dataset[i].context + @test dataset_copy[i].maximizer_kwargs == dataset[i].maximizer_kwargs end # Test reconstruct (non-mutating) @@ -109,7 +109,7 @@ end @test dataset_reconstructed[i].x ≈ dataset[i].x atol = 1e-10 @test dataset_reconstructed[i].θ == dataset[i].θ @test dataset_reconstructed[i].y == dataset[i].y - @test dataset_reconstructed[i].context == dataset[i].context + @test dataset_reconstructed[i].maximizer_kwargs == dataset[i].maximizer_kwargs end # Test reconstruct! 
(mutating) diff --git a/test/vsp.jl b/test/vsp.jl index fd10c5e..1a27d36 100644 --- a/test/vsp.jl +++ b/test/vsp.jl @@ -91,6 +91,6 @@ anticipative_solver = generate_anticipative_solver(b) sample = unlabeled[1] - y_anticipative = anticipative_solver(sample.scenario; sample.context...) + y_anticipative = anticipative_solver(sample.scenario; sample.maximizer_kwargs...) @test y_anticipative isa BitVector end diff --git a/test/warcraft.jl b/test/warcraft.jl index 4d5f67d..f361eb3 100644 --- a/test/warcraft.jl +++ b/test/warcraft.jl @@ -29,7 +29,7 @@ y_true = sample.y @test size(x) == (96, 96, 3, 1) @test all(θ_true .<= 0) - @test isempty(sample.context) + @test isempty(sample.maximizer_kwargs) θ = model(x) @test size(θ) == size(θ_true)