Add SAA baseline policy for the contextual stochastic argmax benchmark

BatyLeo · BatyLeo · commit 9443ac7bc7b2 · 2026-03-25T14:30:10.000+01:00
diff --git a/src/ContextualStochasticArgmax/ContextualStochasticArgmax.jl b/src/ContextualStochasticArgmax/ContextualStochasticArgmax.jl
@@ -1,9 +1,10 @@
 module ContextualStochasticArgmax
 
 using ..Utils
-using DocStringExtensions: TYPEDEF, TYPEDFIELDS
+using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES
 using Flux: Dense
 using Random: Random, AbstractRNG, MersenneTwister
+using Statistics: mean
 
 """
 $TYPEDEF
@@ -97,6 +98,8 @@ function Utils.generate_statistical_model(
     return Dense(bench.n + bench.d => bench.n; bias=false)
 end
 
+include("policies.jl")
+
 export ContextualStochasticArgmaxBenchmark
 
 end
diff --git a/src/ContextualStochasticArgmax/policies.jl b/src/ContextualStochasticArgmax/policies.jl
@@ -0,0 +1,30 @@
+using Statistics: mean
+
+"""
+$TYPEDSIGNATURES
+
+SAA baseline policy: the label is `y = one_hot_argmax(mean(scenarios))`, which for a
+linear argmax problem is the exact SAA-optimal decision. Returns a one-element vector
+holding a [`DataSample`](@ref) built from `ctx_sample`, with `scenarios` added to `extra`.
+"""
+function csa_saa_policy(ctx_sample, scenarios)
+    y = one_hot_argmax(mean(scenarios))  # average the scenarios first, then one_hot_argmax
+    return [
+        DataSample(;
+            ctx_sample.maximizer_kwargs...,  # forwarded unchanged from the context sample
+            x=ctx_sample.x,
+            y=y,
+            extra=(; ctx_sample.extra..., scenarios),  # existing extra fields plus `scenarios`
+        ),
+    ]
+end
+
+"""
+$TYPEDSIGNATURES
+
+Return the baseline policies for [`ContextualStochasticArgmaxBenchmark`](@ref) as a named
+tuple (`saa`). Each policy has signature `(ctx_sample, scenarios) -> Vector{DataSample}`.
+"""
+function Utils.generate_baseline_policies(::ContextualStochasticArgmaxBenchmark)
+    return (; saa=Policy("SAA", "argmax of mean scenarios", csa_saa_policy))
+end
diff --git a/test/contextual_stochastic_argmax.jl b/test/contextual_stochastic_argmax.jl
@@ -38,6 +38,18 @@
     @test first(d1).x ≈ first(d2).x
 end
 
+@testset "csa_saa_policy" begin
+    using DecisionFocusedLearningBenchmarks
+
+    b = ContextualStochasticArgmaxBenchmark(; n=5, d=3, seed=0)
+    policies = generate_baseline_policies(b)
+    # Build a dataset of size 3 with nb_scenarios=4, labeled by the SAA baseline policy.
+    labeled = generate_dataset(b, 3; nb_scenarios=4, target_policy=policies.saa)
+    @test length(labeled) == 3                               # SAA aggregates scenarios: one sample per context
+    @test sum(first(labeled).y) ≈ 1.0                       # entries sum to 1, as a one-hot label must
+    @test length(first(labeled).extra.scenarios) == 4       # all 4 scenarios are kept in `extra`
+end
+
 @testset "SampleAverageApproximation wrapper on ContextualStochasticArgmax" begin
     using DecisionFocusedLearningBenchmarks
     using Statistics: mean