"""
$TYPEDEF

An abstract type for imitation learning algorithms.

All subtypes must implement:
- `train_policy!(algorithm::AbstractImitationAlgorithm, policy::DFLPolicy, train_data; epochs, metrics)`
"""
abstract type AbstractImitationAlgorithm <: AbstractAlgorithm end
17+
"""
$TYPEDSIGNATURES

Train a new DFLPolicy on a benchmark using any imitation learning algorithm.

Convenience wrapper that handles dataset generation, model initialization, and policy
creation. Returns the training history and the trained policy as a `(history, policy)`
tuple.

For dynamic benchmarks, use the algorithm-specific `train_policy` overload that accepts
environments and an anticipative policy.

# Arguments
- `algorithm`: imitation learning algorithm used for training.
- `benchmark`: benchmark from which the dataset, statistical model, and maximizer are
  generated.

# Keywords
- `target_policy`: optional policy used to label samples during dataset generation.
- `dataset_size`: number of samples to generate (default `30`).
- `epochs`: number of training epochs (default `100`).
- `metrics`: tuple of metrics tracked during training.
- `seed`: optional seed forwarded to statistical model generation.
"""
function train_policy(
    algorithm::AbstractImitationAlgorithm,
    benchmark::AbstractBenchmark;
    target_policy=nothing,
    dataset_size=30,
    epochs=100,
    metrics::Tuple=(),
    seed=nothing,
)
    dataset = generate_dataset(benchmark, dataset_size; target_policy)

    # Imitation learning needs labeled targets; fail fast with an actionable message
    # instead of erroring deep inside the training loop.
    if any(s -> isnothing(s.y), dataset)
        error(
            "Training dataset contains unlabeled samples (y=nothing). " *
            "Provide a `target_policy` kwarg to label samples during dataset generation.",
        )
    end

    model = generate_statistical_model(benchmark; seed)
    maximizer = generate_maximizer(benchmark)
    policy = DFLPolicy(model, maximizer)

    # Forward each sample's context as maximizer kwargs so the optimization layer
    # receives per-instance data during training.
    history = train_policy!(
        algorithm, policy, dataset; epochs, metrics, maximizer_kwargs=s -> s.context
    )

    return history, policy
end