# TODO: parallelize loss computation on validation set
# TODO: add a generic supervised-learning training method that fyl_train delegates to, so new supervised losses can be tested easily
55
"""
$TYPEDEF

Structured imitation learning with a perturbed Fenchel-Young loss.

The policy is trained by wrapping the combinatorial maximizer in a
`PerturbedAdditive` layer and minimizing a `FenchelYoungLoss` on the
imitation targets (see `train_policy!`).

# Fields
$TYPEDFIELDS
"""
@kwdef struct PerturbedImitationAlgorithm{O,S} <: AbstractImitationAlgorithm
    "number of perturbation samples drawn per forward pass"
    nb_samples::Int = 10
    "perturbation magnitude (additive noise scale)"
    ε::Float64 = 0.1
    "whether to evaluate perturbation samples with threading"
    threaded::Bool = true
    "optimizer used for training"
    training_optimizer::O = Adam()
    "random seed for the perturbations (`nothing` means unseeded)"
    seed::S = nothing
end
1326
14- reset! (algorithm:: PerturbedImitationAlgorithm ) = empty! (algorithm. history)
27+ """
28+ $TYPEDSIGNATURES
1529
30+ Train a model using the Perturbed Imitation Algorithm on the provided training dataset.
31+ """
1632function train_policy! (
1733 algorithm:: PerturbedImitationAlgorithm ,
1834 model,
@@ -21,9 +37,7 @@ function train_policy!(
2137 epochs= 100 ,
2238 maximizer_kwargs= get_info,
2339 metrics:: Tuple = (),
24- reset= false ,
2540)
26- reset && reset! (algorithm)
2741 (; nb_samples, ε, threaded, training_optimizer, seed) = algorithm
2842 perturbed = PerturbedAdditive (maximizer; nb_samples, ε, threaded, seed)
2943 loss = FenchelYoungLoss (perturbed)
@@ -32,23 +46,21 @@ function train_policy!(
3246
3347 history = MVHistory ()
3448
35- train_loss_metric = LossAccumulator ( :training_loss )
49+ train_loss_metric = FYLLossMetric (train_dataset, :training_loss )
3650
37- # Store initial losses (epoch 0)
38- # Epoch 0
39- for sample in train_dataset
40- (; x, y) = sample
41- val = loss (model (x), y; maximizer_kwargs (sample)... )
42- update! (train_loss_metric, val)
43- end
44- push! (history, :training_loss , 0 , compute (train_loss_metric))
45- reset! (train_loss_metric)
46-
47- # Initial metric evaluation
48- context = TrainingContext (; model= model, epoch= 0 , maximizer= maximizer, loss= loss)
49- run_metrics! (history, metrics, context)
51+ # Initial metric evaluation and training loss (epoch 0)
52+ context = TrainingContext (;
53+ model= model,
54+ epoch= 0 ,
55+ maximizer= maximizer,
56+ maximizer_kwargs= maximizer_kwargs,
57+ loss= loss,
58+ )
59+ push! (history, :training_loss , 0 , evaluate! (train_loss_metric, context))
60+ evaluate_metrics! (history, metrics, context)
5061
5162 @showprogress for epoch in 1 : epochs
63+ next_epoch! (context)
5264 # Training step
5365 for sample in train_dataset
5466 (; x, y) = sample
@@ -59,13 +71,9 @@ function train_policy!(
5971 update! (train_loss_metric, val)
6072 end
6173
62- # Store training loss
63- push! (history, :training_loss , epoch, compute (train_loss_metric))
64- reset! (train_loss_metric)
65-
66- # Evaluate all metrics - update epoch in context
67- context. epoch = epoch
68- run_metrics! (history, metrics, context)
74+ # Log metrics
75+ push! (history, :training_loss , epoch, compute! (train_loss_metric))
76+ evaluate_metrics! (history, metrics, context)
6977 end
7078
7179 # Plot training loss (or first metric if available)
0 commit comments