From 75d320bf7f19cd70ea5d53051847871c9e5c8a8c Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Wed, 18 Mar 2026 17:36:15 +0100 Subject: [PATCH 1/2] import spanning tree code from old repository --- .../src/benchmarks/two_stage_spanning_tree.md | 28 +++ .../algorithms/anticipative.jl | 16 ++ .../algorithms/benders_decomposition.jl | 164 ++++++++++++++++ .../algorithms/column_generation.jl | 111 +++++++++++ .../algorithms/cut_generation.jl | 182 ++++++++++++++++++ .../algorithms/lagrangian_relaxation.jl | 117 +++++++++++ src/TwoStageSpanningTree/instance.jl | 32 +++ src/TwoStageSpanningTree/learning/features.jl | 107 ++++++++++ src/TwoStageSpanningTree/plot.jl | 80 ++++++++ src/TwoStageSpanningTree/solution.jl | 71 +++++++ src/TwoStageSpanningTree/utils.jl | 36 ++++ 11 files changed, 944 insertions(+) create mode 100644 docs/src/benchmarks/two_stage_spanning_tree.md create mode 100644 src/TwoStageSpanningTree/algorithms/anticipative.jl create mode 100644 src/TwoStageSpanningTree/algorithms/benders_decomposition.jl create mode 100644 src/TwoStageSpanningTree/algorithms/column_generation.jl create mode 100644 src/TwoStageSpanningTree/algorithms/cut_generation.jl create mode 100644 src/TwoStageSpanningTree/algorithms/lagrangian_relaxation.jl create mode 100644 src/TwoStageSpanningTree/instance.jl create mode 100644 src/TwoStageSpanningTree/learning/features.jl create mode 100644 src/TwoStageSpanningTree/plot.jl create mode 100644 src/TwoStageSpanningTree/solution.jl create mode 100644 src/TwoStageSpanningTree/utils.jl diff --git a/docs/src/benchmarks/two_stage_spanning_tree.md b/docs/src/benchmarks/two_stage_spanning_tree.md new file mode 100644 index 0000000..cde91d8 --- /dev/null +++ b/docs/src/benchmarks/two_stage_spanning_tree.md @@ -0,0 +1,28 @@ +# Problem statement + +We consider a two-stage stochastic variant of the classic [minimum spanning tree problem](https://en.wikipedia.org/wiki/Minimum_spanning_tree). 
+
+Rather than immediately constructing a spanning tree and incurring a cost ``c_e`` for each selected edge in the tree, we can instead build only a partial tree (forest) during the first stage and pay first stage costs ``c_e`` for the selected edges. Then, second stage costs ``d_e`` are revealed and replace first stage costs. The task then involves completing the first stage forest into a spanning tree.
+
+The objective is to minimize the total incurred cost in expectation.
+
+## Instance
+Let ``G = (V,E)`` be an undirected **graph**, and ``S`` be a finite set of **scenarios**.
+
+For each edge ``e`` in ``E``, we have a **first stage cost** ``c_e\in\mathbb{R}``.
+
+For each edge ``e`` in ``E`` and scenario ``s`` in ``S``, we have a **second stage cost** ``d_{es}\in\mathbb{R}``.
+
+# MIP formulation
+Unlike the regular minimum spanning tree problem, this two-stage variant is NP-hard.
+However, it can still be formulated as a linear program with binary variables and an exponential number of constraints.
+```math
+\begin{array}{lll}
+\min\limits_{y, z}\, & \sum\limits_{e\in E}c_e y_e + \frac{1}{|S|}\sum\limits_{s \in S}\sum\limits_{e\in E}d_{es}z_{es} & \\
+\mathrm{s.t.}\, & \sum\limits_{e\in E}(y_e + z_{es}) = |V| - 1, & \forall s \in S\\
+& \sum\limits_{e\in E(Y)} (y_e + z_{es}) \leq |Y| - 1,\quad & \forall \emptyset \subsetneq Y \subsetneq V,\, \forall s\in S\\
+& y_e\in \{0, 1\}, & \forall e\in E\\
+& z_{es}\in \{0, 1\}, & \forall e\in E, \forall s\in S
+\end{array}
+```
+where ``y_e`` is a binary variable indicating if ``e`` is in the first stage solution, and ``z_{es}`` is a binary variable indicating if ``e`` is in the second stage solution for scenario ``s``.
diff --git a/src/TwoStageSpanningTree/algorithms/anticipative.jl b/src/TwoStageSpanningTree/algorithms/anticipative.jl
new file mode 100644
index 0000000..5ca5398
--- /dev/null
+++ b/src/TwoStageSpanningTree/algorithms/anticipative.jl
@@ -0,0 +1,16 @@
+"""
+$TYPEDSIGNATURES
+
+Compute an anticipative solution for a given scenario.
+""" +function anticipative_solution(instance::TwoStageSpanningTreeInstance, scenario::Int=1) + (; graph, first_stage_costs, second_stage_costs) = instance + scenario_second_stage_costs = @view second_stage_costs[:, scenario] + weights = min.(first_stage_costs, scenario_second_stage_costs) + (; value, tree) = kruskal(graph, weights) + + slice = first_stage_costs .<= scenario_second_stage_costs + y = tree[slice] + z = tree[.!slice] + return (; value, y, z) +end diff --git a/src/TwoStageSpanningTree/algorithms/benders_decomposition.jl b/src/TwoStageSpanningTree/algorithms/benders_decomposition.jl new file mode 100644 index 0000000..02cd01e --- /dev/null +++ b/src/TwoStageSpanningTree/algorithms/benders_decomposition.jl @@ -0,0 +1,164 @@ +function separate_benders_cut(instance::TwoStageSpanningTreeInstance, y, s; MILP_solver, tol=1e-5) + (; graph, second_stage_costs) = instance + + E = ne(graph) + + columns = BitVector[] + + # Feasibility cut + model = Model(MILP_solver) + + @variable(model, dummy, Bin) + + @variable(model, νₛ <= 1) + @variable(model, 0 <= μₛ[e in 1:E] <= 1) + + @objective(model, Max, νₛ + sum(y[e] * μₛ[e] for e in 1:E)) + + function feasibility_callback(cb_data) + μ_val = callback_value.(cb_data, μₛ) + ν_val = callback_value(cb_data, νₛ) + + weights = -μ_val + val, tree = kruskal(graph, weights) + + push!(columns, tree) + + if val + tol < ν_val + new_constraint = @build_constraint( + - sum(μₛ[e] for e in 1:E if tree[e]) - νₛ >= 0 + ) + MOI.submit( + model, MOI.LazyConstraint(cb_data), new_constraint + ) + end + end + + set_attribute(model, MOI.LazyConstraintCallback(), feasibility_callback) + optimize!(model) + + if objective_value(model) > tol + return false, value.(νₛ), value.(μₛ), objective_value(model) + end + + # Else, optimality cut + optimality_model = Model(MILP_solver) + + @variable(optimality_model, dummy, Bin) + + @variable(optimality_model, νₛ) + @variable(optimality_model, μₛ[e in 1:E] >= 0) + + @objective( + optimality_model, Max, + νₛ 
+ sum(y[e] * μₛ[e] for e in 1:E) - sum(second_stage_costs[e, s] * y[e] for e in 1:E) + ) + + for tree in columns + @constraint( + optimality_model, + sum(second_stage_costs[e, s] - μₛ[e] for e in 1:E if tree[e]) >= νₛ + ) + end + + function my_callback_function(cb_data) + μ_val = callback_value.(cb_data, μₛ) + ν_val = callback_value(cb_data, νₛ) + + weights = second_stage_costs[:, s] .- μ_val + + val, tree = kruskal(graph, weights) + + if val - ν_val + tol < 0 + new_constraint = @build_constraint( + sum(second_stage_costs[e, s] - μₛ[e] for e in 1:E if tree[e]) >= νₛ + ) + MOI.submit( + optimality_model, MOI.LazyConstraint(cb_data), new_constraint + ) + end + end + + set_attribute(optimality_model, MOI.LazyConstraintCallback(), my_callback_function) + + optimize!(optimality_model) + + # If primal feasible, add an optimality cut + @assert termination_status(optimality_model) != DUAL_INFEASIBLE + return true, value.(νₛ), value.(μₛ), objective_value(optimality_model) +end + +""" +$TYPEDSIGNATURES + +Returns the optimal solution using a Benders decomposition algorithm. 
+""" +function benders_decomposition( + instance::TwoStageSpanningTreeInstance; + MILP_solver=GLPK.Optimizer, + tol=1e-6, + verbose=true +) + (; graph, first_stage_costs, second_stage_costs) = instance + E = ne(graph) + S = nb_scenarios(instance) + + model = Model(MILP_solver) + @variable(model, y[e in 1:E], Bin) + @variable( + model, + θ[s in 1:S] >= sum(min(0, second_stage_costs[e, s]) for e in 1:E) + ) + @objective( + model, + Min, + sum(first_stage_costs[e] * y[e] for e in 1:E) + sum(θ[s] for s in 1:S) / S + ) + + # current_scenario = 0 + callback_counter = 0 + function benders_callback(cb_data) + if callback_counter % 10 == 0 + verbose && @info("Benders iteration: $(callback_counter)") + end + callback_counter += 1 + + y_val = callback_value.(cb_data, y) + θ_val = callback_value.(cb_data, θ) + + for current_scenario in 1:S + optimality_cut, ν_val, μ_val = + separate_benders_cut(instance, y_val, current_scenario; MILP_solver) + + # If feasibility cut + if !optimality_cut + new_feasibility_cut = @build_constraint( + ν_val + sum(μ_val[e] * y[e] for e in 1:E) <= 0 + ) + MOI.submit( + model, + MOI.LazyConstraint(cb_data), + new_feasibility_cut + ) + + return nothing + end + + # Else, optimality cut + if θ_val[current_scenario] + tol < ν_val + sum(μ_val[e] * y_val[e] for e in 1:E) - + sum(second_stage_costs[e, current_scenario] * y_val[e] for e in 1:E) + con = @build_constraint( + θ[current_scenario] >= + ν_val + sum(μ_val[e] * y[e] for e in 1:E) - sum(second_stage_costs[e, current_scenario] * y[e] for e in 1:E) + ) + MOI.submit(model, MOI.LazyConstraint(cb_data), con) + return nothing + end + end + end + + set_attribute(model, MOI.LazyConstraintCallback(), benders_callback) + optimize!(model) + + return solution_from_first_stage_forest(value.(y) .> 0.5, instance) +end diff --git a/src/TwoStageSpanningTree/algorithms/column_generation.jl b/src/TwoStageSpanningTree/algorithms/column_generation.jl new file mode 100644 index 0000000..7e0330b --- /dev/null +++ 
b/src/TwoStageSpanningTree/algorithms/column_generation.jl @@ -0,0 +1,111 @@ +""" +$TYPEDSIGNATURES + +Solves the linear relaxation using a column generation algorithm. +""" +function column_generation(instance; MILP_solver=GLPK.Optimizer, tol=1e-6, verbose=true) + (; graph, first_stage_costs, second_stage_costs) = instance + + V = nv(graph) + E = ne(graph) + S = nb_scenarios(instance) + + model = Model(MILP_solver) + + @variable(model, dummy, Bin) # dummy binary variable to activate callbacks + + @variable(model, ν[s in 1:S]) + @variable(model, μ[e in 1:E, s in 1:S]) + + @objective(model, Max, sum(ν[s] for s in 1:S)) + + @constraint( + model, [e in 1:E, s in 1:S], + second_stage_costs[e, s] / S - μ[e, s] >= 0 + ) + + @constraint( + model, [e in 1:E], + first_stage_costs[e] - sum(μ[e, s] for s in 1:S) >= 0 + ) + + trees = BitVector[] + + for s in 1:S + _, dummy_tree = kruskal(graph, min.(first_stage_costs, second_stage_costs[:, s])) + @constraint( + model, + ν[s] <= sum(μ[e, s] for e in 1:E if dummy_tree[e]) + ) + push!(trees, dummy_tree) + end + + callback_counter = 0 + function my_callback_function(cb_data) + callback_counter += 1 + + ν_val = callback_value.(cb_data, ν) + μ_val = callback_value.(cb_data, μ) + + for s in 1:S + val, T = kruskal(graph, @view μ_val[:, s]) + + if val + tol < ν_val[s] + push!(trees, T) + new_constraint = @build_constraint( + ν[s] <= sum(μ[e, s] for e in 1:E if T[e]) + ) + MOI.submit( + model, MOI.LazyConstraint(cb_data), new_constraint + ) + end + end + end + + set_attribute(model, MOI.LazyConstraintCallback(), my_callback_function) + + optimize!(model) + verbose && @info "Optimal solution found after $callback_counter cuts" + return (; value=objective_value(model), ν=value.(ν), μ=value.(μ), columns=trees) +end + +function column_heuristic(instance, columns; MILP_solver=GLPK.Optimizer) + (; graph, first_stage_costs, second_stage_costs) = instance + E = ne(graph) + S = nb_scenarios(instance) + T = length(columns) + + model = 
Model(MILP_solver)
+
+    @variable(model, y[e in 1:E], Bin)
+    @variable(model, z[e in 1:E, s in 1:S], Bin)
+
+    @variable(model, λ[t in 1:T, s in 1:S], Bin)
+
+    @objective(
+        model, Min,
+        sum(first_stage_costs[e] * y[e] for e in 1:E) + sum(second_stage_costs[e, s] * z[e, s] for e in 1:E for s in 1:S) / S
+    )
+
+    @constraint(model, [s in 1:S], sum(λ[t, s] for t in 1:T) == 1)
+    @constraint(
+        model, [e in 1:E, s in 1:S],
+        y[e] + z[e, s] == sum(λ[t, s] for t in 1:T if columns[t][e])
+    )
+
+    optimize!(model)
+
+    return TwoStageSpanningTreeSolution(value.(y) .> 0.5, value.(z) .> 0.5)
+end
+
+"""
+$TYPEDSIGNATURES
+
+Column generation heuristic that solves the linear relaxation and then outputs the solution of the problem restricted to selected columns.
+Returns a heuristic solution.
+"""
+function column_heuristic(instance; MILP_solver=GLPK.Optimizer, verbose=true)
+    (; columns) = column_generation(instance; verbose=false, MILP_solver)
+    verbose && @info "Continuous relaxation solved with $(length(columns)) columns."
+    return column_heuristic(instance, columns)
+end
diff --git a/src/TwoStageSpanningTree/algorithms/cut_generation.jl b/src/TwoStageSpanningTree/algorithms/cut_generation.jl
new file mode 100644
index 0000000..4558e9d
--- /dev/null
+++ b/src/TwoStageSpanningTree/algorithms/cut_generation.jl
@@ -0,0 +1,182 @@
+"""
+$TYPEDSIGNATURES
+
+Solve the separation problem using the MILP formulation.
+""" +function MILP_separation_problem(graph, weights; MILP_solver, tol=1e-6) + V = nv(graph) + E = ne(graph) + + model = Model(MILP_solver) + set_silent(model) + + @variable(model, α[v in 1:V], Bin) + @variable(model, β[e in 1:E], Bin) + + @objective( + model, Min, + sum(α[v] for v in 1:V) - 1 - sum(weights[e] * β[e] for e in 1:E) + ) + + @constraint(model, [(e, edge) in enumerate(edges(graph))], 2 * β[e] <= α[dst(edge)] + α[src(edge)]) + + @constraint(model, sum(α[v] for v in 1:V) >= 1) + @constraint(model, sum(α[v] for v in 1:V) <= V - 1) + + optimize!(model) + found = objective_value(model) + tol <= 0 + return found, value.(β) .> 0.5, sum(value.(α)) +end + +function build_flow_graph(graph, weights; infinity=1e6) + V = nv(graph) + E = ne(graph) + + # A = 3 * E + V + VV = 2 + E + V + + o = 1 + d = 2 + + sources = vcat( + fill(o, E), + [2 + e for e in 1:E], + [2 + e for e in 1:E], + [2 + E + v for v in 1:V] + ) + destinations = vcat( + [2 + e for e in 1:E], + [2 + E + src(e) for e in edges(graph)], + [2 + E + dst(e) for e in edges(graph)], + fill(d, V) + ) + costs = vcat( + [weights[e] for e in 1:E], + fill(infinity, 2 * E), + ones(V) + ) + + return sources, destinations, costs +end + +""" +$TYPEDSIGNATURES + +Solve the separation problem using the min cut formulation. 
+""" +function cut_separation_problem(graph, weights; MILP_solver=GLPK.Optimizer, tol=1e-6) + sources, destinations, costs = build_flow_graph(graph, weights) + + A = 3 * ne(graph) + nv(graph) + V = 2 + ne(graph) + nv(graph) + @assert A == length(costs) + + vertex_range = (2 + ne(graph) + 1):V + edge_range = 1:ne(graph) + + model = Model(MILP_solver) + + @variable(model, β[a in 1:A], Bin) + @variable(model, α[v in 1:V], Bin) + + @objective( + model, Min, + sum(costs[a] * β[a] for a in 1:A) + ) + + @constraint(model, α[1] == 1) + @constraint(model, α[2] == 0) + + @constraint( + model, + [a in 1:A], + β[a] >= α[sources[a]] - α[destinations[a]] + ) + + @constraint(model, sum(α[v] for v in vertex_range) >= 1) + + optimize!(model) + + min_cut_value = objective_value(model) + + found = min_cut_value + tol < nv(graph) + + return found, value.(β)[edge_range] .< 0.5, sum(value.(α)[vertex_range]) +end + +""" +$TYPEDSIGNATURES + +Returns the optimal solution using a cut generation algorithm with custom separation problem solver. 
+"""
+function cut_generation(
+    instance::TwoStageSpanningTreeInstance;
+    separation_problem=MILP_separation_problem,
+    MILP_solver=GLPK.Optimizer,
+    verbose=true
+)
+    # Unpack fields
+    (; graph, first_stage_costs, second_stage_costs) = instance
+    S = nb_scenarios(instance)
+    E = ne(graph)
+    V = nv(graph)
+
+    # Initialize model and link to solver
+    model = Model(MILP_solver)
+
+    # Add variables
+    @variable(model, y[e in 1:E], Bin)
+    @variable(model, z[e in 1:E, s in 1:S], Bin)
+
+    # Add an objective function
+    @expression(
+        model,
+        first_stage_objective,
+        sum(first_stage_costs[e] * y[e] for e in 1:E)
+    )
+    @expression(
+        model,
+        second_stage_objective,
+        sum(second_stage_costs[e, s] * z[e, s] for e in 1:E for s in 1:S) / S
+    )
+    @objective(model, Min, first_stage_objective + second_stage_objective)
+
+    # Add constraints
+    @constraint(
+        model, [s in 1:S],
+        sum(y[e] + z[e, s] for e in 1:E) == V - 1
+    )
+
+    call_back_counter = 0
+
+    function my_callback_function(cb_data)
+        call_back_counter += 1
+
+        y_val = callback_value.(cb_data, y)
+        z_val = callback_value.(cb_data, z)
+
+        for current_s in 1:S
+            weights = [y_val[e] + z_val[e, current_s] for e in 1:E]
+
+            found, Y, YY = separation_problem(
+                graph, weights; MILP_solver=MILP_solver
+            )
+
+            if found
+                new_constraint = @build_constraint(
+                    sum(y[e] + z[e, current_s] for e in 1:E if Y[e]) <= YY - 1
+                )
+                MOI.submit(
+                    model, MOI.LazyConstraint(cb_data), new_constraint
+                )
+                return
+            end
+        end
+    end
+
+    set_attribute(model, MOI.LazyConstraintCallback(), my_callback_function)
+
+    optimize!(model)
+    verbose && @info "Optimal solution found after $call_back_counter cuts"
+    return TwoStageSpanningTreeSolution(value.(y) .> 0.5, value.(z) .> 0.5)
+end
diff --git a/src/TwoStageSpanningTree/algorithms/lagrangian_relaxation.jl b/src/TwoStageSpanningTree/algorithms/lagrangian_relaxation.jl
new file mode 100644
index 0000000..7e013fa
--- /dev/null
+++ b/src/TwoStageSpanningTree/algorithms/lagrangian_relaxation.jl
@@ -0,0 +1,117 
@@ +function first_stage_optimal_solution(inst::TwoStageSpanningTreeInstance, θ::AbstractMatrix; M=20.0) + S = nb_scenarios(inst) + E = ne(inst.graph) + + # first stage objective value + edge_weight_vector = inst.first_stage_costs .+ vec(sum(θ; dims=2)) ./ S + + edges_index_with_negative_cost = [e for e in 1:E if edge_weight_vector[e] < 0] + + value = 0.0 + if length(edges_index_with_negative_cost) > 0 + value = sum(M * edge_weight_vector[e] for e in edges_index_with_negative_cost) + end + + grad = zeros(E, S) + grad[edges_index_with_negative_cost, :] .= M / S + return value, grad +end; + +function second_stage_optimal_solution!( + instance::TwoStageSpanningTreeInstance, + θ::AbstractMatrix, + scenario::Int, + grad::AbstractMatrix, +) + (; graph, second_stage_costs) = instance + S = nb_scenarios(instance) + + weights = min.(-θ[:, scenario], second_stage_costs[:, scenario]) + + (; value, tree) = kruskal(graph, weights) + + # update gradient + slice = (-θ[:, scenario] .< second_stage_costs[:, scenario]) .&& tree + grad[slice, scenario] .-= 1 / S + + return value ./ S +end; + +function lagrangian_function_value_gradient(inst::TwoStageSpanningTreeInstance, θ::AbstractMatrix) + value, grad = first_stage_optimal_solution(inst, θ) + + S = nb_scenarios(inst) + values = zeros(S) + for s in 1:S + # Different part of grad are modified + values[s] = second_stage_optimal_solution!(inst, θ, s, grad) + end + value += sum(values) + return value, grad +end; + +function lagrangian_heuristic(θ::AbstractMatrix; inst::TwoStageSpanningTreeInstance) + # Retrieve - y_{es} / S from θ by computing the gradient + (; graph) = inst + S = nb_scenarios(inst) + grad = zeros(ne(graph), S) + for s in 1:S + second_stage_optimal_solution!(inst, θ, s, grad) + end + # Compute the average (over s) y_{es} and build a graph that is a candidate spannning tree (but not necessarily a spanning tree nor a forest) + average_x = -vec(sum(grad; dims=2)) + weights = average_x .> 0.5 + # Build a spanning tree that 
contains as many edges of our candidate as possible + _, tree_from_candidate = kruskal(graph, weights; minimize=false) + # Keep only the edges that are in the initial candidate graph and in the spanning tree + forest = weights .&& tree_from_candidate + sol = solution_from_first_stage_forest(forest, inst) + # v, _ = evaluate_first_stage_solution(inst, forest) + return solution_value(sol, inst), forest +end; + +""" +$TYPEDSIGNATURES + +Return an heuristic solution using a combination of lagarngian relaxation and lagrangian heuristic. +""" +function lagrangian_relaxation( + inst::TwoStageSpanningTreeInstance; nb_epochs=100, stop_gap=1e-8 +) + θ = zeros(ne(inst.graph), nb_scenarios(inst)) + + opt = Adam() + + lb = -Inf + ub = Inf + best_theta = θ + forest = Edge{Int}[] + + last_ub_epoch = -1000 + + lb_history = Float64[] + ub_history = Float64[] + for epoch in 1:nb_epochs + value, grad = lagrangian_function_value_gradient(inst, θ) + + if value > lb + lb = value + best_theta = θ + if epoch > last_ub_epoch + 100 + last_ub_epoch = epoch + ub, forest = lagrangian_heuristic(θ; inst=inst) + if (ub - lb) / abs(lb) <= stop_gap + @info "Stopped after $epoch gap smaller than $stop_gap" + break + end + end + end + Flux.update!(opt, θ, -grad) + push!(lb_history, value) + push!(ub_history, ub) + end + + ub, forest = lagrangian_heuristic(best_theta; inst=inst) + solution = solution_from_first_stage_forest(forest, inst) + return solution, (; lb, ub, best_theta, lb_history, ub_history) +end diff --git a/src/TwoStageSpanningTree/instance.jl b/src/TwoStageSpanningTree/instance.jl new file mode 100644 index 0000000..e0671af --- /dev/null +++ b/src/TwoStageSpanningTree/instance.jl @@ -0,0 +1,32 @@ +""" +$TYPEDEF + +# Fields +$TYPEDFIELDS +""" +struct TwoStageSpanningTreeInstance{T} + "Graph" + graph::SimpleGraph{Int} + "First stage costs for each edge" + first_stage_costs::Vector{T} + "Second stage costs for each edge and scenario [e, s]" + second_stage_costs::Matrix{T} +end + 
+Base.show(io::IO, instance::TwoStageSpanningTreeInstance) = print(io, "$(collect(edges(instance.graph))), $(instance.first_stage_costs), $(instance.second_stage_costs)") + +function TwoStageSpanningTreeInstance(; n, m, nb_scenarios=1, c_range=1:20, d_range=1:20, seed=nothing, type=Float64) + g = Graphs.grid((n, m)) + rng = MersenneTwister(seed) + c = type[rand(rng, c_range) for _ in 1:ne(g)] + d = type[rand(rng, d_range) for _ in 1:ne(g), _ in 1:nb_scenarios] + + return TwoStageSpanningTreeInstance(g, c, d) +end + +""" +$TYPEDSIGNATURES + +Return the number of scenarios of `instance`. +""" +nb_scenarios(instance::TwoStageSpanningTreeInstance) = size(instance.second_stage_costs, 2) diff --git a/src/TwoStageSpanningTree/learning/features.jl b/src/TwoStageSpanningTree/learning/features.jl new file mode 100644 index 0000000..94dfabf --- /dev/null +++ b/src/TwoStageSpanningTree/learning/features.jl @@ -0,0 +1,107 @@ + +function first_stage_cost( + inst::TwoStageSpanningTreeInstance, e::AbstractEdge, scenario::Int +) + return inst.first_stage_weights_matrix[src(e), dst(e)] +end + +function scenario_second_stage_cost( + inst::TwoStageSpanningTreeInstance, e::AbstractEdge, scenario::Int +) + return inst.second_stage_weights[scenario][src(e), dst(e)] +end + +function scenario_best_stage_cost( + inst::TwoStageSpanningTreeInstance, e::AbstractEdge, scenario::Int +) + return min( + inst.second_stage_weights[scenario][src(e), dst(e)], + inst.first_stage_weights_matrix[src(e), dst(e)], + ) +end + +function edge_neighbors(g::AbstractGraph, e::AbstractEdge) + result = Vector{AbstractEdge}() + for v in [src(e), dst(e)] + for u in neighbors(g, v) + push!(result, Edge(min(u, v), max(u, v))) + end + end + return result +end + +function compute_minimum_spanning_tree( + inst::TwoStageSpanningTreeInstance, scenario::Int, weight_function +) + weights_vec = zeros(ne(inst.g)) + for e in edges(inst.g) + weights_vec[edge_index(inst, e)] = weight_function(inst, e, scenario) + end + weights = 
get_weight_matrix_from_weight_vector(inst.g, inst.edge_index, weights_vec) + return kruskal_mst(inst.g, weights) +end + +function compute_first_stage_mst(inst::TwoStageSpanningTreeInstance) + tree = compute_minimum_spanning_tree(inst, -1, first_stage_cost) + fs_mst_indicator = zeros(ne(inst.g)) + for e in tree + fs_mst_indicator[edge_index(inst, e)] = 1.0 + end + return fs_mst_indicator +end + +function compute_second_stage_mst(inst::TwoStageSpanningTreeInstance) + ss_mst_indicator = zeros(ne(inst.g), inst.nb_scenarios) + for scenario in 1:(inst.nb_scenarios) + tree = compute_minimum_spanning_tree(inst, scenario, scenario_second_stage_cost) + for e in tree + ss_mst_indicator[edge_index(inst, e), scenario] = 1.0 + end + end + return ss_mst_indicator +end + +function compute_best_stage_mst(inst::TwoStageSpanningTreeInstance) + bfs_mst_indicator = zeros(ne(inst.g), inst.nb_scenarios) + bss_mst_indicator = zeros(ne(inst.g), inst.nb_scenarios) + for scenario in 1:(inst.nb_scenarios) + tree = compute_minimum_spanning_tree(inst, scenario, scenario_best_stage_cost) + for e in tree + e_ind = edge_index(inst, e) + if inst.first_stage_weights_matrix[src(e), dst(e)] < + inst.second_stage_weights[scenario][src(e), dst(e)] + bfs_mst_indicator[e_ind, scenario] = 1.0 + else + bss_mst_indicator[e_ind, scenario] = 1.0 + end + end + end + return bfs_mst_indicator, bss_mst_indicator +end + +function pivot_instance_second_stage_costs(inst::TwoStageSpanningTreeInstance) + edgeWeights = zeros(ne(inst.g), inst.nb_scenarios) + for s in 1:(inst.nb_scenarios) + for e in edges(inst.g) + edgeWeights[edge_index(inst, e), s] = inst.second_stage_weights[s][ + src(e), dst(e) + ] + end + end + return edgeWeights +end + +function compute_edge_neighbors(inst::TwoStageSpanningTreeInstance) + neighbors = Vector{Vector{Int}}(undef, ne(inst.g)) + for e in edges(inst.g) + e_ind = edge_index(inst, e) + neighbors_list = edge_neighbors(inst.g, e) + neighbors[e_ind] = Vector{Int}(undef, 
length(neighbors_list)) + count = 0 + for f in neighbors_list + count += 1 + neighbors[e_ind][count] = edge_index(inst, f) + end + end + return neighbors +end diff --git a/src/TwoStageSpanningTree/plot.jl b/src/TwoStageSpanningTree/plot.jl new file mode 100644 index 0000000..e143c01 --- /dev/null +++ b/src/TwoStageSpanningTree/plot.jl @@ -0,0 +1,80 @@ +""" + plot_grid_graph(graph, n, m, weights=nothing) + +# Arguments +- `graph`: grid graph to plot +- `n`: n dimension +- `m`: m dimension +- `weights`: edge weights to display (optional) +""" +function plot_grid_graph( + graph, n, m, weights=nothing; + show_node_indices=false, δ=0.25, δ₂=0.13, + edge_colors=fill(:black, ne(graph)), + edge_widths=fill(1, ne(graph)), + edge_labels=fill(nothing, ne(graph)), + space_for_legend=0 +) + l = [((i - 1) % n, floor((i - 1) / n)) for i in 1:nv(graph)] + function args_from_ij(i, j) + return [l[i][1], l[j][1]], [l[i][2], l[j][2]] + end + f = Plots.plot(; axis=([], false), ylimits=(-δ, m-1+δ+space_for_legend), xlimits=(-δ, n-1+δ), aspect_ratio=:equal, leg=:top) + for (color, width, label, e) in zip(edge_colors, edge_widths, edge_labels, edges(graph)) + Plots.plot!(f, args_from_ij(src(e), dst(e)); color, width, label) + end + series_annotations = show_node_indices ? (1:nv(g)) : nothing + Plots.scatter!(f, l; series_annotations, label=nothing, markersize=15, color=:lightgrey) + if !isnothing(weights) + for (w, e) in zip(weights, edges(graph)) + i, j = src(e), dst(e) + x, y = (l[j] .+ l[i]) ./ 2 + if j == i + 1 + y += δ₂ + else + x -= δ₂ + end + Plots.annotate!(f, x, y, Int(w)) + end + end + return f +end + +""" +$TYPEDSIGNATURES + +Plot the two-stage tree from `solution` for requested `scenario`. 
+""" +function plot_scenario( + solution::TwoStageSpanningTreeSolution, instance::TwoStageSpanningTreeInstance, scenario; + show_node_indices=false, δ=0.25, δ₂=0.16, n, m +) + (; graph, first_stage_costs, second_stage_costs) = instance + first_stage_forest = solution.y + second_stage_forests = solution.z + + is_labeled_1 = false + is_labeled_2 = false + edge_labels = fill("", ne(graph)) + + S = nb_scenarios(instance) + + for e in 1:ne(graph) + b1 = first_stage_forest[e] + b2 = second_stage_forests[e, scenario] + if !is_labeled_1 && b1 + edge_labels[e] = "First stage forest" + is_labeled_1 = true + elseif !is_labeled_2 && b2 + edge_labels[e] = "Second stage forest (scenario $scenario/$S)" + is_labeled_2 = true + end + end + + edge_colors = [e1 ? :red : e2 ? :green : :black for (e1, e2) in zip(first_stage_forest, second_stage_forests[:, scenario])] + edge_widths = [e1 || e2 ? 3 : 1 for (e1, e2) in zip(first_stage_forest, second_stage_forests[:, scenario])] + weights = first_stage_forest .* first_stage_costs + .!first_stage_forest .* second_stage_costs[:, scenario] + return plot_grid_graph( + graph, n, m, weights; show_node_indices, δ, δ₂, edge_colors, edge_widths, edge_labels, space_for_legend=3δ + ) +end diff --git a/src/TwoStageSpanningTree/solution.jl b/src/TwoStageSpanningTree/solution.jl new file mode 100644 index 0000000..d0db655 --- /dev/null +++ b/src/TwoStageSpanningTree/solution.jl @@ -0,0 +1,71 @@ +""" +$TYPEDEF + +# Fields +$TYPEDFIELDS +""" +struct TwoStageSpanningTreeSolution + y::BitVector + z::BitMatrix +end + +""" +$TYPEDSIGNATURES + +Compute the objective value of given solution for given instance. +""" +function solution_value(solution::TwoStageSpanningTreeSolution, instance::TwoStageSpanningTreeInstance) + return dot(solution.y, instance.first_stage_costs) + dot(solution.z, instance.second_stage_costs) / nb_scenarios(instance) +end + +""" +$TYPEDSIGNATURES + +Check if a given solution is feasible for given instance. 
+""" +function is_feasible(solution::TwoStageSpanningTreeSolution, instance::TwoStageSpanningTreeInstance; verbose=true) + (; y, z) = solution + (; graph) = instance + + # Check that no edge was selected in both stages + if any(y .+ z .> 1) + verbose && @warn "Same edge selected in both stages" + return false + end + + # Check that each scenario is a spanning tree + S = nb_scenarios(instance) + for s in 1:S + if !is_spanning_tree(y .|| z[:, s], graph) + verbose && @warn "Scenario $s is not a spanning tree: $y, $(z[:, s]), $instance" + return false + end + end + + return true +end + +""" +$TYPEDSIGNATURES + +Return the associated two-stage solution from given first stage forest and instance. +""" +function solution_from_first_stage_forest(forest::BitVector, instance::TwoStageSpanningTreeInstance) + (; graph, second_stage_costs) = instance + + S = nb_scenarios(instance) + forests = falses(ne(graph), S) + for s in 1:S + weights = deepcopy(second_stage_costs[:, s]) + m = minimum(weights) - 1 + m = min(0, m - 1) + weights[forest] .= m # set weights over forest as the minimum + + # find min spanning tree including forest + _, tree_s = kruskal(graph, weights) + forest_s = tree_s .- forest + forests[:, s] .= forest_s + end + + return TwoStageSpanningTreeSolution(forest, forests) +end diff --git a/src/TwoStageSpanningTree/utils.jl b/src/TwoStageSpanningTree/utils.jl new file mode 100644 index 0000000..22ddd00 --- /dev/null +++ b/src/TwoStageSpanningTree/utils.jl @@ -0,0 +1,36 @@ +""" +$TYPEDSIGNATURES + +Kruskal's algorithm. +Same as [`Graphs.kruskal_mst`](https://juliagraphs.org/Graphs.jl/dev/algorithms/spanningtrees/#Graphs.kruskal_mst), but also returns the value of the tree, +and a binary vector instead of a vecror of edges. 
+""" +function kruskal(g::AbstractGraph, weights::AbstractVector; minimize::Bool=true) + connected_vs = IntDisjointSets(nv(g)) + + tree = falses(ne(g)) + + edge_list = collect(edges(g)) + order = sortperm(weights; rev=!minimize) + value = 0.0 + + tree_size = 0 + + for (e_ind, e) in zip(order, edge_list[order]) + if !in_same_set(connected_vs, src(e), dst(e)) + union!(connected_vs, src(e), dst(e)) + tree[e_ind] = true + tree_size += 1 + value += weights[e_ind] + (tree_size >= nv(g) - 1) && break + end + end + + return (; value, tree) +end + +function is_spanning_tree(tree_candidate::BitVector, graph::AbstractGraph) + edge_list = [e for (i, e) in enumerate(edges(graph)) if tree_candidate[i]] + subgraph = induced_subgraph(graph, edge_list)[1] + return !is_cyclic(subgraph) && nv(subgraph) == nv(graph) +end From d7ac87f3d1e51526a12b7c3ea471d563bcf0f5c8 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Fri, 20 Mar 2026 09:43:03 +0100 Subject: [PATCH 2/2] wip --- Project.toml | 6 +- ext/DFLBenchmarksPlotsExt.jl | 1 + ext/plots/dvs_plots.jl | 2 - ext/plots/tst_plots.jl | 110 ++++++++++ src/DecisionFocusedLearningBenchmarks.jl | 3 + .../TwoStageSpanningTree.jl | 39 ++++ .../algorithms/benders_decomposition.jl | 193 +++++++++-------- .../algorithms/column_generation.jl | 124 ++++++----- .../algorithms/cut_generation.jl | 197 ++++++++--------- .../algorithms/lagrangian_relaxation.jl | 43 ++-- src/TwoStageSpanningTree/benchmark.jl | 126 +++++++++++ src/TwoStageSpanningTree/instance.jl | 23 +- src/TwoStageSpanningTree/learning/features.jl | 200 ++++++++++-------- src/TwoStageSpanningTree/plot.jl | 150 ++++++++----- src/TwoStageSpanningTree/solution.jl | 31 ++- src/TwoStageSpanningTree/utils.jl | 48 ++++- test/runtests.jl | 1 + test/two_stage_spanning_tree.jl | 52 +++++ 18 files changed, 892 insertions(+), 457 deletions(-) create mode 100644 ext/plots/tst_plots.jl create mode 100644 src/TwoStageSpanningTree/TwoStageSpanningTree.jl create mode 100644 
src/TwoStageSpanningTree/benchmark.jl create mode 100644 test/two_stage_spanning_tree.jl diff --git a/Project.toml b/Project.toml index 7ad625a..69b2469 100644 --- a/Project.toml +++ b/Project.toml @@ -10,10 +10,12 @@ projects = ["docs", "test"] Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" ConstrainedShortestPaths = "b3798467-87dc-4d99-943d-35a1bd39e395" DataDeps = "124859b0-ceae-595e-8997-d05f6a7a8dfe" +DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" +GLPK = "60bf3e95-4087-53dc-ae20-288a0d20c6a6" Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6" HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b" Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0" @@ -45,10 +47,12 @@ DFLBenchmarksPlotsExt = "Plots" Combinatorics = "1.0.3" ConstrainedShortestPaths = "0.6.0" DataDeps = "0.7" +DataStructures = "0.18.22" Distributions = "0.25" DocStringExtensions = "0.9" FileIO = "1.17.0" Flux = "0.16" +GLPK = "1.2.1" Graphs = "1.11" HiGHS = "1.9" Images = "0.26.1" @@ -65,7 +69,7 @@ Plots = "1" Printf = "1" Random = "1" Requires = "1.3.0" -SCIP = "0.12" +SCIP = "0.12.8" SimpleWeightedGraphs = "1.4" SparseArrays = "1" Statistics = "1" diff --git a/ext/DFLBenchmarksPlotsExt.jl b/ext/DFLBenchmarksPlotsExt.jl index 0a5caae..8cf4752 100644 --- a/ext/DFLBenchmarksPlotsExt.jl +++ b/ext/DFLBenchmarksPlotsExt.jl @@ -11,6 +11,7 @@ include("plots/argmax2d_plots.jl") include("plots/warcraft_plots.jl") include("plots/svs_plots.jl") include("plots/dvs_plots.jl") +include("plots/tst_plots.jl") """ plot_solution(bench::AbstractBenchmark, sample::DataSample, y; kwargs...) 
diff --git a/ext/plots/dvs_plots.jl b/ext/plots/dvs_plots.jl index 0b61a5e..e1c5874 100644 --- a/ext/plots/dvs_plots.jl +++ b/ext/plots/dvs_plots.jl @@ -3,8 +3,6 @@ using Printf: @sprintf has_visualization(::DynamicVehicleSchedulingBenchmark) = true -# ── helpers (moved from static_vsp/plot.jl) ───────────────────────────────── - function _plot_static_instance( x_depot, y_depot, diff --git a/ext/plots/tst_plots.jl b/ext/plots/tst_plots.jl new file mode 100644 index 0000000..29b17d5 --- /dev/null +++ b/ext/plots/tst_plots.jl @@ -0,0 +1,110 @@ +import DecisionFocusedLearningBenchmarks.TwoStageSpanningTree: + TwoStageSpanningTreeInstance, + TwoStageSpanningTreeSolution, + nb_scenarios, + kruskal, + solution_from_first_stage_forest +using Graphs: ne, nv, edges, src, dst + +has_visualization(::TwoStageSpanningTreeBenchmark) = true + +function _plot_grid_graph( + graph, + n, + m, + weights=nothing; + edge_colors=fill(:black, ne(graph)), + edge_widths=fill(1, ne(graph)), + edge_labels=fill(nothing, ne(graph)), + δ=0.25, + δ₂=0.13, + space_for_legend=0, +) + node_pos = [((i - 1) % n, floor((i - 1) / n)) for i in 1:nv(graph)] + function segment(i, j) + return [node_pos[i][1], node_pos[j][1]], [node_pos[i][2], node_pos[j][2]] + end + fig = Plots.plot(; + axis=([], false), + ylimits=(-δ, m - 1 + δ + space_for_legend), + xlimits=(-δ, n - 1 + δ), + aspect_ratio=:equal, + leg=:top, + ) + for (color, width, label, e) in zip(edge_colors, edge_widths, edge_labels, edges(graph)) + Plots.plot!(fig, segment(src(e), dst(e))...; color, width, label) + end + Plots.scatter!(fig, node_pos; label=nothing, markersize=15, color=:lightgrey) + if !isnothing(weights) + for (w, e) in zip(weights, edges(graph)) + i, j = src(e), dst(e) + x = (node_pos[j][1] + node_pos[i][1]) / 2 + y = (node_pos[j][2] + node_pos[i][2]) / 2 + j == i + 1 ? 
(y += δ₂) : (x -= δ₂) + Plots.annotate!(fig, x, y, Int(w)) + end + end + return fig +end + +function plot_instance(bench::TwoStageSpanningTreeBenchmark, sample::DataSample; kwargs...) + (; n, m) = bench + return _plot_grid_graph( + sample.instance.graph, n, m, sample.instance.first_stage_costs; kwargs... + ) +end + +function plot_solution(bench::TwoStageSpanningTreeBenchmark, sample::DataSample; kwargs...) + (; n, m) = bench + (; instance) = sample.context + y = sample.y + isnothing(y) && error("sample.y is nothing — provide a labeled sample") + + # Use the evaluation scenario if present, otherwise fall back to first feature scenario + d_plot = if hasproperty(sample.extra, :scenario) + sample.extra.scenario + else + instance.second_stage_costs[:, 1] + end + + # Complete first-stage forest to a spanning tree for display + inst_s = TwoStageSpanningTreeInstance( + instance.graph, instance.first_stage_costs, reshape(d_plot, :, 1) + ) + full_sol = solution_from_first_stage_forest(BitVector(y .> 0), inst_s) + + yv, zv = full_sol.y, full_sol.z[:, 1] + is_labeled_1 = is_labeled_2 = false + edge_labels = fill(nothing, ne(instance.graph)) + for i in 1:ne(instance.graph) + if !is_labeled_1 && yv[i] + edge_labels[i] = "First stage" + is_labeled_1 = true + elseif !is_labeled_2 && zv[i] + edge_labels[i] = "Second stage" + is_labeled_2 = true + end + end + edge_colors = [ + if yv[i] + :red + elseif zv[i] + :green + else + :black + end for i in 1:ne(instance.graph) + ] + edge_widths = [(yv[i] || zv[i]) ? 
3 : 1 for i in 1:ne(instance.graph)] + weights = yv .* instance.first_stage_costs .+ .!yv .* d_plot + return _plot_grid_graph( + instance.graph, + n, + m, + weights; + edge_colors, + edge_widths, + edge_labels, + space_for_legend=0.75, + kwargs..., + ) +end diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl index f0b4f5b..d309e11 100644 --- a/src/DecisionFocusedLearningBenchmarks.jl +++ b/src/DecisionFocusedLearningBenchmarks.jl @@ -55,6 +55,7 @@ include("Warcraft/Warcraft.jl") include("FixedSizeShortestPath/FixedSizeShortestPath.jl") include("PortfolioOptimization/PortfolioOptimization.jl") include("StochasticVehicleScheduling/StochasticVehicleScheduling.jl") +include("TwoStageSpanningTree/TwoStageSpanningTree.jl") include("DynamicVehicleScheduling/DynamicVehicleScheduling.jl") include("DynamicAssortment/DynamicAssortment.jl") include("Maintenance/Maintenance.jl") @@ -91,6 +92,7 @@ using .Warcraft using .FixedSizeShortestPath using .PortfolioOptimization using .StochasticVehicleScheduling +using .TwoStageSpanningTree using .DynamicVehicleScheduling using .DynamicAssortment using .Maintenance @@ -104,6 +106,7 @@ export PortfolioOptimizationBenchmark export RankingBenchmark export StochasticVehicleSchedulingBenchmark export SubsetSelectionBenchmark +export TwoStageSpanningTreeBenchmark export WarcraftBenchmark export MaintenanceBenchmark diff --git a/src/TwoStageSpanningTree/TwoStageSpanningTree.jl b/src/TwoStageSpanningTree/TwoStageSpanningTree.jl new file mode 100644 index 0000000..9bc2f3e --- /dev/null +++ b/src/TwoStageSpanningTree/TwoStageSpanningTree.jl @@ -0,0 +1,39 @@ +module TwoStageSpanningTree + +using DataStructures: IntDisjointSets, in_same_set, union! 
+using DocStringExtensions +using Flux +using Graphs +using GLPK +using HiGHS +using JuMP +using JuMP: MOI +using LinearAlgebra: dot +using Random +using Statistics: mean, quantile + +using ..Utils + +include("utils.jl") +include("instance.jl") +include("solution.jl") + +include("algorithms/anticipative.jl") +include("algorithms/cut_generation.jl") +include("algorithms/benders_decomposition.jl") +include("algorithms/column_generation.jl") +include("algorithms/lagrangian_relaxation.jl") + +include("learning/features.jl") + +include("benchmark.jl") + +export TwoStageSpanningTreeBenchmark +export TwoStageSpanningTreeInstance, nb_scenarios +export TwoStageSpanningTreeSolution, + solution_value, is_feasible, solution_from_first_stage_forest +export kruskal, anticipative_solution +export cut_generation, + benders_decomposition, column_generation, column_heuristic, lagrangian_relaxation + +end diff --git a/src/TwoStageSpanningTree/algorithms/benders_decomposition.jl b/src/TwoStageSpanningTree/algorithms/benders_decomposition.jl index 02cd01e..dcc37e9 100644 --- a/src/TwoStageSpanningTree/algorithms/benders_decomposition.jl +++ b/src/TwoStageSpanningTree/algorithms/benders_decomposition.jl @@ -1,91 +1,91 @@ -function separate_benders_cut(instance::TwoStageSpanningTreeInstance, y, s; MILP_solver, tol=1e-5) - (; graph, second_stage_costs) = instance +function separate_benders_cut( + instance::TwoStageSpanningTreeInstance, y, s; model_builder, tol=1e-5 +) + (; graph, second_stage_costs) = instance - E = ne(graph) + E = ne(graph) - columns = BitVector[] + columns = BitVector[] - # Feasibility cut - model = Model(MILP_solver) + # Feasibility cut + model = model_builder() - @variable(model, dummy, Bin) + @variable(model, dummy, Bin) - @variable(model, νₛ <= 1) - @variable(model, 0 <= μₛ[e in 1:E] <= 1) + @variable(model, νₛ <= 1) + @variable(model, 0 <= μₛ[e in 1:E] <= 1) - @objective(model, Max, νₛ + sum(y[e] * μₛ[e] for e in 1:E)) + @objective(model, Max, νₛ + sum(y[e] * μₛ[e] 
for e in 1:E)) - function feasibility_callback(cb_data) - μ_val = callback_value.(cb_data, μₛ) - ν_val = callback_value(cb_data, νₛ) + function feasibility_callback(cb_data) + μ_val = callback_value.(cb_data, μₛ) + ν_val = callback_value(cb_data, νₛ) - weights = -μ_val - val, tree = kruskal(graph, weights) + weights = -μ_val + val, tree = kruskal(graph, weights) - push!(columns, tree) - - if val + tol < ν_val - new_constraint = @build_constraint( - - sum(μₛ[e] for e in 1:E if tree[e]) - νₛ >= 0 - ) - MOI.submit( - model, MOI.LazyConstraint(cb_data), new_constraint - ) - end - end + push!(columns, tree) - set_attribute(model, MOI.LazyConstraintCallback(), feasibility_callback) - optimize!(model) + if val + tol < ν_val + new_constraint = @build_constraint( + -sum(μₛ[e] for e in 1:E if tree[e]) - νₛ >= 0 + ) + MOI.submit(model, MOI.LazyConstraint(cb_data), new_constraint) + end + end - if objective_value(model) > tol - return false, value.(νₛ), value.(μₛ), objective_value(model) - end - - # Else, optimality cut - optimality_model = Model(MILP_solver) + set_attribute(model, MOI.LazyConstraintCallback(), feasibility_callback) + optimize!(model) - @variable(optimality_model, dummy, Bin) + if JuMP.objective_value(model) > tol + return false, value.(νₛ), value.(μₛ), JuMP.objective_value(model) + end - @variable(optimality_model, νₛ) - @variable(optimality_model, μₛ[e in 1:E] >= 0) + # Else, optimality cut + optimality_model = model_builder() - @objective( - optimality_model, Max, - νₛ + sum(y[e] * μₛ[e] for e in 1:E) - sum(second_stage_costs[e, s] * y[e] for e in 1:E) - ) + @variable(optimality_model, dummy, Bin) - for tree in columns - @constraint( - optimality_model, - sum(second_stage_costs[e, s] - μₛ[e] for e in 1:E if tree[e]) >= νₛ - ) - end + @variable(optimality_model, νₛ) + @variable(optimality_model, μₛ[e in 1:E] >= 0) - function my_callback_function(cb_data) - μ_val = callback_value.(cb_data, μₛ) - ν_val = callback_value(cb_data, νₛ) + @objective( + 
optimality_model, + Max, + νₛ + sum(y[e] * μₛ[e] for e in 1:E) - + sum(second_stage_costs[e, s] * y[e] for e in 1:E) + ) - weights = second_stage_costs[:, s] .- μ_val + for tree in columns + @constraint( + optimality_model, + sum(second_stage_costs[e, s] - μₛ[e] for e in 1:E if tree[e]) >= νₛ + ) + end - val, tree = kruskal(graph, weights) + function my_callback_function(cb_data) + μ_val = callback_value.(cb_data, μₛ) + ν_val = callback_value(cb_data, νₛ) - if val - ν_val + tol < 0 - new_constraint = @build_constraint( - sum(second_stage_costs[e, s] - μₛ[e] for e in 1:E if tree[e]) >= νₛ - ) - MOI.submit( - optimality_model, MOI.LazyConstraint(cb_data), new_constraint - ) - end - end + weights = second_stage_costs[:, s] .- μ_val - set_attribute(optimality_model, MOI.LazyConstraintCallback(), my_callback_function) + val, tree = kruskal(graph, weights) - optimize!(optimality_model) + if val - ν_val + tol < 0 + new_constraint = @build_constraint( + sum(second_stage_costs[e, s] - μₛ[e] for e in 1:E if tree[e]) >= νₛ + ) + MOI.submit(optimality_model, MOI.LazyConstraint(cb_data), new_constraint) + end + end - # If primal feasible, add an optimality cut - @assert termination_status(optimality_model) != DUAL_INFEASIBLE - return true, value.(νₛ), value.(μₛ), objective_value(optimality_model) + set_attribute(optimality_model, MOI.LazyConstraintCallback(), my_callback_function) + + optimize!(optimality_model) + + # If primal feasible, add an optimality cut + @assert termination_status(optimality_model) != DUAL_INFEASIBLE + return true, value.(νₛ), value.(μₛ), JuMP.objective_value(optimality_model) end """ @@ -95,20 +95,18 @@ Returns the optimal solution using a Benders decomposition algorithm. 
""" function benders_decomposition( instance::TwoStageSpanningTreeInstance; - MILP_solver=GLPK.Optimizer, - tol=1e-6, - verbose=true + model_builder=() -> Model(GLPK.Optimizer), + tol=1e-6, + verbose=true, ) - (; graph, first_stage_costs, second_stage_costs) = instance - E = ne(graph) - S = nb_scenarios(instance) - - model = Model(MILP_solver) + (; graph, first_stage_costs, second_stage_costs) = instance + E = ne(graph) + S = nb_scenarios(instance) + + model = model_builder() + set_silent(model) @variable(model, y[e in 1:E], Bin) - @variable( - model, - θ[s in 1:S] >= sum(min(0, second_stage_costs[e, s]) for e in 1:E) - ) + @variable(model, θ[s in 1:S] >= sum(min(0, second_stage_costs[e, s]) for e in 1:E)) @objective( model, Min, @@ -124,41 +122,40 @@ function benders_decomposition( callback_counter += 1 y_val = callback_value.(cb_data, y) - θ_val = callback_value.(cb_data, θ) + θ_val = callback_value.(cb_data, θ) for current_scenario in 1:S - optimality_cut, ν_val, μ_val = - separate_benders_cut(instance, y_val, current_scenario; MILP_solver) + optimality_cut, ν_val, μ_val = separate_benders_cut( + instance, y_val, current_scenario; model_builder + ) - # If feasibility cut + # If feasibility cut if !optimality_cut new_feasibility_cut = @build_constraint( ν_val + sum(μ_val[e] * y[e] for e in 1:E) <= 0 ) - MOI.submit( - model, - MOI.LazyConstraint(cb_data), - new_feasibility_cut - ) + MOI.submit(model, MOI.LazyConstraint(cb_data), new_feasibility_cut) return nothing end - # Else, optimality cut - if θ_val[current_scenario] + tol < ν_val + sum(μ_val[e] * y_val[e] for e in 1:E) - - sum(second_stage_costs[e, current_scenario] * y_val[e] for e in 1:E) - con = @build_constraint( - θ[current_scenario] >= - ν_val + sum(μ_val[e] * y[e] for e in 1:E) - sum(second_stage_costs[e, current_scenario] * y[e] for e in 1:E) - ) - MOI.submit(model, MOI.LazyConstraint(cb_data), con) - return nothing - end + # Else, optimality cut + if θ_val[current_scenario] + tol < + ν_val + 
sum(μ_val[e] * y_val[e] for e in 1:E) - + sum(second_stage_costs[e, current_scenario] * y_val[e] for e in 1:E) + con = @build_constraint( + θ[current_scenario] >= + ν_val + sum(μ_val[e] * y[e] for e in 1:E) - + sum(second_stage_costs[e, current_scenario] * y[e] for e in 1:E) + ) + MOI.submit(model, MOI.LazyConstraint(cb_data), con) + return nothing + end end end set_attribute(model, MOI.LazyConstraintCallback(), benders_callback) optimize!(model) - return solution_from_first_stage_forest(value.(y) .> 0.5, instance) + return solution_from_first_stage_forest(value.(y) .> 0.5, instance) end diff --git a/src/TwoStageSpanningTree/algorithms/column_generation.jl b/src/TwoStageSpanningTree/algorithms/column_generation.jl index 7e0330b..de09f80 100644 --- a/src/TwoStageSpanningTree/algorithms/column_generation.jl +++ b/src/TwoStageSpanningTree/algorithms/column_generation.jl @@ -3,46 +3,40 @@ $TYPEDSIGNATURES Solves the linear relaxation using a column generation algorithm. """ -function column_generation(instance; MILP_solver=GLPK.Optimizer, tol=1e-6, verbose=true) - (; graph, first_stage_costs, second_stage_costs) = instance +function column_generation( + instance; model_builder=() -> Model(GLPK.Optimizer), tol=1e-6, verbose=true +) + (; graph, first_stage_costs, second_stage_costs) = instance - V = nv(graph) - E = ne(graph) - S = nb_scenarios(instance) - - model = Model(MILP_solver) + V = nv(graph) + E = ne(graph) + S = nb_scenarios(instance) - @variable(model, dummy, Bin) # dummy binary variable to activate callbacks + model = model_builder() + set_silent(model) - @variable(model, ν[s in 1:S]) - @variable(model, μ[e in 1:E, s in 1:S]) + @variable(model, dummy, Bin) # dummy binary variable to activate callbacks - @objective(model, Max, sum(ν[s] for s in 1:S)) + @variable(model, ν[s in 1:S]) + @variable(model, μ[e in 1:E, s in 1:S]) - @constraint( - model, [e in 1:E, s in 1:S], - second_stage_costs[e, s] / S - μ[e, s] >= 0 - ) + @objective(model, Max, sum(ν[s] for s in 
1:S)) - @constraint( - model, [e in 1:E], - first_stage_costs[e] - sum(μ[e, s] for s in 1:S) >= 0 - ) + @constraint(model, [e in 1:E, s in 1:S], second_stage_costs[e, s] / S - μ[e, s] >= 0) - trees = BitVector[] + @constraint(model, [e in 1:E], first_stage_costs[e] - sum(μ[e, s] for s in 1:S) >= 0) - for s in 1:S - _, dummy_tree = kruskal(graph, min.(first_stage_costs, second_stage_costs[:, s])) - @constraint( - model, - ν[s] <= sum(μ[e, s] for e in 1:E if dummy_tree[e]) - ) - push!(trees, dummy_tree) - end + trees = BitVector[] - callback_counter = 0 - function my_callback_function(cb_data) - callback_counter += 1 + for s in 1:S + _, dummy_tree = kruskal(graph, min.(first_stage_costs, second_stage_costs[:, s])) + @constraint(model, ν[s] <= sum(μ[e, s] for e in 1:E if dummy_tree[e])) + push!(trees, dummy_tree) + end + + callback_counter = 0 + function my_callback_function(cb_data) + callback_counter += 1 ν_val = callback_value.(cb_data, ν) μ_val = callback_value.(cb_data, μ) @@ -50,52 +44,54 @@ function column_generation(instance; MILP_solver=GLPK.Optimizer, tol=1e-6, verbo for s in 1:S val, T = kruskal(graph, @view μ_val[:, s]) - if val + tol < ν_val[s] - push!(trees, T) + if val + tol < ν_val[s] + push!(trees, T) new_constraint = @build_constraint( ν[s] <= sum(μ[e, s] for e in 1:E if T[e]) ) - MOI.submit( - model, MOI.LazyConstraint(cb_data), new_constraint - ) + MOI.submit(model, MOI.LazyConstraint(cb_data), new_constraint) end end end - set_attribute(model, MOI.LazyConstraintCallback(), my_callback_function) + set_attribute(model, MOI.LazyConstraintCallback(), my_callback_function) - optimize!(model) - verbose && @info "Optimal solution found after $callback_counter cuts" - return (; value=objective_value(model), ν=value.(ν), μ=value.(μ), columns=trees) + optimize!(model) + verbose && @info "Optimal solution found after $callback_counter cuts" + return (; value=JuMP.objective_value(model), ν=value.(ν), μ=value.(μ), columns=trees) end -function 
column_heuristic(instance, columns; MILP_solver=GLPK.Optimizer) - (; graph, first_stage_costs, second_stage_costs) = instance - E = ne(graph) - S = nb_scenarios(instance) - T = length(columns) +function column_heuristic(instance, columns; model_builder=() -> Model(HiGHS.Optimizer)) + (; graph, first_stage_costs, second_stage_costs) = instance + E = ne(graph) + S = nb_scenarios(instance) + T = length(columns) - model = Model(MILP_solver) + model = model_builder() + set_silent(model) - @variable(model, y[e in 1:E], Bin) - @variable(model, z[e in 1:E, s in 1:S], Bin) + @variable(model, y[e in 1:E], Bin) + @variable(model, z[e in 1:E, s in 1:S], Bin) - @variable(model, λ[t in 1:T, s in 1:S], Bin) + @variable(model, λ[t in 1:T, s in 1:S], Bin) - @objective( - model, Min, - sum(first_stage_costs[e] * y[e] for e in 1:E) + sum(second_stage_costs[e, s] * z[e, s] for e in 1:E for s in 1:S) / S - ) + @objective( + model, + Min, + sum(first_stage_costs[e] * y[e] for e in 1:E) + + sum(second_stage_costs[e, s] * z[e, s] for e in 1:E for s in 1:S) / S + ) - @constraint(model, [s in 1:S], sum(λ[t, s] for t in 1:T) == 1) - @constraint( - model, [e in 1:E, s in 1:S], - y[e] + z[e, s] == sum(λ[t, s] for t in 1:T if columns[t][e]) - ) + @constraint(model, [s in 1:S], sum(λ[t, s] for t in 1:T) == 1) + @constraint( + model, + [e in 1:E, s in 1:S], + y[e] + z[e, s] == sum(λ[t, s] for t in 1:T if columns[t][e]) + ) - optimize!(model) + optimize!(model) - return TwoStageSpanningTreeSolution(value.(y) .> 0.5, value.(z) .> 0.5) + return TwoStageSpanningTreeSolution(value.(y) .> 0.5, value.(z) .> 0.5) end """ @@ -104,8 +100,8 @@ $TYPEDSIGNATURES Column generation heuristic, that solves the linear relaxation and then outputs the solution of the proble restricted to selected columns. Returns an heuristic solution. 
""" -function column_heuristic(instance; MILP_solver=GLPK.Optimizer, verbose=true) - (; columns) = column_generation(instance; verbose=false, MILP_solver) - verbose && @info "Continuous relaxation solved with $(length(columns)) columns." - return column_heuristic(instance, columns) +function column_heuristic(instance; model_builder=() -> Model(GLPK.Optimizer), verbose=true) + (; columns) = column_generation(instance; verbose=false, model_builder) + verbose && @info "Continuous relaxation solved with $(length(columns)) columns." + return column_heuristic(instance, columns) end diff --git a/src/TwoStageSpanningTree/algorithms/cut_generation.jl b/src/TwoStageSpanningTree/algorithms/cut_generation.jl index 4558e9d..45949ea 100644 --- a/src/TwoStageSpanningTree/algorithms/cut_generation.jl +++ b/src/TwoStageSpanningTree/algorithms/cut_generation.jl @@ -3,60 +3,56 @@ $TYPEDSIGNATURES Solve the separation problem using the MILP formulation. """ -function MILP_separation_problem(graph, weights; MILP_solver, tol=1e-6) - V = nv(graph) - E = ne(graph) +function MILP_separation_problem( + graph, weights; model_builder=() -> Model(HiGHS.Optimizer), tol=1e-6 +) + V = nv(graph) + E = ne(graph) + + model = model_builder() + set_silent(model) - model = Model(MILP_solver) - set_silent(model) + @variable(model, α[v in 1:V], Bin) + @variable(model, β[e in 1:E], Bin) - @variable(model, α[v in 1:V], Bin) - @variable(model, β[e in 1:E], Bin) + @objective(model, Min, sum(α[v] for v in 1:V) - 1 - sum(weights[e] * β[e] for e in 1:E)) - @objective( - model, Min, - sum(α[v] for v in 1:V) - 1 - sum(weights[e] * β[e] for e in 1:E) - ) + @constraint( + model, + [(e, edge) in enumerate(edges(graph))], + 2 * β[e] <= α[dst(edge)] + α[src(edge)] + ) - @constraint(model, [(e, edge) in enumerate(edges(graph))], 2 * β[e] <= α[dst(edge)] + α[src(edge)]) + @constraint(model, sum(α[v] for v in 1:V) >= 1) + @constraint(model, sum(α[v] for v in 1:V) <= V - 1) - @constraint(model, sum(α[v] for v in 1:V) >= 
1) - @constraint(model, sum(α[v] for v in 1:V) <= V - 1) - - optimize!(model) - found = objective_value(model) + tol <= 0 - return found, value.(β) .> 0.5, sum(value.(α)) + optimize!(model) + found = JuMP.objective_value(model) + tol <= 0 + return found, value.(β) .> 0.5, sum(value.(α)) end function build_flow_graph(graph, weights; infinity=1e6) - V = nv(graph) - E = ne(graph) - - # A = 3 * E + V - VV = 2 + E + V - - o = 1 - d = 2 - - sources = vcat( - fill(o, E), - [2 + e for e in 1:E], - [2 + e for e in 1:E], - [2 + E + v for v in 1:V] - ) - destinations = vcat( - [2 + e for e in 1:E], - [2 + E + src(e) for e in edges(graph)], - [2 + E + dst(e) for e in edges(graph)], - fill(d, V) - ) - costs = vcat( - [weights[e] for e in 1:E], - fill(infinity, 2 * E), - ones(V) - ) - - return sources, destinations, costs + V = nv(graph) + E = ne(graph) + + # A = 3 * E + V + VV = 2 + E + V + + o = 1 + d = 2 + + sources = vcat( + fill(o, E), [2 + e for e in 1:E], [2 + e for e in 1:E], [2 + E + v for v in 1:V] + ) + destinations = vcat( + [2 + e for e in 1:E], + [2 + E + src(e) for e in edges(graph)], + [2 + E + dst(e) for e in edges(graph)], + fill(d, V), + ) + costs = vcat([weights[e] for e in 1:E], fill(infinity, 2 * E), ones(V)) + + return sources, destinations, costs end """ @@ -64,40 +60,35 @@ $TYPEDSIGNATURES Solve the separation problem using the min cut formulation. 
""" -function cut_separation_problem(graph, weights; MILP_solver=GLPK.Optimizer, tol=1e-6) - sources, destinations, costs = build_flow_graph(graph, weights) +function cut_separation_problem( + graph, weights; model_builder=() -> Model(HiGHS.Optimizer), tol=1e-6 +) + sources, destinations, costs = build_flow_graph(graph, weights) - A = 3 * ne(graph) + nv(graph) - V = 2 + ne(graph) + nv(graph) - @assert A == length(costs) + A = 3 * ne(graph) + nv(graph) + V = 2 + ne(graph) + nv(graph) + @assert A == length(costs) - vertex_range = (2 + ne(graph) + 1):V - edge_range = 1:ne(graph) + vertex_range = (2 + ne(graph) + 1):V + edge_range = 1:ne(graph) - model = Model(MILP_solver) + model = model_builder() - @variable(model, β[a in 1:A], Bin) - @variable(model, α[v in 1:V], Bin) + @variable(model, β[a in 1:A], Bin) + @variable(model, α[v in 1:V], Bin) - @objective( - model, Min, - sum(costs[a] * β[a] for a in 1:A) - ) + @objective(model, Min, sum(costs[a] * β[a] for a in 1:A)) - @constraint(model, α[1] == 1) - @constraint(model, α[2] == 0) + @constraint(model, α[1] == 1) + @constraint(model, α[2] == 0) - @constraint( - model, - [a in 1:A], - β[a] >= α[sources[a]] - α[destinations[a]] - ) + @constraint(model, [a in 1:A], β[a] >= α[sources[a]] - α[destinations[a]]) - @constraint(model, sum(α[v] for v in vertex_range) >= 1) + @constraint(model, sum(α[v] for v in vertex_range) >= 1) - optimize!(model) + optimize!(model) - min_cut_value = objective_value(model) + min_cut_value = JuMP.objective_value(model) found = min_cut_value + tol < nv(graph) @@ -111,41 +102,35 @@ Returns the optimal solution using a cut generation algorithm with custom separa """ function cut_generation( instance::TwoStageSpanningTreeInstance; - separation_problem=MILP_separation_pb, - MILP_solver=GLPK.Optimizer, - verbose=true + separation_problem=MILP_separation_problem, + model_builder=() -> Model(GLPK.Optimizer), + verbose=true, ) - # Unpack fields - (; graph, first_stage_costs, second_stage_costs) = 
instance - S = nb_scenarios(instance) - E = ne(graph) - V = nv(graph) + # Unpack fields + (; graph, first_stage_costs, second_stage_costs) = instance + S = nb_scenarios(instance) + E = ne(graph) + V = nv(graph) - # Initialize model and link to solver - model = Model(MILP_solver) + # Initialize model and link to solver + model = model_builder() + set_silent(model) - # Add variables + # Add variables @variable(model, y[e in 1:E], Bin) @variable(model, z[e in 1:E, s in 1:S], Bin) - # Add an objective function - @expression( - model, - first_stage_objective, - sum(first_stage_costs[e] * y[e] for e in 1:E) - ) - @expression( - model, - second_stage_objective, - sum(second_stage_costs[e, s] * z[e, s] for e in 1:E for s in 1:S) / S - ) + # Add an objective function + @expression(model, first_stage_objective, sum(first_stage_costs[e] * y[e] for e in 1:E)) + @expression( + model, + second_stage_objective, + sum(second_stage_costs[e, s] * z[e, s] for e in 1:E for s in 1:S) / S + ) @objective(model, Min, first_stage_objective + second_stage_objective) - # Add constraints - @constraint( - model, [s in 1:S], - sum(y[e] + z[e, s] for e in 1:E) == V - 1 - ) + # Add constraints + @constraint(model, [s in 1:S], sum(y[e] + z[e, s] for e in 1:E) == V - 1) call_back_counter = 0 @@ -156,27 +141,23 @@ function cut_generation( z_val = callback_value.(cb_data, z) for current_s in 1:S - weights = [y_val[e] + z_val[e, current_s] for e in 1:E] + weights = [y_val[e] + z_val[e, current_s] for e in 1:E] - found, Y, YY = separation_problem( - graph, weights; MILP_solver=MILP_solver - ) + found, Y, YY = separation_problem(graph, weights; model_builder=model_builder) if found new_constraint = @build_constraint( sum(y[e] + z[e, current_s] for e in 1:E if Y[e]) <= YY - 1 ) - MOI.submit( - model, MOI.LazyConstraint(cb_data), new_constraint - ) - return + MOI.submit(model, MOI.LazyConstraint(cb_data), new_constraint) + return nothing end end end - set_attribute(model, MOI.LazyConstraintCallback(), 
my_callback_function) + set_attribute(model, MOI.LazyConstraintCallback(), my_callback_function) - optimize!(model) - verbose && @info "Optimal solution found after $call_back_counter cuts" + optimize!(model) + verbose && @info "Optimal solution found after $call_back_counter cuts" return TwoStageSpanningTreeSolution(value.(y) .> 0.5, value.(z) .> 0.5) end diff --git a/src/TwoStageSpanningTree/algorithms/lagrangian_relaxation.jl b/src/TwoStageSpanningTree/algorithms/lagrangian_relaxation.jl index 7e013fa..7260fe4 100644 --- a/src/TwoStageSpanningTree/algorithms/lagrangian_relaxation.jl +++ b/src/TwoStageSpanningTree/algorithms/lagrangian_relaxation.jl @@ -1,8 +1,10 @@ -function first_stage_optimal_solution(inst::TwoStageSpanningTreeInstance, θ::AbstractMatrix; M=20.0) - S = nb_scenarios(inst) - E = ne(inst.graph) +function first_stage_optimal_solution( + inst::TwoStageSpanningTreeInstance, θ::AbstractMatrix; M=20.0 +) + S = nb_scenarios(inst) + E = ne(inst.graph) - # first stage objective value + # first stage objective value edge_weight_vector = inst.first_stage_costs .+ vec(sum(θ; dims=2)) ./ S edges_index_with_negative_cost = [e for e in 1:E if edge_weight_vector[e] < 0] @@ -13,7 +15,7 @@ function first_stage_optimal_solution(inst::TwoStageSpanningTreeInstance, θ::Ab end grad = zeros(E, S) - grad[edges_index_with_negative_cost, :] .= M / S + grad[edges_index_with_negative_cost, :] .= M / S return value, grad end; @@ -23,27 +25,29 @@ function second_stage_optimal_solution!( scenario::Int, grad::AbstractMatrix, ) - (; graph, second_stage_costs) = instance + (; graph, second_stage_costs) = instance S = nb_scenarios(instance) - weights = min.(-θ[:, scenario], second_stage_costs[:, scenario]) + weights = min.(-θ[:, scenario], second_stage_costs[:, scenario]) (; value, tree) = kruskal(graph, weights) - # update gradient - slice = (-θ[:, scenario] .< second_stage_costs[:, scenario]) .&& tree - grad[slice, scenario] .-= 1 / S + # update gradient + slice = (-θ[:, 
scenario] .< second_stage_costs[:, scenario]) .&& tree + grad[slice, scenario] .-= 1 / S return value ./ S end; -function lagrangian_function_value_gradient(inst::TwoStageSpanningTreeInstance, θ::AbstractMatrix) +function lagrangian_function_value_gradient( + inst::TwoStageSpanningTreeInstance, θ::AbstractMatrix +) value, grad = first_stage_optimal_solution(inst, θ) - S = nb_scenarios(inst) + S = nb_scenarios(inst) values = zeros(S) for s in 1:S - # Different part of grad are modified + # Different part of grad are modified values[s] = second_stage_optimal_solution!(inst, θ, s, grad) end value += sum(values) @@ -52,8 +56,8 @@ end; function lagrangian_heuristic(θ::AbstractMatrix; inst::TwoStageSpanningTreeInstance) # Retrieve - y_{es} / S from θ by computing the gradient - (; graph) = inst - S = nb_scenarios(inst) + (; graph) = inst + S = nb_scenarios(inst) grad = zeros(ne(graph), S) for s in 1:S second_stage_optimal_solution!(inst, θ, s, grad) @@ -66,7 +70,7 @@ function lagrangian_heuristic(θ::AbstractMatrix; inst::TwoStageSpanningTreeInst # Keep only the edges that are in the initial candidate graph and in the spanning tree forest = weights .&& tree_from_candidate sol = solution_from_first_stage_forest(forest, inst) - # v, _ = evaluate_first_stage_solution(inst, forest) + # v, _ = evaluate_first_stage_solution(inst, forest) return solution_value(sol, inst), forest end; @@ -76,11 +80,12 @@ $TYPEDSIGNATURES Return an heuristic solution using a combination of lagarngian relaxation and lagrangian heuristic. 
""" function lagrangian_relaxation( - inst::TwoStageSpanningTreeInstance; nb_epochs=100, stop_gap=1e-8 + inst::TwoStageSpanningTreeInstance; nb_epochs=500, stop_gap=1e-8 ) θ = zeros(ne(inst.graph), nb_scenarios(inst)) opt = Adam() + opt_state = Flux.setup(opt, θ) lb = -Inf ub = Inf @@ -106,12 +111,12 @@ function lagrangian_relaxation( end end end - Flux.update!(opt, θ, -grad) + Flux.update!(opt_state, θ, -grad) push!(lb_history, value) push!(ub_history, ub) end - ub, forest = lagrangian_heuristic(best_theta; inst=inst) + ub, forest = lagrangian_heuristic(best_theta; inst=inst) solution = solution_from_first_stage_forest(forest, inst) return solution, (; lb, ub, best_theta, lb_history, ub_history) end diff --git a/src/TwoStageSpanningTree/benchmark.jl b/src/TwoStageSpanningTree/benchmark.jl new file mode 100644 index 0000000..7f3d813 --- /dev/null +++ b/src/TwoStageSpanningTree/benchmark.jl @@ -0,0 +1,126 @@ +""" +$TYPEDEF + +Two-Stage Minimum Spanning Tree benchmark. + +An instance consists of a grid graph of size `n × m` with: +- Random first-stage costs for each edge (drawn from `c_range`), known before the scenario. +- `nb_scenarios` second-stage cost draws (from `d_range`) embedded in the instance and used + for computing features via [`compute_features`](@ref). + +A fresh evaluation scenario is drawn independently by [`generate_scenario`](@ref) +and is used for labeling and objective evaluation. + +# Fields +$TYPEDFIELDS +""" +@kwdef struct TwoStageSpanningTreeBenchmark <: Utils.AbstractStochasticBenchmark{true} + "Number of grid rows." + n::Int = 4 + "Number of grid columns." + m::Int = 4 + "Number of second-stage cost scenarios embedded in the instance (for features)." + nb_scenarios::Int = 10 + "Range for first-stage edge costs." + c_range::UnitRange{Int} = 1:10 + "Range for second-stage edge costs." + d_range::UnitRange{Int} = 1:20 +end + +function Utils.generate_instance( + bench::TwoStageSpanningTreeBenchmark, rng::AbstractRNG; kwargs... 
+) + (; n, m, nb_scenarios, c_range, d_range) = bench + g = Graphs.grid((n, m)) + E = ne(g) + c = Float32[rand(rng, c_range) for _ in 1:E] + d = Float32[rand(rng, d_range) for _ in 1:E, _ in 1:nb_scenarios] + instance = TwoStageSpanningTreeInstance(g, c, d) + x = compute_features(instance; c_max=maximum(c_range), d_max=maximum(d_range)) + return Utils.DataSample(; x, instance) +end + +function Utils.generate_scenario( + bench::TwoStageSpanningTreeBenchmark, + rng::AbstractRNG; + instance::TwoStageSpanningTreeInstance, + kwargs..., +) + return Float32[rand(rng, bench.d_range) for _ in 1:ne(instance.graph)] +end + +function Utils.generate_statistical_model(::TwoStageSpanningTreeBenchmark; seed=nothing) + isnothing(seed) || Random.seed!(seed) + nb_features = 2 + 7 * length(0.0:0.1:1.0) + return Flux.Chain(Flux.Dense(nb_features => 1; bias=false), vec) +end + +struct TSTMaximizer end + +function (::TSTMaximizer)( + θ::AbstractVector; instance::TwoStageSpanningTreeInstance, kwargs... +) + return maximum_weight_forest(instance.graph, θ) +end + +Utils.generate_maximizer(::TwoStageSpanningTreeBenchmark) = TSTMaximizer() + +Utils.is_minimization_problem(::TwoStageSpanningTreeBenchmark) = true + +function _complete_second_stage( + y::AbstractVector, graph::AbstractGraph, scenario::AbstractVector +) + weights = copy(scenario) + m = min(zero(eltype(weights)), minimum(weights) - one(eltype(weights))) + weights[y .> 0] .= m + _, tree = kruskal(graph, weights) + return tree .⊻ BitVector(y .> 0) +end + +function Utils.objective_value( + ::TwoStageSpanningTreeBenchmark, sample::Utils.DataSample, y::AbstractVector +) + (; instance) = sample.context + scenarios = if hasproperty(sample.extra, :scenarios) + sample.extra.scenarios + else + [instance.second_stage_costs[:, s] for s in 1:nb_scenarios(instance)] + end + return dot(y, instance.first_stage_costs) + + mean(dot(_complete_second_stage(y, instance.graph, ξ), ξ) for ξ in scenarios) +end + +function 
Utils.generate_anticipative_solver(::TwoStageSpanningTreeBenchmark) + return (scenario::AbstractVector; instance::TwoStageSpanningTreeInstance, kwargs...) -> + begin + c, g = instance.first_stage_costs, instance.graph + _, tree = kruskal(g, min.(c, scenario)) + return BitVector(tree .& (c .<= scenario)) + end +end + +function Utils.generate_baseline_policies(::TwoStageSpanningTreeBenchmark) + function make_saa_policy(solver_fn) + return (sample, scenarios) -> begin + d_eval = reduce(hcat, scenarios) + saa_inst = TwoStageSpanningTreeInstance( + sample.instance.graph, sample.instance.first_stage_costs, d_eval + ) + sol = solver_fn(saa_inst) + return [ + Utils.DataSample(; + sample.context..., + x=sample.x, + y=sol.y, + extra=(; scenarios=scenarios), + ), + ] + end + end + return (; + cut_generation=make_saa_policy(inst -> cut_generation(inst; verbose=false)), + benders=make_saa_policy(inst -> benders_decomposition(inst; verbose=false)), + column_heuristic=make_saa_policy(inst -> column_heuristic(inst; verbose=false)), + lagrangian=make_saa_policy(inst -> first(lagrangian_relaxation(inst))), + ) +end diff --git a/src/TwoStageSpanningTree/instance.jl b/src/TwoStageSpanningTree/instance.jl index e0671af..fad7939 100644 --- a/src/TwoStageSpanningTree/instance.jl +++ b/src/TwoStageSpanningTree/instance.jl @@ -10,18 +10,25 @@ struct TwoStageSpanningTreeInstance{T} "First stage costs for each edge" first_stage_costs::Vector{T} "Second stage costs for each edge and scenario [e, s]" - second_stage_costs::Matrix{T} + second_stage_costs::Matrix{T} end -Base.show(io::IO, instance::TwoStageSpanningTreeInstance) = print(io, "$(collect(edges(instance.graph))), $(instance.first_stage_costs), $(instance.second_stage_costs)") +function Base.show(io::IO, instance::TwoStageSpanningTreeInstance) + return print( + io, + "$(collect(edges(instance.graph))), $(instance.first_stage_costs), $(instance.second_stage_costs)", + ) +end -function TwoStageSpanningTreeInstance(; n, m, nb_scenarios=1, 
c_range=1:20, d_range=1:20, seed=nothing, type=Float64) - g = Graphs.grid((n, m)) - rng = MersenneTwister(seed) - c = type[rand(rng, c_range) for _ in 1:ne(g)] - d = type[rand(rng, d_range) for _ in 1:ne(g), _ in 1:nb_scenarios] +function TwoStageSpanningTreeInstance(; + n, m, nb_scenarios=1, c_range=1:20, d_range=1:20, seed=nothing, type=Float64 +) + g = Graphs.grid((n, m)) + rng = MersenneTwister(seed) + c = type[rand(rng, c_range) for _ in 1:ne(g)] + d = type[rand(rng, d_range) for _ in 1:ne(g), _ in 1:nb_scenarios] - return TwoStageSpanningTreeInstance(g, c, d) + return TwoStageSpanningTreeInstance(g, c, d) end """ diff --git a/src/TwoStageSpanningTree/learning/features.jl b/src/TwoStageSpanningTree/learning/features.jl index 94dfabf..1befc55 100644 --- a/src/TwoStageSpanningTree/learning/features.jl +++ b/src/TwoStageSpanningTree/learning/features.jl @@ -1,107 +1,139 @@ - -function first_stage_cost( - inst::TwoStageSpanningTreeInstance, e::AbstractEdge, scenario::Int -) - return inst.first_stage_weights_matrix[src(e), dst(e)] -end - -function scenario_second_stage_cost( - inst::TwoStageSpanningTreeInstance, e::AbstractEdge, scenario::Int -) - return inst.second_stage_weights[scenario][src(e), dst(e)] -end - -function scenario_best_stage_cost( - inst::TwoStageSpanningTreeInstance, e::AbstractEdge, scenario::Int -) - return min( - inst.second_stage_weights[scenario][src(e), dst(e)], - inst.first_stage_weights_matrix[src(e), dst(e)], - ) -end - -function edge_neighbors(g::AbstractGraph, e::AbstractEdge) - result = Vector{AbstractEdge}() - for v in [src(e), dst(e)] - for u in neighbors(g, v) - push!(result, Edge(min(u, v), max(u, v))) +function _edge_adjacent_indices(graph::AbstractGraph, e::AbstractEdge) + result = Int[] + for v in (src(e), dst(e)) + for u in neighbors(graph, v) + push!(result, edge_index(graph, Edge(min(u, v), max(u, v)))) end end return result end -function compute_minimum_spanning_tree( - inst::TwoStageSpanningTreeInstance, scenario::Int, 
weight_function +function _compute_mst_indicator( + instance::TwoStageSpanningTreeInstance, scenario::Int, weight_fn ) - weights_vec = zeros(ne(inst.g)) - for e in edges(inst.g) - weights_vec[edge_index(inst, e)] = weight_function(inst, e, scenario) - end - weights = get_weight_matrix_from_weight_vector(inst.g, inst.edge_index, weights_vec) - return kruskal_mst(inst.g, weights) + weights = [weight_fn(instance, i, scenario) for i in 1:ne(instance.graph)] + _, tree = kruskal(instance.graph, weights) + return tree end -function compute_first_stage_mst(inst::TwoStageSpanningTreeInstance) - tree = compute_minimum_spanning_tree(inst, -1, first_stage_cost) - fs_mst_indicator = zeros(ne(inst.g)) - for e in tree - fs_mst_indicator[edge_index(inst, e)] = 1.0 - end - return fs_mst_indicator +function _compute_first_stage_mst(instance::TwoStageSpanningTreeInstance) + return _compute_mst_indicator(instance, 1, (inst, i, _) -> inst.first_stage_costs[i]) end -function compute_second_stage_mst(inst::TwoStageSpanningTreeInstance) - ss_mst_indicator = zeros(ne(inst.g), inst.nb_scenarios) - for scenario in 1:(inst.nb_scenarios) - tree = compute_minimum_spanning_tree(inst, scenario, scenario_second_stage_cost) - for e in tree - ss_mst_indicator[edge_index(inst, e), scenario] = 1.0 - end +function _compute_second_stage_mst(instance::TwoStageSpanningTreeInstance) + S = nb_scenarios(instance) + indicator = falses(ne(instance.graph), S) + for s in 1:S + indicator[:, s] .= _compute_mst_indicator( + instance, s, (inst, i, sc) -> inst.second_stage_costs[i, sc] + ) end - return ss_mst_indicator + return indicator end -function compute_best_stage_mst(inst::TwoStageSpanningTreeInstance) - bfs_mst_indicator = zeros(ne(inst.g), inst.nb_scenarios) - bss_mst_indicator = zeros(ne(inst.g), inst.nb_scenarios) - for scenario in 1:(inst.nb_scenarios) - tree = compute_minimum_spanning_tree(inst, scenario, scenario_best_stage_cost) - for e in tree - e_ind = edge_index(inst, e) - if 
inst.first_stage_weights_matrix[src(e), dst(e)] < - inst.second_stage_weights[scenario][src(e), dst(e)] - bfs_mst_indicator[e_ind, scenario] = 1.0 +function _compute_best_stage_mst(instance::TwoStageSpanningTreeInstance) + (; first_stage_costs, second_stage_costs) = instance + S = nb_scenarios(instance) + E = ne(instance.graph) + bfs = falses(E, S) + bss = falses(E, S) + for s in 1:S + tree = _compute_mst_indicator( + instance, + s, + (inst, i, sc) -> min(inst.first_stage_costs[i], inst.second_stage_costs[i, sc]), + ) + for i in 1:E + tree[i] || continue + if first_stage_costs[i] <= second_stage_costs[i, s] + bfs[i, s] = true else - bss_mst_indicator[e_ind, scenario] = 1.0 + bss[i, s] = true end end end - return bfs_mst_indicator, bss_mst_indicator + return bfs, bss end -function pivot_instance_second_stage_costs(inst::TwoStageSpanningTreeInstance) - edgeWeights = zeros(ne(inst.g), inst.nb_scenarios) - for s in 1:(inst.nb_scenarios) - for e in edges(inst.g) - edgeWeights[edge_index(inst, e), s] = inst.second_stage_weights[s][ - src(e), dst(e) - ] - end - end - return edgeWeights -end +""" +$TYPEDSIGNATURES + +Compute per-edge features for a [`TwoStageSpanningTreeInstance`](@ref). + +Returns a `Float32` matrix of shape `(nb_features, ne)` where `nb_features = 2 + 7 * 11`. -function compute_edge_neighbors(inst::TwoStageSpanningTreeInstance) - neighbors = Vector{Vector{Int}}(undef, ne(inst.g)) - for e in edges(inst.g) - e_ind = edge_index(inst, e) - neighbors_list = edge_neighbors(inst.g, e) - neighbors[e_ind] = Vector{Int}(undef, length(neighbors_list)) - count = 0 - for f in neighbors_list - count += 1 - neighbors[e_ind][count] = edge_index(inst, f) +Features are normalized by `c_max` (first-stage costs) and `d_max` (second-stage costs) so +that all values lie in `[0, 1]`. Pass `c_max=1` and `d_max=1` to skip normalization. 
+ +# Features (normalized to [0, 1]) +- `first_stage_cost / c_max` +- Quantiles (0:0.1:1) of `second_stage_cost / d_max` across scenarios +- Quantiles of `best_stage_cost / max(c_max, d_max)` across scenarios +- Quantiles of `neighbors_first_stage_cost / c_max` +- Quantiles of `neighbors_second_stage_cost / d_max` across scenarios and neighbors +- `is_in_first_stage_mst × first_stage_cost / c_max` +- Quantiles of `is_in_second_stage_mst × second_stage_cost / d_max` +- Quantiles of `is_first_in_best_stage_mst × first_stage_cost / c_max` +- Quantiles of `is_second_in_best_stage_mst × second_stage_cost / d_max` +""" +function compute_features( + instance::TwoStageSpanningTreeInstance; c_max::Real=1, d_max::Real=1 +) + (; graph, first_stage_costs, second_stage_costs) = instance + S = nb_scenarios(instance) + E = ne(graph) + cd_max = max(c_max, d_max) + + quantiles_used = 0.0:0.1:1.0 + nb_quantiles = length(quantiles_used) + nb_features = 2 + 7 * nb_quantiles + + fs_mst = _compute_first_stage_mst(instance) + ss_mst = _compute_second_stage_mst(instance) + bfs_mst, bss_mst = _compute_best_stage_mst(instance) + + X = zeros(Float32, nb_features, E) + + for (i, e) in enumerate(edges(graph)) + f = 0 + + function add_quantiles(realizations) + for p in quantiles_used + f += 1 + X[f, i] = quantile(realizations, p) + end end + + # first_stage_cost + f += 1 + X[f, i] = first_stage_costs[i] / c_max + + # second_stage_cost quantiles across scenarios + add_quantiles(second_stage_costs[i, :] ./ d_max) + + # best_stage_cost quantiles across scenarios + add_quantiles(min.(first_stage_costs[i], second_stage_costs[i, :]) ./ cd_max) + + # neighbor first_stage_cost quantiles + adj = _edge_adjacent_indices(graph, e) + add_quantiles(first_stage_costs[adj] ./ c_max) + + # neighbor second_stage_cost quantiles across scenarios and neighbors + add_quantiles([second_stage_costs[n, s] / d_max for s in 1:S for n in adj]) + + # is_in_first_stage_mst × first_stage_cost + f += 1 + X[f, i] = fs_mst[i] * 
first_stage_costs[i] / c_max + + # is_in_second_stage_mst × second_stage_cost quantiles + add_quantiles(ss_mst[i, :] .* second_stage_costs[i, :] ./ d_max) + + # is_first_in_best_stage_mst × first_stage_cost quantiles + add_quantiles(bfs_mst[i, :] .* first_stage_costs[i] ./ c_max) + + # is_second_in_best_stage_mst × second_stage_cost quantiles + add_quantiles(bss_mst[i, :] .* second_stage_costs[i, :] ./ d_max) end - return neighbors + + return X end diff --git a/src/TwoStageSpanningTree/plot.jl b/src/TwoStageSpanningTree/plot.jl index e143c01..174aad8 100644 --- a/src/TwoStageSpanningTree/plot.jl +++ b/src/TwoStageSpanningTree/plot.jl @@ -8,36 +8,47 @@ - `weights`: edge weights to display (optional) """ function plot_grid_graph( - graph, n, m, weights=nothing; - show_node_indices=false, δ=0.25, δ₂=0.13, - edge_colors=fill(:black, ne(graph)), - edge_widths=fill(1, ne(graph)), - edge_labels=fill(nothing, ne(graph)), - space_for_legend=0 + graph, + n, + m, + weights=nothing; + show_node_indices=false, + δ=0.25, + δ₂=0.13, + edge_colors=fill(:black, ne(graph)), + edge_widths=fill(1, ne(graph)), + edge_labels=fill(nothing, ne(graph)), + space_for_legend=0, ) - l = [((i - 1) % n, floor((i - 1) / n)) for i in 1:nv(graph)] - function args_from_ij(i, j) - return [l[i][1], l[j][1]], [l[i][2], l[j][2]] - end - f = Plots.plot(; axis=([], false), ylimits=(-δ, m-1+δ+space_for_legend), xlimits=(-δ, n-1+δ), aspect_ratio=:equal, leg=:top) - for (color, width, label, e) in zip(edge_colors, edge_widths, edge_labels, edges(graph)) - Plots.plot!(f, args_from_ij(src(e), dst(e)); color, width, label) - end - series_annotations = show_node_indices ? 
(1:nv(g)) : nothing - Plots.scatter!(f, l; series_annotations, label=nothing, markersize=15, color=:lightgrey) - if !isnothing(weights) - for (w, e) in zip(weights, edges(graph)) - i, j = src(e), dst(e) - x, y = (l[j] .+ l[i]) ./ 2 - if j == i + 1 - y += δ₂ - else - x -= δ₂ - end - Plots.annotate!(f, x, y, Int(w)) - end - end - return f + l = [((i - 1) % n, floor((i - 1) / n)) for i in 1:nv(graph)] + function args_from_ij(i, j) + return [l[i][1], l[j][1]], [l[i][2], l[j][2]] + end + f = Plots.plot(; + axis=([], false), + ylimits=(-δ, m - 1 + δ + space_for_legend), + xlimits=(-δ, n - 1 + δ), + aspect_ratio=:equal, + leg=:top, + ) + for (color, width, label, e) in zip(edge_colors, edge_widths, edge_labels, edges(graph)) + Plots.plot!(f, args_from_ij(src(e), dst(e)); color, width, label) + end + series_annotations = show_node_indices ? (1:nv(g)) : nothing + Plots.scatter!(f, l; series_annotations, label=nothing, markersize=15, color=:lightgrey) + if !isnothing(weights) + for (w, e) in zip(weights, edges(graph)) + i, j = src(e), dst(e) + x, y = (l[j] .+ l[i]) ./ 2 + if j == i + 1 + y += δ₂ + else + x -= δ₂ + end + Plots.annotate!(f, x, y, Int(w)) + end + end + return f end """ @@ -46,35 +57,64 @@ $TYPEDSIGNATURES Plot the two-stage tree from `solution` for requested `scenario`. 
""" function plot_scenario( - solution::TwoStageSpanningTreeSolution, instance::TwoStageSpanningTreeInstance, scenario; - show_node_indices=false, δ=0.25, δ₂=0.16, n, m + solution::TwoStageSpanningTreeSolution, + instance::TwoStageSpanningTreeInstance, + scenario; + show_node_indices=false, + δ=0.25, + δ₂=0.16, + n, + m, ) - (; graph, first_stage_costs, second_stage_costs) = instance - first_stage_forest = solution.y - second_stage_forests = solution.z + (; graph, first_stage_costs, second_stage_costs) = instance + first_stage_forest = solution.y + second_stage_forests = solution.z - is_labeled_1 = false - is_labeled_2 = false - edge_labels = fill("", ne(graph)) + is_labeled_1 = false + is_labeled_2 = false + edge_labels = fill("", ne(graph)) - S = nb_scenarios(instance) + S = nb_scenarios(instance) - for e in 1:ne(graph) - b1 = first_stage_forest[e] - b2 = second_stage_forests[e, scenario] - if !is_labeled_1 && b1 - edge_labels[e] = "First stage forest" - is_labeled_1 = true - elseif !is_labeled_2 && b2 - edge_labels[e] = "Second stage forest (scenario $scenario/$S)" - is_labeled_2 = true - end - end + for e in 1:ne(graph) + b1 = first_stage_forest[e] + b2 = second_stage_forests[e, scenario] + if !is_labeled_1 && b1 + edge_labels[e] = "First stage forest" + is_labeled_1 = true + elseif !is_labeled_2 && b2 + edge_labels[e] = "Second stage forest (scenario $scenario/$S)" + is_labeled_2 = true + end + end - edge_colors = [e1 ? :red : e2 ? :green : :black for (e1, e2) in zip(first_stage_forest, second_stage_forests[:, scenario])] - edge_widths = [e1 || e2 ? 
3 : 1 for (e1, e2) in zip(first_stage_forest, second_stage_forests[:, scenario])] - weights = first_stage_forest .* first_stage_costs + .!first_stage_forest .* second_stage_costs[:, scenario] - return plot_grid_graph( - graph, n, m, weights; show_node_indices, δ, δ₂, edge_colors, edge_widths, edge_labels, space_for_legend=3δ - ) + edge_colors = [ + if e1 + :red + elseif e2 + :green + else + :black + end for (e1, e2) in zip(first_stage_forest, second_stage_forests[:, scenario]) + ] + edge_widths = [ + e1 || e2 ? 3 : 1 for + (e1, e2) in zip(first_stage_forest, second_stage_forests[:, scenario]) + ] + weights = + first_stage_forest .* first_stage_costs + + .!first_stage_forest .* second_stage_costs[:, scenario] + return plot_grid_graph( + graph, + n, + m, + weights; + show_node_indices, + δ, + δ₂, + edge_colors, + edge_widths, + edge_labels, + space_for_legend=3δ, + ) end diff --git a/src/TwoStageSpanningTree/solution.jl b/src/TwoStageSpanningTree/solution.jl index d0db655..5325a53 100644 --- a/src/TwoStageSpanningTree/solution.jl +++ b/src/TwoStageSpanningTree/solution.jl @@ -14,8 +14,11 @@ $TYPEDSIGNATURES Compute the objective value of given solution for given instance. """ -function solution_value(solution::TwoStageSpanningTreeSolution, instance::TwoStageSpanningTreeInstance) - return dot(solution.y, instance.first_stage_costs) + dot(solution.z, instance.second_stage_costs) / nb_scenarios(instance) +function solution_value( + solution::TwoStageSpanningTreeSolution, instance::TwoStageSpanningTreeInstance +) + return dot(solution.y, instance.first_stage_costs) + + dot(solution.z, instance.second_stage_costs) / nb_scenarios(instance) end """ @@ -23,7 +26,11 @@ $TYPEDSIGNATURES Check if a given solution is feasible for given instance. 
""" -function is_feasible(solution::TwoStageSpanningTreeSolution, instance::TwoStageSpanningTreeInstance; verbose=true) +function is_feasible( + solution::TwoStageSpanningTreeSolution, + instance::TwoStageSpanningTreeInstance; + verbose=true, +) (; y, z) = solution (; graph) = instance @@ -50,21 +57,23 @@ $TYPEDSIGNATURES Return the associated two-stage solution from given first stage forest and instance. """ -function solution_from_first_stage_forest(forest::BitVector, instance::TwoStageSpanningTreeInstance) - (; graph, second_stage_costs) = instance +function solution_from_first_stage_forest( + forest::BitVector, instance::TwoStageSpanningTreeInstance +) + (; graph, second_stage_costs) = instance - S = nb_scenarios(instance) - forests = falses(ne(graph), S) + S = nb_scenarios(instance) + forests = falses(ne(graph), S) for s in 1:S - weights = deepcopy(second_stage_costs[:, s]) + weights = deepcopy(second_stage_costs[:, s]) m = minimum(weights) - 1 m = min(0, m - 1) weights[forest] .= m # set weights over forest as the minimum - # find min spanning tree including forest + # find min spanning tree including forest _, tree_s = kruskal(graph, weights) - forest_s = tree_s .- forest - forests[:, s] .= forest_s + forest_s = tree_s .- forest + forests[:, s] .= forest_s end return TwoStageSpanningTreeSolution(forest, forests) diff --git a/src/TwoStageSpanningTree/utils.jl b/src/TwoStageSpanningTree/utils.jl index 22ddd00..ff915a7 100644 --- a/src/TwoStageSpanningTree/utils.jl +++ b/src/TwoStageSpanningTree/utils.jl @@ -8,20 +8,20 @@ and a binary vector instead of a vecror of edges. 
function kruskal(g::AbstractGraph, weights::AbstractVector; minimize::Bool=true) connected_vs = IntDisjointSets(nv(g)) - tree = falses(ne(g)) + tree = falses(ne(g)) edge_list = collect(edges(g)) - order = sortperm(weights; rev=!minimize) - value = 0.0 + order = sortperm(weights; rev=!minimize) + value = 0.0 - tree_size = 0 + tree_size = 0 for (e_ind, e) in zip(order, edge_list[order]) if !in_same_set(connected_vs, src(e), dst(e)) union!(connected_vs, src(e), dst(e)) - tree[e_ind] = true - tree_size += 1 - value += weights[e_ind] + tree[e_ind] = true + tree_size += 1 + value += weights[e_ind] (tree_size >= nv(g) - 1) && break end end @@ -34,3 +34,37 @@ function is_spanning_tree(tree_candidate::BitVector, graph::AbstractGraph) subgraph = induced_subgraph(graph, edge_list)[1] return !is_cyclic(subgraph) && nv(subgraph) == nv(graph) end + +""" +$TYPEDSIGNATURES + +Return the index of edge `e` in `collect(edges(graph))`. +""" +function edge_index(graph::AbstractGraph, e::AbstractEdge) + for (i, f) in enumerate(edges(graph)) + (src(f) == src(e) && dst(f) == dst(e)) && return i + end + return error("Edge $(e) not found in graph") +end + +""" +$TYPEDSIGNATURES + +Return the maximum weight forest of `g` with respect to edge weights `θ`. +Edges with non-positive weight are excluded. 
+""" +function maximum_weight_forest(g::AbstractGraph, θ::AbstractVector) + edge_list = collect(edges(g)) + order = sortperm(θ; rev=true) + forest = falses(ne(g)) + connected_vs = IntDisjointSets(nv(g)) + for i in order + θ[i] <= 0 && break + e = edge_list[i] + if !in_same_set(connected_vs, src(e), dst(e)) + union!(connected_vs, src(e), dst(e)) + forest[i] = true + end + end + return forest +end diff --git a/test/runtests.jl b/test/runtests.jl index 00bea25..c2a7c9e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -18,6 +18,7 @@ using Random include("warcraft.jl") include("vsp.jl") include("portfolio_optimization.jl") + include("two_stage_spanning_tree.jl") @testset "Dynamic Vehicle Scheduling Problem" begin include("dynamic_vsp.jl") diff --git a/test/two_stage_spanning_tree.jl b/test/two_stage_spanning_tree.jl new file mode 100644 index 0000000..010c1f6 --- /dev/null +++ b/test/two_stage_spanning_tree.jl @@ -0,0 +1,52 @@ +@testset "Two-Stage Spanning Tree" begin + using Graphs: ne + b = TwoStageSpanningTreeBenchmark(; n=3, m=3, nb_scenarios=5) + + nb_features = 2 + 7 * length(0.0:0.1:1.0) # 79 + + # Unlabeled dataset: 2 instances × 3 evaluation scenarios each + dataset = generate_dataset(b, 2; nb_scenarios=3, seed=1) + @test length(dataset) == 6 + @test all(s -> s.y === nothing, dataset) + @test all(s -> hasproperty(s.extra, :scenario), dataset) + @test all(s -> size(s.x) == (nb_features, ne(s.instance.graph)), dataset) + + # Instance has nb_scenarios feature scenarios embedded + @test size(dataset[1].instance.second_stage_costs, 2) == 5 + + # Labeled dataset with anticipative solver + anticipative = generate_anticipative_solver(b) + policy = + (sample, scenarios) -> [ + DataSample(; + sample.context..., + x=sample.x, + y=anticipative(ξ; sample.context...), + extra=(; scenario=ξ), + ) for ξ in scenarios + ] + labeled = generate_dataset(b, 2; nb_scenarios=3, target_policy=policy, seed=1) + @test length(labeled) == 6 + @test all(s -> s.y isa BitVector, 
labeled) + + # Maximizer + model = generate_statistical_model(b; seed=1) + maximizer = generate_maximizer(b) + for sample in labeled[1:3] + θ = model(sample.x) + y = maximizer(θ; sample.context...) + @test y isa BitVector + @test length(y) == ne(sample.instance.graph) + end + + # Objective value (should be non-negative for valid solutions) + for sample in labeled[1:3] + obj = objective_value(b, sample) + @test isfinite(obj) + @test obj >= 0 + end + + # Gap (should be finite) + gap = compute_gap(b, labeled, model, maximizer) + @test isfinite(gap) +end