JuliaDecisionFocusedLearning · BatyLeo · Mar 17, 2026 · Mar 17, 2026 · Mar 17, 2026 · Mar 17, 2026
diff --git a/Project.toml b/Project.toml
@@ -7,7 +7,6 @@ authors = ["Members of JuliaDecisionFocusedLearning"]
 projects = ["docs", "test"]
 
 [deps]
-Colors = "5ae59095-9a9b-59fe-a467-6f913c188581"
 Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
 ConstrainedShortestPaths = "b3798467-87dc-4d99-943d-35a1bd39e395"
 DataDeps = "124859b0-ceae-595e-8997-d05f6a7a8dfe"
@@ -27,7 +26,6 @@ LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Metalhead = "dbeba491-748d-5e0e-a39e-b530a07fa0cc"
 NPZ = "15e1cf62-19b3-5cfa-8e77-841668bca605"
-Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Requires = "ae029012-a4dd-5104-9daa-d747884805df"
@@ -37,8 +35,13 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 
+[weakdeps]
+Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
+
+[extensions]
+DFLBenchmarksPlotsExt = "Plots"
+
 [compat]
-Colors = "0.13.1"
 Combinatorics = "1.0.3"
 ConstrainedShortestPaths = "0.6.0"
 DataDeps = "0.7"

diff --git a/docs/src/benchmarks/maintenance.md b/docs/src/benchmarks/maintenance.md
@@ -7,17 +7,17 @@ The Maintenance problem with resource constraint is a sequential decision-making
 
 ### Overview
 
-In this benchmark, a system consists of $N$ identical components, each of which can degrade over $n$ discrete states. State $1$ means that the component is new, state $n$ means that the component is failed. At each time step, the agent can maintain up to $K$ components.  
+In this benchmark, a system consists of ``N`` identical components, each of which can degrade over ``n`` discrete states. State ``1`` means that the component is new, state ``n`` means that the component has failed. At each time step, the agent can maintain up to ``K`` components.  
 
 This forms an endogenous multistage stochastic optimization problem, where the agent must plan maintenance actions over the horizon.
 
 ### Mathematical Formulation
 
 The maintenance problem can be formulated as a finite-horizon Markov Decision Process (MDP) with the following components:
 
-**State Space** $\mathcal{S}$: At time step $t$, the state $s_t \in [1:n]^N$ is the degradation state for each component.
+**State Space** ``\mathcal{S}``: At time step ``t``, the state ``s_t \in [1:n]^N`` is the degradation state for each component.
 
-**Action Space** $\mathcal{A}$: The action at time $t$ is the set of components that are maintained at time $t$:
+**Action Space** ``\mathcal{A}``: The action at time ``t`` is the set of components that are maintained at time ``t``:
 ```math
 a_t \subseteq \{1, 2, \ldots, N\} \text{ such that } |a_t| \leq K
 ```
@@ -51,9 +51,9 @@ Here, \(p\) is the degradation probability, \(s_t^i\) is the current state of co
 
 The immediate cost at time \(t\) is:
 
-$$
+```math
 c(s_t, a_t) = \Big( c_m \cdot |a_t| + c_f \cdot \#\{ i : s_t^i = n \} \Big)
-$$
+```
 
 Where:
 

diff --git a/docs/src/custom_benchmarks.md b/docs/src/custom_benchmarks.md
@@ -69,13 +69,13 @@ generate_maximizer(bench::MyBenchmark)
 ### Optional methods
 
 ```julia
+generate_baseline_policies(bench::MyBenchmark) -> collection of callables
 is_minimization_problem(bench::MyBenchmark) -> Bool   # default: false (maximization)
 objective_value(bench::MyBenchmark, sample::DataSample, y) -> Real
 compute_gap(bench::MyBenchmark, dataset, model, maximizer) -> Float64
-plot_data(bench::MyBenchmark, sample::DataSample; kwargs...)
-plot_instance(bench::MyBenchmark, instance; kwargs...)
-plot_solution(bench::MyBenchmark, sample::DataSample, y; kwargs...)
-generate_baseline_policies(bench::MyBenchmark) -> collection of callables
+has_visualization(bench::MyBenchmark) -> Bool                            # default: false; return true when plot methods are implemented/available
+plot_instance(bench::MyBenchmark, sample::DataSample; kwargs...)
+plot_solution(bench::MyBenchmark, sample::DataSample; kwargs...)
 ```
 
 ---
@@ -148,6 +148,13 @@ generate_baseline_policies(bench::MyDynamicBenchmark)
 # Each callable performs a full episode rollout and returns the trajectory.
 ```
 
+### Optional visualization methods
+
+```julia
+plot_trajectory(bench::MyDynamicBenchmark, traj::Vector{DataSample}; kwargs...)
+animate_trajectory(bench::MyDynamicBenchmark, traj::Vector{DataSample}; kwargs...)
+```
+
 `generate_dataset` for dynamic benchmarks **requires** a `target_policy` kwarg, 
 there is no default. The `target_policy` must be a callable `(env) -> Vector{DataSample}`.
 

diff --git a/docs/src/tutorials/warcraft_tutorial.jl b/docs/src/tutorials/warcraft_tutorial.jl
@@ -8,6 +8,7 @@ The map is represented as a 2D image representing a 12x12 grid, each cell having
 
 # First, let's load the package and create a benchmark object as follows:
 using DecisionFocusedLearningBenchmarks
+using Plots
 b = WarcraftBenchmark()
 
 # ## Dataset generation
@@ -32,8 +33,8 @@ y_true = sample.y
 # `context` is not used in this benchmark (no solver kwargs needed), so it is empty:
 isempty(sample.context)
 
-# For some benchmarks, we provide the following plotting method [`plot_data`](@ref) to visualize the data:
-plot_data(b, sample)
+# For some benchmarks, we provide the following plotting method [`plot_solution`](@ref) to visualize the data:
+plot_solution(b, sample)
 # We can see here the terrain image, the true terrain weights, and the true shortest path avoiding the high cost cells.
 
 # ## Building a pipeline
@@ -50,7 +51,7 @@ maximizer = generate_maximizer(b; dijkstra=true)
 # In the case o fthe Warcraft benchmark, the method has an additional keyword argument to chose the algorithm to use: Dijkstra's algorithm or Bellman-Ford algorithm.
 y = maximizer(θ)
 # As we can see, currently the pipeline predicts random noise as cell weights, and therefore the maximizer returns a straight line path.
-plot_data(b, DataSample(; x, θ, y))
+plot_solution(b, DataSample(; x, θ, y))
 # We can evaluate the current pipeline performance using the optimality gap metric:
 starting_gap = compute_gap(b, test_dataset, model, maximizer)
 
@@ -59,7 +60,6 @@ starting_gap = compute_gap(b, test_dataset, model, maximizer)
 # We can now train the model using the InferOpt.jl package:
 using InferOpt
 using Flux
-using Plots
 
 perturbed_maximizer = PerturbedMultiplicative(maximizer; ε=0.2, nb_samples=100)
 loss = FenchelYoungLoss(perturbed_maximizer)
@@ -85,7 +85,7 @@ final_gap = compute_gap(b, test_dataset, model, maximizer)
 #
 θ = model(x)
 y = maximizer(θ)
-plot_data(b, DataSample(; x, θ, y))
+plot_solution(b, DataSample(; x, θ, y))
 
 using Test #src
 @test final_gap < starting_gap #src
diff --git a/docs/src/using_benchmarks.md b/docs/src/using_benchmarks.md
@@ -10,7 +10,7 @@ A benchmark bundles a problem family (an instance generator, a combinatorial sol
 Three abstract types cover the main settings:
 - **`AbstractBenchmark`**: static problems (one instance, one decision)
 - **`AbstractStochasticBenchmark{exogenous}`**: stochastic problems (type parameter indicates whether uncertainty is exogenous)
-- **`AbstractDynamicBenchmark`**: sequential / multi-stage problems
+- **`AbstractDynamicBenchmark{exogenous}`**: sequential / multi-stage problems
 
 The sections below explain what changes between these settings. For most purposes, start with a static benchmark to understand the core workflow.
 
@@ -180,10 +180,29 @@ rewards, samples = evaluate_policy!(pol, envs, n_episodes)
 
 ## Visualization
 
-Where implemented, benchmarks provide benchmark-specific plotting helpers:
-
+Plots is an **optional** dependency: load it with `using Plots` to unlock the plot functions. Not all benchmarks support visualization; call `has_visualization(bench)` to check.
 ```julia
-plot_data(bench, sample)            # overview of a data sample
-plot_instance(bench, instance)      # raw problem instance
-plot_solution(bench, sample, y)     # overlay solution on instance
+using Plots
+
+bench = Argmax2DBenchmark()
+dataset = generate_dataset(bench, 10)
+sample = dataset[1]
+
+has_visualization(bench)           # true
+plot_instance(bench, sample)       # problem geometry only
+plot_solution(bench, sample)       # sample.y overlaid on the instance
+plot_solution(bench, sample, y)    # convenience 3-arg form: override y before plotting
+
+# Dynamic benchmarks only (here `bench` must be a dynamic benchmark and `env` one of its environments)
+traj = generate_anticipative_solver(bench)(env)
+plot_trajectory(bench, traj)           # grid of epoch subplots
+anim = animate_trajectory(bench, traj; fps=2)
+gif(anim, "episode.gif")
 ```
+
+- `has_visualization(bench)`: returns `true` for benchmarks that implement plot support (if Plots is loaded).
+- `plot_instance(bench, sample; kwargs...)`: renders the problem geometry without any solution.
+- `plot_solution(bench, sample; kwargs...)`: renders `sample.y` overlaid on the instance.
+- `plot_solution(bench, sample, y; kwargs...)`: 3-arg convenience form that overrides `y` before plotting.
+- `plot_trajectory(bench, traj; kwargs...)`: dynamic benchmarks only; produces a grid of per-epoch subplots.
+- `animate_trajectory(bench, traj; kwargs...)`: dynamic benchmarks only; returns a `Plots.Animation` that can be saved with `gif(anim, "file.gif")`.
diff --git a/ext/DFLBenchmarksPlotsExt.jl b/ext/DFLBenchmarksPlotsExt.jl
@@ -0,0 +1,29 @@
+# Package extension holding the plotting methods of DecisionFocusedLearningBenchmarks.
+# It is registered in Project.toml under `[extensions]` with `Plots` as its trigger.
+module DFLBenchmarksPlotsExt
+
+using DecisionFocusedLearningBenchmarks
+using DocStringExtensions: TYPEDSIGNATURES
+using LaTeXStrings: @L_str
+using Plots
+import DecisionFocusedLearningBenchmarks:
+    has_visualization, plot_instance, plot_solution, plot_trajectory, animate_trajectory
+
+# Plot definitions live in the files included below.
+include("plots/argmax2d_plots.jl")
+include("plots/warcraft_plots.jl")
+include("plots/svs_plots.jl")
+include("plots/dvs_plots.jl")
+
+"""
+    plot_solution(bench::AbstractBenchmark, sample::DataSample, y; kwargs...)
+
+Plot `sample` with its solution replaced by `y`.
+
+A new `DataSample` is built from the `context`, `x`, `θ` and `extra` of `sample`, with
+`y` as its solution, and is forwarded together with `kwargs` to the 2-arg
+[`plot_solution`](@ref). Only available when `Plots` is loaded.
+"""
+function plot_solution(bench::AbstractBenchmark, sample::DataSample, y; kwargs...)
+    # Same sample as the input one, except for the overridden solution.
+    overridden = DataSample(;
+        sample.context..., x=sample.x, θ=sample.θ, y=y, extra=sample.extra
+    )
+    return plot_solution(bench, overridden; kwargs...)
+end
+
+end
diff --git a/ext/plots/argmax2d_plots.jl b/ext/plots/argmax2d_plots.jl
@@ -0,0 +1,66 @@
+# Create the empty base plot shared by the Argmax2D plotting methods: equal aspect
+# ratio, legend outside on the left, and both axes limited to (-1.1, 1.1).
+# `title` is the plot title; `kwargs` are forwarded to `Plots.plot` and, being
+# splatted last, take precedence over the defaults set here.
+function _init_plot(title=""; kwargs...)
+    axis_limits = (-1.1, 1.1)
+    return Plots.plot(;
+        aspect_ratio=:equal,
+        legend=:outerleft,
+        xlim=axis_limits,
+        ylim=axis_limits,
+        title=title,
+        kwargs...,
+    )
+end
+
+# Draw the polygon described by `vertices` on `pl` as a black outline with a
+# translucent gray fill, and return the result of `Plots.plot!`.
+# Each vertex is read through `first` (x) and `last` (y).
+function _plot_polytope!(pl, vertices)
+    start = vertices[1]
+    # Repeat the first vertex at the end so that the outline is closed.
+    xs = vcat(map(first, vertices), first(start))
+    ys = vcat(map(last, vertices), last(start))
+    return Plots.plot!(
+        pl,
+        xs,
+        ys;
+        fillrange=0,
+        fillcolor=:gray,
+        fillalpha=0.2,
+        linecolor=:black,
+        label=L"\mathrm{conv}(\mathcal{Y}(x))",
+    )
+end
+
+# Draw the objective direction `θ` on `pl` as an arrow from the origin to
+# (θ[1], θ[2]), labelled "θ" at -0.2 times that point. Returns `pl`.
+function _plot_objective!(pl, θ)
+    tip_x = θ[1]
+    tip_y = θ[2]
+    Plots.plot!(
+        pl, [0.0, tip_x], [0.0, tip_y]; color="#9558B2", arrow=true, lw=2, label=nothing
+    )
+    # The annotation sits on the opposite side of the origin from the arrow tip.
+    Plots.annotate!(pl, [-0.2 * tip_x], [-0.2 * tip_y], [L"\theta"])
+    return pl
+end
+
+# Mark the point (y[1], y[2]) on `pl` with a square marker and return the
+# result of `Plots.scatter!`.
+function _plot_y!(pl, y)
+    xs = [y[1]]
+    ys = [y[2]]
+    return Plots.scatter!(
+        pl, xs, ys; color="#CB3C33", markersize=9, markershape=:square, label=L"f(\theta)"
+    )
+end
+
+# Plotting methods for `Argmax2DBenchmark` are defined in this file.
+function has_visualization(::Argmax2DBenchmark)
+    return true
+end
+
+# Plot only the polytope stored in `sample.instance`, without objective or solution.
+# `kwargs` are forwarded to `_init_plot`. Returns the plot object.
+function plot_instance(::Argmax2DBenchmark, sample::DataSample; kwargs...)
+    canvas = _init_plot(; kwargs...)
+    _plot_polytope!(canvas, sample.instance)
+    return canvas
+end
+
+# Plot the polytope from `sample.instance`, the objective direction `sample.θ`
+# and the solution `sample.y` on a single figure.
+# `kwargs` are forwarded to `_init_plot`. Returns the value of `_plot_y!`.
+function plot_solution(::Argmax2DBenchmark, sample::DataSample; kwargs...)
+    canvas = _init_plot(; kwargs...)
+    _plot_polytope!(canvas, sample.instance)
+    _plot_objective!(canvas, sample.θ)
+    return _plot_y!(canvas, sample.y)
+end
+
+# 3-arg form: same figure as the 2-arg method, but the plotted solution is the
+# explicit `y` instead of `sample.y`. The objective direction defaults to
+# `sample.θ` and can be overridden with the `θ` keyword; the remaining `kwargs`
+# are forwarded to `_init_plot`. Returns the value of `_plot_y!`.
+function plot_solution(::Argmax2DBenchmark, sample::DataSample, y; θ=sample.θ, kwargs...)
+    canvas = _init_plot(; kwargs...)
+    _plot_polytope!(canvas, sample.instance)
+    _plot_objective!(canvas, θ)
+    return _plot_y!(canvas, y)
+end