JuliaDecisionFocusedLearning · BatyLeo · Jan 12, 2026 · Dec 30, 2025 · Jan 8, 2026 · Jan 8, 2026
diff --git a/docs/src/index.md b/docs/src/index.md
@@ -61,6 +61,7 @@ Single-stage optimization problems under uncertainty:
 Multi-stage sequential decision-making problems:
 - [`DynamicVehicleSchedulingBenchmark`](@ref): multi-stage vehicle scheduling under customer uncertainty
 - [`DynamicAssortmentBenchmark`](@ref): sequential product assortment selection with endogenous uncertainty
+- [`MaintenanceBenchmark`](@ref): multi-component maintenance problem under a maintenance capacity constraint
 
 ## Getting Started
 

diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl
@@ -57,6 +57,7 @@ include("PortfolioOptimization/PortfolioOptimization.jl")
 include("StochasticVehicleScheduling/StochasticVehicleScheduling.jl")
 include("DynamicVehicleScheduling/DynamicVehicleScheduling.jl")
 include("DynamicAssortment/DynamicAssortment.jl")
+include("Maintenance/Maintenance.jl")
 
 using .Utils
 
@@ -89,6 +90,7 @@ using .PortfolioOptimization
 using .StochasticVehicleScheduling
 using .DynamicVehicleScheduling
 using .DynamicAssortment
+using .Maintenance
 
 export Argmax2DBenchmark
 export ArgmaxBenchmark
@@ -100,5 +102,6 @@ export RankingBenchmark
 export StochasticVehicleSchedulingBenchmark
 export SubsetSelectionBenchmark
 export WarcraftBenchmark
+export MaintenanceBenchmark
 
 end # module DecisionFocusedLearningBenchmarks
diff --git a/src/DynamicAssortment/environment.jl b/src/DynamicAssortment/environment.jl
@@ -197,16 +197,21 @@ Features observed by the agent at current step, as a concatenation of:
 - change in hype and saturation features from the starting state
 - normalized current step (divided by max steps and multiplied by 10)
 All features are normalized by dividing by 10.
+
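+The internal state is returned alongside the features, as the tuple `(env.features, env.purchase_history)`.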
 """
 function Utils.observe(env::Environment)
     delta_features = env.features[2:3, :] .- env.instance.starting_hype_and_saturation
-    return vcat(
+    features = vcat(
         env.features,
         env.d_features,
         delta_features,
         ones(1, item_count(env)) .* (env.step / max_steps(env) * 10),
-    ) ./ 10,
-    nothing
+    ) ./ 10
+
+    state = (env.features, env.purchase_history)
+
+    return features, state
 end
 
 """

diff --git a/src/Maintenance/Maintenance.jl b/src/Maintenance/Maintenance.jl
@@ -0,0 +1,150 @@
+module Maintenance
+
+using ..Utils
+
+using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES, SIGNATURES
+using Distributions: Uniform, Categorical
+using Flux: Chain, Dense
+using LinearAlgebra: dot
+using Random: Random, AbstractRNG, MersenneTwister
+using Statistics: mean
+
+using Combinatorics: combinations
+
+"""
+$TYPEDEF
+
+Benchmark for a standard maintenance problem with resource constraints.
+Components are identical and degrade independently over time.
+A high cost is incurred for each component that reaches the final degradation level.
+A cost is also incurred for maintaining a component.
+The number of simultaneous maintenance operations is limited by a maintenance capacity constraint.
+
+# Fields
+$TYPEDFIELDS
+
+"""
+struct MaintenanceBenchmark <: AbstractDynamicBenchmark{true}
+    "number of components"
+    N::Int
+    "maintenance capacity: maximum number of components that can be maintained simultaneously"
+    K::Int
+    "number of degradation levels per component (the last level is the failure level)"
+    n::Int
+    "probability that a component degrades by one level during a step"
+    p::Float64
+    "failure cost, incurred for each component in the final degradation level"
+    c_f::Float64
+    "maintenance cost, incurred for each maintained component"
+    c_m::Float64
+    "number of steps per episode"
+    max_steps::Int
+end
+
+"""
+    MaintenanceBenchmark(;
+        N=2,
+        K=1,
+        n=3,
+        p=0.2,
+        c_f=10.0,
+        c_m=3.0,
+        max_steps=80,
+    )
+
+Keyword constructor for [`MaintenanceBenchmark`](@ref).
+By default, the benchmark has 2 components, maintenance capacity 1, number of degradation levels 3,
+degradation probability 0.2, failure cost 10.0, maintenance cost 3.0, 80 steps per episode, and is exogenous.
+The keyword values are forwarded, in field order, to the default positional constructor.
+"""
+function MaintenanceBenchmark(;
+    N=2,
+    K=1,
+    n=3,
+    p=0.2,
+    c_f=10.0,
+    c_m=3.0,
+    max_steps=80,
+)
+    return MaintenanceBenchmark(
+        N, K, n, p, c_f, c_m, max_steps
+    )
+end
+
+# Accessor functions: one named getter per field of `MaintenanceBenchmark`
+component_count(b::MaintenanceBenchmark) = b.N
+maintenance_capacity(b::MaintenanceBenchmark) = b.K
+degradation_levels(b::MaintenanceBenchmark) = b.n
+degradation_probability(b::MaintenanceBenchmark) = b.p
+failure_cost(b::MaintenanceBenchmark) = b.c_f
+maintenance_cost(b::MaintenanceBenchmark) = b.c_m
+max_steps(b::MaintenanceBenchmark) = b.max_steps
+
+include("instance.jl")
+include("environment.jl")
+include("policies.jl")
+include("maximizer.jl")
+
+"""
+$TYPEDSIGNATURES
+
+Builds a data sample whose `instance` field is a new [`Instance`](@ref) created from the
+benchmark `b` and the random number generator `rng`.
+"""
+function Utils.generate_sample(b::MaintenanceBenchmark, rng::AbstractRNG=MersenneTwister(0))
+    instance = Instance(b, rng)
+    return DataSample(; instance)
+end
+
+"""
+$TYPEDSIGNATURES
+
+Builds the statistical model of the maintenance benchmark: two square `Dense` layers (one
+hidden layer, no activation function) followed by `vec`. `Random.seed!(seed)` is called first.
+"""
+function Utils.generate_statistical_model(b::MaintenanceBenchmark; seed=nothing)
+    Random.seed!(seed)
+    width = component_count(b)
+    return Chain(Dense(width => width), Dense(width => width), vec)
+end
+
+"""
+$TYPEDSIGNATURES
+
+Builds the maximizer of the benchmark: a `TopKPositiveMaximizer` whose `k` is the
+maintenance capacity of `b`.
+"""
+Utils.generate_maximizer(b::MaintenanceBenchmark) =
+    TopKPositiveMaximizer(maintenance_capacity(b))
+
+"""
+$TYPEDSIGNATURES
+
+Wraps an [`Instance`](@ref) of the maintenance benchmark in a new [`Environment`](@ref).
+The environment seed is drawn from `1:typemax(Int)` with `rng`; extra keyword arguments are accepted but unused.
+"""
+function Utils.generate_environment(
+    ::MaintenanceBenchmark, instance::Instance, rng::AbstractRNG; kwargs...
+)
+    env_seed = rand(rng, 1:typemax(Int))
+    return Environment(instance; seed=env_seed)
+end
+
+"""
+$TYPEDSIGNATURES
+
+Returns a one-element tuple with the baseline policy of the maintenance benchmark:
+- `Greedy`: maintains components when they are in the last state before failure, up to the maintenance capacity
+"""
+function Utils.generate_policies(::MaintenanceBenchmark)
+    greedy = Policy(
+        "Greedy",
+        "policy that maintains components when they are in the last state before failure, up to the maintenance capacity",
+        greedy_policy,
+    )
+    return (greedy,)
+end
+
+export MaintenanceBenchmark
+
+end
diff --git a/src/Maintenance/environment.jl b/src/Maintenance/environment.jl
@@ -0,0 +1,167 @@
+"""
+$TYPEDEF
+
+Mutable environment for the maintenance problem: current step and degradation level of each component.
+
+# Fields
+$TYPEDFIELDS
+"""
+@kwdef mutable struct Environment{I<:Instance,R<:AbstractRNG,S<:Union{Nothing,Int}} <:
+                      Utils.AbstractEnvironment
+    "associated instance"
+    instance::I
+    "current step (set to 1 at construction and on reset)"
+    step::Int
+    "degradation level of each component (put back to 1 when the component is maintained)"
+    degradation_state::Vector{Int}
+    "random number generator of the environment"
+    rng::R
+    "seed used by default to re-seed `rng` on reset"
+    seed::S
+end
+
+"""
+$TYPEDSIGNATURES
+
+Creates an [`Environment`](@ref) from an [`Instance`](@ref) of the maintenance benchmark.
+The environment is reset (step 1, starting state, RNG re-seeded with `seed`) before being returned.
+"""
+function Environment(instance::Instance; seed=0, rng::AbstractRNG=MersenneTwister(seed))
+    # Defensive copy: the state vector is mutated in place by `degrad!`/`maintain!`.
+    # If `starting_state` hands back the instance's own vector (defined outside this file,
+    # to be confirmed), sharing it would corrupt the state that `reset!` restores from.
+    degradation_state = copy(starting_state(instance))
+    env = Environment(;
+        instance, step=1, degradation_state, rng=rng, seed=seed
+    )
+    Utils.reset!(env; reset_rng=true)
+    return env
+end
+# Accessors on an environment: each one forwards to the same accessor on `env.instance`
+component_count(env::Environment) = component_count(env.instance)
+maintenance_capacity(env::Environment) = maintenance_capacity(env.instance)
+degradation_levels(env::Environment) = degradation_levels(env.instance)
+degradation_probability(env::Environment) = degradation_probability(env.instance)
+failure_cost(env::Environment) = failure_cost(env.instance)
+maintenance_cost(env::Environment) = maintenance_cost(env.instance)
+max_steps(env::Environment) = max_steps(env.instance)
+starting_state(env::Environment) = starting_state(env.instance)
+
+
+"""
+$TYPEDSIGNATURES
+
+Draw random degradations for all components: each component below the final degradation
+level moves up one level with probability `degradation_probability(env)`.
+Randomness comes from `env.rng`. Mutates and returns `env.degradation_state`.
+"""
+function degrad!(env::Environment)
+    n = degradation_levels(env)
+    p = degradation_probability(env)
+    for i in 1:component_count(env)
+        # Sample from the environment's own RNG so seeding and `reset!` make episodes reproducible.
+        if env.degradation_state[i] < n && rand(env.rng) < p
+            env.degradation_state[i] += 1
+        end
+    end
+    return env.degradation_state
+end
+
+"""
+$TYPEDSIGNATURES
+
+Maintain components: every component `i` with `maintenance[i]` true is put back to
+degradation level 1. Mutates and returns `env.degradation_state`.
+"""
+function maintain!(env::Environment, maintenance::BitVector)
+    N = component_count(env)
+
+    for i in 1:N
+        if maintenance[i]
+            env.degradation_state[i] = 1
+        end
+    end
+    return env.degradation_state
+end
+
+"""
+$TYPEDSIGNATURES
+
+Cost of a maintenance decision: the unit maintenance cost times the number of maintained components.
+"""
+function maintenance_cost(env::Environment, maintenance::BitVector)
+    return count(maintenance) * maintenance_cost(env)
+end
+
+"""
+$TYPEDSIGNATURES
+
+Compute degradation cost: the failure cost times the number of components that are
+currently in the final degradation level.
+"""
+function degradation_cost(env::Environment)
+    n = degradation_levels(env)
+    return failure_cost(env) * count(==(n), env.degradation_state)
+end
+
+
+"""
+$TYPEDSIGNATURES
+
+Returns the seed stored in the environment (`env.seed`).
+"""
+Utils.get_seed(env::Environment) = env.seed
+
+"""
+$TYPEDSIGNATURES
+
+Puts the environment back at the start of an episode: the step is set to 1 and the degradation
+state is overwritten with the starting state. If `reset_rng` is true, the RNG is first re-seeded with `seed`.
+"""
+function Utils.reset!(env::Environment; reset_rng=false, seed=env.seed)
+    if reset_rng
+        Random.seed!(env.rng, seed)
+    end
+    env.step = 1
+    env.degradation_state .= starting_state(env)
+    return nothing
+end
+
+"""
+$TYPEDSIGNATURES
+
+Tells whether the episode is over.
+This is the case once the current step is strictly greater than the maximum number of
+steps of the environment.
+"""
+Utils.is_terminated(env::Environment) = max_steps(env) < env.step
+
+"""
+$TYPEDSIGNATURES
+
+Returns a `(features, state)` tuple.
+Both entries are the vector of degradation states of all components: it is what the agent
+observes at the current step and also the internal state, so the same object is returned twice.
+NOTE(review): the returned vector is `env.degradation_state` itself, not a copy, so later
+in-place updates of the environment are visible through it; confirm callers copy it if needed.
+"""
+function Utils.observe(env::Environment)
+    return env.degradation_state, env.degradation_state
+end
+
+"""
+$TYPEDSIGNATURES
+
+Performs one step in the environment given a maintenance decision and returns the step cost.
+The cost (maintenance plus degradation) is computed on the current state; then degradations are drawn, maintained components are put back to level 1, and the step counter is incremented.
+"""
+function Utils.step!(env::Environment, maintenance::BitVector)
+    @assert !Utils.is_terminated(env) "Environment is terminated, cannot act!"
+    step_cost = maintenance_cost(env, maintenance) + degradation_cost(env)
+    degrad!(env)
+    maintain!(env, maintenance)
+    env.step += 1
+    return step_cost
+end
+
+