DecisionFocusedLearningBenchmarks.jl/src/SubsetSelection/SubsetSelection.jl at cf031b3165921a04f35b14e3d9516b2289d7d2a6 · JuliaDecisionFocusedLearning/DecisionFocusedLearningBenchmarks.jl · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
module SubsetSelection

using ..Utils
using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES
using Flux: Chain, Dense
using LinearAlgebra: dot
using Random

"""
$TYPEDEF

Benchmark problem for the subset selection problem.
Reference: <https://arxiv.org/abs/2307.13565>.

The goal is to select the best `k` items from a set of `n` items,
without knowing their values, but only observing some features.

# Fields
$TYPEDFIELDS
"""
struct SubsetSelectionBenchmark{M} <: AbstractStaticBenchmark
    "total number of items"
    n::Int
    "number of items to select"
    k::Int
    "hidden unknown mapping from features to costs"
    mapping::M
end

function Base.show(io::IO, bench::SubsetSelectionBenchmark)
    (; n, k) = bench
    return print(io, "SubsetSelectionBenchmark(n=$n, k=$k)")
end

Utils.objective_value(::SubsetSelectionBenchmark, sample::DataSample, y) = dot(sample.θ, y)
Utils.is_minimization_problem(::SubsetSelectionBenchmark) = false

function SubsetSelectionBenchmark(; n::Int=25, k::Int=5, identity_mapping::Bool=true)
    @assert n >= k "number of items n must be greater than k"
    mapping = if identity_mapping
        copy
    else
        Dense(n => n; bias=false)
    end
    return SubsetSelectionBenchmark(n, k, mapping)
end

function top_k(v::AbstractVector, k::Int)
    indices = sortperm(v; rev=true)[1:k]
    res = falses(length(v))
    res[indices] .= true
    return res
end

"""
$TYPEDSIGNATURES

Return a top k maximizer.
"""
function Utils.generate_maximizer(bench::SubsetSelectionBenchmark)
    (; k) = bench
    return Base.Fix2(top_k, k)
end

"""
$TYPEDSIGNATURES

Generate a labeled instance for the subset selection problem.
"""
function Utils.generate_sample(bench::SubsetSelectionBenchmark, rng::AbstractRNG)
    (; n, k, mapping) = bench
    features = randn(rng, Float32, n)
    θ_true = mapping(features)
    y_true = top_k(θ_true, k)
    return DataSample(; x=features, θ=θ_true, y=y_true)
end

"""
$TYPEDSIGNATURES

Initialize a linear model for `bench` using `Flux`.
"""
function Utils.generate_statistical_model(bench::SubsetSelectionBenchmark; seed=nothing)
    Random.seed!(seed)
    (; n) = bench
    return Dense(n => n; bias=false)
end

export SubsetSelectionBenchmark
end