-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathSubsetSelection.jl
More file actions
90 lines (74 loc) · 2.19 KB
/
SubsetSelection.jl
File metadata and controls
90 lines (74 loc) · 2.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
module SubsetSelection
using ..Utils
using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES
using Flux: Chain, Dense
using LinearAlgebra: dot
using Random
"""
$TYPEDEF
Benchmark problem for the subset selection problem.
Reference: <https://arxiv.org/abs/2307.13565>.
The goal is to select the best `k` items from a set of `n` items,
without knowing their values, but only observing some features.
# Fields
$TYPEDFIELDS
"""
struct SubsetSelectionBenchmark{M} <: AbstractStaticBenchmark
"total number of items"
n::Int
"number of items to select"
k::Int
"hidden unknown mapping from features to costs"
mapping::M
end
function Base.show(io::IO, bench::SubsetSelectionBenchmark)
(; n, k) = bench
return print(io, "SubsetSelectionBenchmark(n=$n, k=$k)")
end
Utils.objective_value(::SubsetSelectionBenchmark, sample::DataSample, y) = dot(sample.θ, y)
Utils.is_minimization_problem(::SubsetSelectionBenchmark) = false
function SubsetSelectionBenchmark(; n::Int=25, k::Int=5, identity_mapping::Bool=true)
@assert n >= k "number of items n must be greater than k"
mapping = if identity_mapping
copy
else
Dense(n => n; bias=false)
end
return SubsetSelectionBenchmark(n, k, mapping)
end
function top_k(v::AbstractVector, k::Int)
indices = sortperm(v; rev=true)[1:k]
res = falses(length(v))
res[indices] .= true
return res
end
"""
$TYPEDSIGNATURES
Return a top k maximizer.
"""
function Utils.generate_maximizer(bench::SubsetSelectionBenchmark)
(; k) = bench
return Base.Fix2(top_k, k)
end
"""
$TYPEDSIGNATURES
Generate a labeled instance for the subset selection problem.
"""
function Utils.generate_sample(bench::SubsetSelectionBenchmark, rng::AbstractRNG)
(; n, k, mapping) = bench
features = randn(rng, Float32, n)
θ_true = mapping(features)
y_true = top_k(θ_true, k)
return DataSample(; x=features, θ=θ_true, y=y_true)
end
"""
$TYPEDSIGNATURES
Initialize a linear model for `bench` using `Flux`.
"""
function Utils.generate_statistical_model(bench::SubsetSelectionBenchmark; seed=nothing)
Random.seed!(seed)
(; n) = bench
return Dense(n => n; bias=false)
end
export SubsetSelectionBenchmark
end