Skip to content

Commit a050c67

Browse files
Enhance Library Sort: key/reverse, gap rebalancing, docs
1 parent 788d95b commit a050c67

1 file changed

Lines changed: 194 additions & 0 deletions

File tree

sorts/library_sort.py

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
"""
2+
Library Sort (Gapped Insertion) — Enhanced and Documented
3+
4+
Concept:
5+
- Library Sort is a gapped version of insertion sort that keeps extra empty
6+
slots between placed elements, so most insertions do not require shifting
7+
a long suffix of the array. The algorithm occasionally "rebalances" the
8+
layout to redistribute elements and restore evenly spaced gaps. This yields
9+
an expected average complexity of O(n log n) under typical (non-adversarial)
10+
input distributions.
11+
12+
API:
13+
- library_sort(data, key=None, reverse=False, epsilon=1.0) -> List
14+
- data: any iterable of items
15+
- key: optional key extractor like built-in sorted()/list.sort()
16+
- reverse: set True for descending order
17+
- epsilon: extra-space factor controlling how many gaps are left in the
18+
backing array (larger epsilon = more gaps = fewer collisions,
19+
more memory)
20+
21+
Complexity (informal):
22+
- Average (with high probability): O(n log n) thanks to binary search for the
23+
logical order plus amortized O(1) gap insertions.
24+
- Worst case: O(n^2) if inputs are adversarial and repeatedly force dense
25+
regions, even after rebalancing.
26+
- Space: O(n * (1 + epsilon)) for the gapped array.
27+
28+
Notes for learners:
29+
- We maintain two parallel arrays:
30+
- A_keys: holds keys in a sparse (gapped) physical layout.
31+
- A_vals: holds the original values at the same indices as A_keys.
32+
- We also maintain a 'pos' list that records the indices of the FILLED
33+
positions in sorted, logical order. This lets us:
34+
(1) binary-search the correct logical position for a new key
35+
(2) ask for a "desired physical slot" roughly midway between neighbors
36+
so we’re likely to find (or make) a gap near that logical position.
37+
- If there is no gap at the desired slot, we "rebalance" by redistributing
38+
the existing items farther apart, restoring even gaps before retrying.
39+
40+
This annotated implementation focuses on clarity and pedagogy rather than
41+
micro-optimizations, so readers can trace each step of the algorithm.
42+
"""
43+
44+
from typing import Callable, Iterable, List, Optional, Tuple
45+
import bisect
46+
47+
48+
def library_sort(
    data: Iterable,
    key: Optional[Callable] = None,
    reverse: bool = False,
    epsilon: float = 1.0,
) -> List:
    """
    Sort 'data' using Library Sort (gapped insertion) and return a new list.

    The sort is stable in both directions, like built-in sorted(): items
    whose keys compare equal keep their original relative order, also when
    'reverse' is True.

    Parameters
    ----------
    data : Iterable
        Items to sort. The iterable is materialized once; it is not modified.
    key : Callable | None
        Optional key extractor (like built-in sorted()). Keys only need to
        support the '<' operator.
    reverse : bool
        If True, return results in descending order.
    epsilon : float
        Extra-space factor controlling gap density; larger values create
        more gaps, which reduces collisions but uses more memory. Values
        below 1 (including 0 and negatives) are accepted: rebalancing always
        reserves at least one gap per item, so such values only shrink the
        initial allocation.

    Returns
    -------
    List
        A new list containing the sorted items.

    Teaching tip:
    - Think of 'pos' as the logical, in-order view (where elements "should"
      be if there were no gaps), and 'A_keys/A_vals' as the physical shelves
      that include empty spots to make insertions cheap.
    """
    # Materialize input and handle trivial sizes. 'items' is already a fresh
    # list, so it can be returned directly without another copy.
    items = list(data)
    n = len(items)
    if n < 2:
        return items

    # Normalize to a key/value representation so we can sort arbitrary objects.
    key_fn = key if key is not None else (lambda x: x)
    keyed: List[Tuple] = [(key_fn(x), x) for x in items]

    # Capacity with slack: leave about 'epsilon' * n empty slots as gaps.
    # The floor of 3 keeps a free slot on each side of the first element.
    cap = max(3, int((1.0 + epsilon) * n) + 1)

    # Sparse physical storage for keys/values; None in A_keys marks a gap.
    A_keys: List[Optional[object]] = [None] * cap
    A_vals: List[Optional[object]] = [None] * cap

    # 'pos' tracks indices of FILLED slots in sorted order of keys. Physical
    # order always matches logical order, so 'pos' is strictly increasing.
    pos: List[int] = []

    # Seed the structure by placing the first element near the middle so
    # we can grow to both sides without immediate rebalancing.
    mid = cap // 2
    A_keys[mid], A_vals[mid] = keyed[0]
    pos.append(mid)

    def rebalance(target_count: int) -> None:
        """
        Redistribute elements with fresh gaps.

        'target_count' is the number of items the layout must hold once the
        pending insertion is done (current items + 1). After this call every
        insertion rank has a free slot: the stride is at least 2, one slot
        is free before the first item, and one after the last item.
        """
        nonlocal A_keys, A_vals, pos, cap

        # Grow capacity to keep slack proportional to the item count. The
        # '2 * target_count + 1' floor guarantees room for a stride of 2 even
        # when epsilon is small, zero, or negative; without it the stride
        # collapses to 1 and no gap can ever open.
        cap = max(
            cap,
            int((1.0 + epsilon) * target_count) + 3,
            2 * target_count + 1,
        )

        # Stride between consecutive items; >= 2 leaves a gap between them.
        step = max(2, cap // (target_count + 1))
        start = step // 2  # >= 1, so the slot before the first item is free

        new_keys: List[Optional[object]] = [None] * cap
        new_vals: List[Optional[object]] = [None] * cap
        new_pos: List[int] = []

        # Copy each existing filled slot to its new, spaced-out location.
        for i, old_idx in enumerate(pos):
            new_index = start + i * step
            new_keys[new_index] = A_keys[old_idx]
            new_vals[new_index] = A_vals[old_idx]
            new_pos.append(new_index)

        A_keys, A_vals, pos = new_keys, new_vals, new_pos

    def desired_slot(rank: int) -> int:
        """
        Given the logical insertion rank (the index where the new key would go
        in sorted order), return a physical index that lies between its
        neighbors.

        The result may be out of range or already occupied; the caller checks
        that and rebalances when needed.
        """
        if rank == 0:
            return pos[0] - 1  # just before first filled slot
        if rank == len(pos):
            return pos[-1] + 1  # just after last filled slot
        return (pos[rank - 1] + pos[rank]) // 2  # midpoint between neighbors

    def find_rank(new_key) -> int:
        """
        Binary-search 'pos' for the logical rank at which 'new_key' belongs.

        Ascending output inserts AFTER equal keys (bisect_right semantics) so
        earlier items stay first. Descending output inserts BEFORE equal keys
        (bisect_left semantics) because the final list is reversed, which
        flips the order of equal items back to their original order.
        """
        lo, hi = 0, len(pos)
        while lo < hi:
            probe_at = (lo + hi) // 2
            probe = A_keys[pos[probe_at]]
            if reverse:
                go_right = probe < new_key
            else:
                go_right = not (new_key < probe)
            if go_right:
                lo = probe_at + 1
            else:
                hi = probe_at
        return lo

    # Insert remaining items one by one.
    for k, v in keyed[1:]:
        ins_rank = find_rank(k)
        idx = desired_slot(ins_rank)

        # If the slot is out of range or taken, the neighborhood is too
        # dense. One rebalance is always enough: it guarantees a free slot
        # for every rank, so no retry loop is needed.
        if not (0 <= idx < cap and A_keys[idx] is None):
            rebalance(len(pos) + 1)
            idx = desired_slot(ins_rank)

        A_keys[idx] = k
        A_vals[idx] = v
        pos.insert(ins_rank, idx)

    # Stitch the final, in-order values back together using 'pos'.
    out = [A_vals[i] for i in pos]
    if reverse:
        out.reverse()
    return out
188+
189+
190+
if __name__ == "__main__":
    # Tiny demonstration: show one unsorted sample next to its sorted result.
    sample = [34, 7, 23, 32, 5, 62, 14, 19, 45, 38]
    sorted_sample = library_sort(sample)
    print("Before:", sample)
    print("After: ", sorted_sample)

0 commit comments

Comments
 (0)