|
| 1 | +""" |
| 2 | +Library Sort (Gapped Insertion) — Enhanced and Documented |
| 3 | +
|
| 4 | +Concept: |
| 5 | +- Library Sort is a gapped version of insertion sort that keeps extra empty |
| 6 | + slots between placed elements, so most insertions do not require shifting |
| 7 | + a long suffix of the array. The algorithm occasionally "rebalances" the |
| 8 | + layout to redistribute elements and restore evenly spaced gaps. This yields |
| 9 | + an expected average complexity of O(n log n) under typical (non-adversarial) |
| 10 | + input distributions. |
| 11 | +
|
| 12 | +API: |
| 13 | +- library_sort(data, key=None, reverse=False, epsilon=1.0) -> List |
| 14 | + - data: any iterable of items |
| 15 | + - key: optional key extractor like built-in sorted()/list.sort() |
| 16 | + - reverse: set True for descending order |
| 17 | + - epsilon: extra-space factor controlling how many gaps are left in the |
| 18 | + backing array (larger epsilon = more gaps = fewer collisions, |
| 19 | + more memory) |
| 20 | +
|
| 21 | +Complexity (informal): |
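| 22 | +- Average (with high probability): O(n log n) comparisons, thanks to binary |
| 23 | + search over the logical order plus amortized O(1) gap insertions. Note that this teaching version also does O(n) list bookkeeping per insertion to maintain 'pos'. |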
| 24 | +- Worst case: O(n^2) if inputs are adversarial and repeatedly force dense |
| 25 | + regions, even after rebalancing. |
| 26 | +- Space: O(n * (1 + epsilon)) for the gapped array. |
| 27 | +
|
| 28 | +Notes for learners: |
| 29 | +- We maintain two parallel arrays: |
| 30 | + - A_keys: holds keys in a sparse (gapped) physical layout. |
| 31 | + - A_vals: holds the original values at the same indices as A_keys. |
| 32 | +- We also maintain a 'pos' list that records the indices of the FILLED |
| 33 | + positions in sorted, logical order. This lets us: |
| 34 | + (1) binary-search the correct logical position for a new key |
| 35 | + (2) ask for a "desired physical slot" roughly midway between neighbors |
| 36 | + so we’re likely to find (or make) a gap near that logical position. |
| 37 | +- If there is no gap at the desired slot, we "rebalance" by redistributing |
| 38 | + the existing items farther apart, restoring even gaps before retrying. |
| 39 | +
|
| 40 | +This annotated implementation focuses on clarity and pedagogy rather than |
| 41 | +micro-optimizations, so readers can trace each step of the algorithm. |
| 42 | +""" |
| 43 | + |
| 44 | +from typing import Callable, Iterable, List, Optional, Tuple |
| 45 | +import bisect |
| 46 | + |
| 47 | + |
def library_sort(
    data: Iterable,
    key: Optional[Callable] = None,
    reverse: bool = False,
    epsilon: float = 1.0,
) -> List:
    """
    Sort 'data' using Library Sort (gapped insertion) and return a new list.

    The sort is stable: items whose keys compare equal keep their original
    relative order, for both ascending and descending output, matching the
    behaviour of the built-in sorted().

    Parameters
    ----------
    data : Iterable
        Items to sort. The input is materialized into a list and is never
        modified.
    key : Callable | None
        Optional key extractor (like built-in sorted()). It is called exactly
        once per item.
    reverse : bool
        If True, return results in descending order.
    epsilon : float
        Extra-space factor controlling gap density; larger values create
        more gaps, which reduces collisions but uses more memory. Values too
        small to leave a free slot between neighbours (including 0) are
        accepted: a rebalance always allocates at least enough room for one
        gap between every pair of neighbours, so the sort still terminates.

    Returns
    -------
    List
        A new list containing the sorted items.

    Raises
    ------
    TypeError
        If two keys cannot be compared with '<'.

    Teaching tip:
    - Think of 'pos' as the logical, in-order view (where elements "should"
      be if there were no gaps), and 'A_keys/A_vals' as the physical shelves
      that include empty spots to make insertions cheap.
    """
    # Materialize input and handle trivial sizes ('items' is already a copy).
    items = list(data)
    n = len(items)
    if n < 2:
        return items

    # Normalize to a key/value representation so we can sort arbitrary objects.
    key_fn = key if key is not None else (lambda x: x)
    keyed: List[Tuple] = [(key_fn(x), x) for x in items]

    # Capacity with slack: leave about 'epsilon' * n empty slots as gaps.
    # The floor of 3 guarantees a free slot on each side of the seed element.
    cap = max(3, int((1.0 + epsilon) * n) + 1)

    # A private sentinel marks an empty shelf slot, so that a legitimate key
    # (even None) can never be mistaken for a gap.
    gap = object()
    A_keys: List[object] = [gap] * cap
    A_vals: List[object] = [None] * cap

    # 'pos' tracks indices of FILLED slots in sorted order of keys, and
    # 'ordered_keys' holds the matching keys (ordered_keys[i] is the key
    # stored at A_keys[pos[i]]). Keeping the keys in a parallel list lets us
    # binary-search directly instead of rebuilding the key list per insertion.
    pos: List[int] = []
    ordered_keys: List[object] = []

    # Seed the structure by placing the first element near the middle so
    # we can grow to both sides without immediate rebalancing.
    mid = cap // 2
    first_key, first_val = keyed[0]
    A_keys[mid] = first_key
    A_vals[mid] = first_val
    pos.append(mid)
    ordered_keys.append(first_key)

    def rebalance(target_count: int) -> None:
        """
        Redistribute the filled slots evenly, re-opening gaps.

        'target_count' is the number of items the layout must accommodate
        (the current items plus the one about to be inserted). The backing
        arrays are rebuilt, growing if necessary, with a stride of at least
        2 between consecutive items and a free slot before the first and
        after the last item. Consequently the slot proposed by
        desired_slot() is guaranteed to be free right after a rebalance.
        """
        nonlocal A_keys, A_vals, pos, cap

        # Grow capacity to keep slack proportional to the item count, but
        # never below 2 * (target_count + 1): that floor is what guarantees
        # step >= 2 below, whatever value 'epsilon' has.
        cap = max(
            cap,
            int((1.0 + epsilon) * target_count) + 3,
            2 * (target_count + 1),
        )

        step = cap // (target_count + 1)  # always >= 2 thanks to the floor
        start = step // 2  # always >= 1, so slot 0 stays free

        new_keys: List[object] = [gap] * cap
        new_vals: List[object] = [None] * cap
        new_pos: List[int] = []

        # Copy each existing filled slot to its new, spaced-out location.
        for i, old_idx in enumerate(pos):
            new_index = start + i * step
            new_keys[new_index] = A_keys[old_idx]
            new_vals[new_index] = A_vals[old_idx]
            new_pos.append(new_index)

        A_keys, A_vals, pos = new_keys, new_vals, new_pos

    def desired_slot(rank: int) -> int:
        """
        Return the physical index to try for an item of logical rank 'rank'.

        The result is just before the first filled slot, just after the last
        one, or the midpoint of the two neighbours. It may be out of range or
        already occupied; the caller checks that.
        """
        if rank == 0:
            return pos[0] - 1  # just before first filled slot
        if rank == len(pos):
            return pos[-1] + 1  # just after last filled slot
        return (pos[rank - 1] + pos[rank]) // 2  # midpoint between neighbors

    # Stability: for ascending output an equal key must land AFTER the equal
    # keys already placed (bisect_right). For descending output we build the
    # ascending order with equal keys in reversed arrival order (bisect_left)
    # so that the final reversal restores their original relative order.
    locate = bisect.bisect_left if reverse else bisect.bisect_right

    # Insert remaining items one by one.
    for k, v in keyed[1:]:
        rank = locate(ordered_keys, k)

        idx = desired_slot(rank)
        if not (0 <= idx < cap and A_keys[idx] is gap):
            # Too dense around the target: spread everything out. After a
            # rebalance the proposed slot is always a valid gap, so a single
            # retry suffices and the loop cannot spin.
            rebalance(len(pos) + 1)
            idx = desired_slot(rank)

        A_keys[idx] = k
        A_vals[idx] = v
        pos.insert(rank, idx)
        ordered_keys.insert(rank, k)

    # Stitch the final, in-order values back together using 'pos'.
    out = [A_vals[i] for i in pos]
    if reverse:
        out.reverse()
    return out
| 188 | + |
| 189 | + |
if __name__ == "__main__":
    # Tiny demonstration: print a sample list, then its sorted counterpart.
    sample = [34, 7, 23, 32, 5, 62, 14, 19, 45, 38]
    print("Before:", sample)
    ordered = library_sort(sample)
    print("After: ", ordered)
0 commit comments