|
| 1 | +/* |
| 2 | +This script determines the order quality for eligible columns of all columnstore indexes in the current database. |
| 3 | +It can be used for columnstore indexes built with or without the ORDER clause to find explicitly declared or implicit order. |
| 4 | +
|
| 5 | +For more information about order in columnstore indexes, see |
| 6 | +https://learn.microsoft.com/sql/relational-databases/indexes/ordered-columnstore-indexes |
| 7 | +
|
| 8 | +The script works in SQL Server 2022 and later versions, Azure SQL Database, and Azure SQL Managed Instance. |
| 9 | +
|
| 10 | +The order quality for a column is defined as the average of the order quality of its segments. |
| 11 | +The order quality for a segment is defined by the following formula: |
| 12 | +
|
| 13 | +order_quality_percent = (1 - segment_overlap_count / (total_segment_count - 1)) * 100 |
| 14 | +
|
| 15 | +When a segment doesn't overlap with *any other* segment in a partition, its order quality is 100 percent. |
| 16 | +When a segment overlaps with *every other* segment in a partition, its order quality is 0 percent. |
| 17 | +
|
| 18 | +The segment metadata required to determine order quality is exposed only for some data types and some encodings. |
| 19 | +The script excludes the columns where metadata isn't available. |
| 20 | +Even though order quality cannot be determined for ineligible columns using this script, |
| 21 | +segment elimination for these columns can still be improved with higher order quality. |
| 22 | +*/ |
| 23 | + |
/*
Staging table for the order quality calculation.
Holds one row per eligible columnstore segment, uniquely identified by
(partition_id, column_id, segment_id). The table is dropped first so that
the script can be re-run in the same session.
*/
DROP TABLE IF EXISTS #column_segment;

CREATE TABLE #column_segment
(
    /* Segment identity and the index/partition it belongs to */
    partition_id     bigint        NOT NULL,
    object_id        int           NOT NULL,
    index_id         int           NOT NULL,
    partition_number int           NOT NULL,
    column_id        int           NOT NULL,
    type_name        sysname       NOT NULL,
    segment_id       int           NOT NULL,

    /* Segment size */
    row_count        bigint        NOT NULL,
    on_disk_size     bigint        NOT NULL,

    /* Segment value range, kept in a binary form so that all eligible types share one column */
    min_data_value   varbinary(18) NOT NULL,
    max_data_value   varbinary(18) NOT NULL,

    /* Running counts of segment starts/ends and the upper bound of possible overlaps */
    count_starts     bigint        NOT NULL,
    count_ends       bigint        NOT NULL,
    max_overlaps     bigint        NOT NULL,

    PRIMARY KEY (partition_id, column_id, segment_id)
        WITH (DATA_COMPRESSION = ROW),

    /* Support the "latest start before this end" and "latest end before this start" lookups */
    INDEX ix_starts (partition_id, column_id, min_data_value, count_starts)
        WITH (DATA_COMPRESSION = ROW),
    INDEX ix_ends (partition_id, column_id, max_data_value, count_ends)
        WITH (DATA_COMPRESSION = ROW)
);
| 46 | + |
/*
Persist an indexed subset of sys.column_store_segments for eligible segments, i.e.
the segments using the types and encodings where trustworthy min/max data values
are available in sys.column_store_segments.

Min/max values are stored as varbinary(18) and are later compared and sorted as
binary values, so the binary form must sort in the same order as the original values.
*/
INSERT INTO #column_segment
(
    partition_id,
    object_id,
    index_id,
    partition_number,
    column_id,
    type_name,
    segment_id,
    row_count,
    on_disk_size,
    min_data_value,
    max_data_value,
    count_starts,
    count_ends,
    max_overlaps
)
SELECT cs.partition_id,
       p.object_id,
       p.index_id,
       p.partition_number,
       cs.column_id,
       t.name AS type_name,
       cs.segment_id,
       CAST(cs.row_count AS bigint) AS row_count,
       cs.on_disk_size,
       mm.min_data_value,
       mm.max_data_value,
       /* The cumulative number of segment starts up to and including the start of the current segment */
       COUNT(1) OVER (
                     PARTITION BY cs.partition_id, cs.column_id
                     ORDER BY mm.min_data_value
                     ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
                     )
       AS count_starts,
       /* The cumulative number of segment ends up to and including the end of the current segment */
       COUNT(1) OVER (
                     PARTITION BY cs.partition_id, cs.column_id
                     ORDER BY mm.max_data_value
                     ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
                     )
       AS count_ends,
       /* The maximum number of overlaps is the number of segments minus one */
       COUNT(1) OVER (
                     PARTITION BY cs.partition_id, cs.column_id
                     ) - 1
       AS max_overlaps
FROM sys.column_store_segments AS cs
INNER JOIN sys.partitions AS p
    ON cs.partition_id = p.partition_id
INNER JOIN sys.columns AS c
    ON p.object_id = c.object_id
       AND
       cs.column_id = c.column_id
INNER JOIN sys.types AS t
    ON c.user_type_id = t.user_type_id
CROSS APPLY (
            /* Classify each segment by where its min/max values can be read from */
            SELECT CASE
                       WHEN t.name IN ('bit','tinyint','smallint','bigint','money')
                            AND
                            cs.encoding_type IN (1,2) AND cs.min_data_id <= cs.max_data_id
                       THEN 'simple' /*
                                     min_data_id and max_data_id columns have the actual min/max data values for the segment.
                                     */
                       WHEN t.name IN ('binary','varbinary','char','nchar','varchar','nvarchar')
                            AND
                            cs.encoding_type IN (3,5) AND cs.min_deep_data <= cs.max_deep_data
                       THEN 'deep' /*
                                   min_deep_data and max_deep_data columns are populated using a binary representation
                                   of the min/max data values for the segment, and the values are comparable.
                                   */
                       ELSE 'unsupported'
                   END AS segment_type
            ) AS st
CROSS APPLY (
            /*
            Convert the min/max values to a binary form that sorts in value order.
            A bigint converted directly to varbinary keeps its two's complement bytes,
            so negative values would sort after positive values in a binary comparison.
            Flipping the sign bit before the conversion makes the binary order match the numeric order.
            */
            SELECT CASE
                       WHEN st.segment_type = 'simple'
                       THEN CAST((cs.min_data_id ^ CAST(0x8000000000000000 AS bigint)) AS varbinary(18))
                       WHEN st.segment_type = 'deep'
                       THEN cs.min_deep_data
                   END
                   AS min_data_value,
                   CASE
                       WHEN st.segment_type = 'simple'
                       THEN CAST((cs.max_data_id ^ CAST(0x8000000000000000 AS bigint)) AS varbinary(18))
                       WHEN st.segment_type = 'deep'
                       THEN cs.max_deep_data
                   END
                   AS max_data_value
            ) AS mm
WHERE cs.partition_id IS NOT NULL AND cs.column_id IS NOT NULL AND cs.segment_id IS NOT NULL AND cs.row_count IS NOT NULL
      AND
      st.segment_type IN ('simple','deep');
| 137 | + |
/*
Return the result set.
Each row represents a column in a partition of a columnstore index.

order_quality_percent is NULL when a column has a single eligible segment in a partition,
because there are no other segments to overlap with (max_overlaps = 0).
avg_segment_overlaps is an integer average because count_overlaps is an integer value.
*/
SELECT OBJECT_SCHEMA_NAME(cs.object_id) AS schema_name,
       OBJECT_NAME(cs.object_id) AS object_name,
       i.name AS index_name,
       COL_NAME(cs.object_id, cs.column_id) AS column_name,
       cs.type_name,
       cs.column_id,
       cs.partition_number,
       ic.column_store_order_ordinal,
       INDEXPROPERTY(cs.object_id, i.name, 'IsClustered') AS is_clustered_column_store,
       SUM(cs.row_count) AS row_count,
       CAST(SUM(cs.on_disk_size) / 1024. / 1024 AS decimal(28,3)) AS on_disk_size_mb,
       COUNT(1) AS eligible_segment_count,
       MIN(o.count_overlaps) AS min_segment_overlaps,
       AVG(o.count_overlaps) AS avg_segment_overlaps,
       MAX(o.count_overlaps) AS max_segment_overlaps,
       (1 - AVG(olr.overlap_ratio)) * 100 AS order_quality_percent
FROM #column_segment AS cs
INNER JOIN sys.indexes AS i
    ON cs.object_id = i.object_id
       AND
       cs.index_id = i.index_id
INNER JOIN sys.index_columns AS ic
    /*
    Match on index_id as well. Without it, any other index on the same table that
    contains the same column multiplies the segment rows and inflates the aggregates.
    */
    ON cs.object_id = ic.object_id
       AND
       cs.index_id = ic.index_id
       AND
       cs.column_id = ic.column_id
OUTER APPLY (
            /*
            The max cumulative number of segment starts before the end of the current segment.
            count_starts is a tie-breaker so that the highest running count is chosen
            when several segments start at the same value.
            */
            SELECT TOP (1) seg_start.count_starts
            FROM #column_segment AS seg_start
            WHERE seg_start.partition_id = cs.partition_id
                  AND
                  seg_start.column_id = cs.column_id
                  AND
                  seg_start.min_data_value < cs.max_data_value
            ORDER BY seg_start.min_data_value DESC, seg_start.count_starts DESC
            ) AS s
OUTER APPLY (
            /*
            The max cumulative number of segment ends at or before the start of the current segment.
            count_ends is a tie-breaker so that the highest running count is chosen
            when several segments end at the same value.
            */
            SELECT TOP (1) seg_end.count_ends
            FROM #column_segment AS seg_end
            WHERE seg_end.partition_id = cs.partition_id
                  AND
                  seg_end.column_id = cs.column_id
                  AND
                  seg_end.max_data_value <= cs.min_data_value
            ORDER BY seg_end.max_data_value DESC, seg_end.count_ends DESC
            ) AS e
CROSS APPLY (
            /*
            For non-overlapping segments, the number of starts is the same as the number of ends.
            For overlapping segments, the difference is the number of overlaps.
            Subtract one to omit the current segment.
            */
            SELECT ISNULL(s.count_starts, 0) - ISNULL(e.count_ends, 0) - 1 AS diff
            ) AS d
CROSS APPLY (
            /*
            A negative difference occurs when the end of the previous segment is the same as
            the start of the next segment. In the context of columnstore, this is not an overlap.
            */
            SELECT IIF(d.diff >= 0, d.diff, 0) AS count_overlaps
            ) AS o
CROSS APPLY (
            /* NULLIF avoids a divide-by-zero error when the column has only one eligible segment in the partition */
            SELECT CAST(o.count_overlaps AS float) / NULLIF(cs.max_overlaps, 0) AS overlap_ratio
            ) AS olr
GROUP BY cs.object_id, i.name, cs.type_name, cs.column_id, cs.partition_number, ic.column_store_order_ordinal
ORDER BY schema_name, object_name, index_name, column_id, column_store_order_ordinal, partition_number;
0 commit comments