@@ -85673,8 +85673,9 @@ void blobSpotFree(BlobSpot *pBlobSpot);
8567385673
8567485674/*
8567585675 * Accessor for node binary format
85676- * - v1 format is the following:
85677- * [u64 nRowid] [u16 nEdges] [node vector] [edge vector] * nEdges [trash vector] * (nMaxEdges - nEdges) ([u64 legacyField] [u64 edgeId]) * nEdges
85676+ * - default format is the following:
85677+ * [u64 nRowid] [u16 nEdges] [6 byte padding] [node vector] [edge vector] * nEdges [trash vector] * (nMaxEdges - nEdges) ([u32 unused] [f32 distance] [u64 edgeId]) * nEdges
85678+ * Note that the 6-byte padding after nEdges is required to align [node vector] on a word boundary and avoid unaligned reads
8567885679 * Note that the node vector and edge vectors can have different representations (and an edge vector can be smaller in size than the node vector)
8567985680*/
8568085681int nodeEdgesMaxCount(const DiskAnnIndex *pIndex);
@@ -85713,9 +85714,11 @@ typedef u8 MetricType;
8571385714/*
8571485715 * 1 - v1 version; node block format: [node meta] [node vector] [edge vectors] ... [ [u64 unused ] [u64 edge rowid] ] ...
8571585716 * 2 - v2 version; node block format: [node meta] [node vector] [edge vectors] ... [ [u32 unused] [f32 distance] [u64 edge rowid] ] ...
85717+ * 3 - v3 version; same as v2, but node meta is padded to an 8-byte boundary (u64 + u16 is rounded up to u64 + u64)
8571685718*/
#define VECTOR_FORMAT_V1      1  /* edge meta: [u64 unused] [u64 edge rowid] */
#define VECTOR_FORMAT_V2      2  /* edge meta: [u32 unused] [f32 distance] [u64 edge rowid] */
#define VECTOR_FORMAT_DEFAULT 3  /* v3: node meta rounded up from u64 + u16 to u64 + u64 */
8571985722
8572085723/* type of the vector index */
8572185724#define VECTOR_INDEX_TYPE_PARAM_ID 2
@@ -212727,8 +212730,6 @@ SQLITE_PRIVATE void sqlite3RegisterVectorFunctions(void){
212727212730*/
212728212731#define DISKANN_BLOCK_SIZE_SHIFT 9
212729212732
212730- #define VECTOR_NODE_METADATA_SIZE (sizeof(u64) + sizeof(u16))
212731- #define VECTOR_EDGE_METADATA_SIZE (sizeof(u64) + sizeof(u64))
212732212733
212733212734typedef struct VectorPair VectorPair;
212734212735typedef struct DiskAnnSearchCtx DiskAnnSearchCtx;
@@ -212951,46 +212952,58 @@ void blobSpotFree(BlobSpot *pBlobSpot) {
212951212952** Layout specific utilities
212952212953**************************************************************************/
212953212954
212954- int nodeEdgeOverhead(int nEdgeVectorSize){
212955- return nEdgeVectorSize + VECTOR_EDGE_METADATA_SIZE;
212955+ int nodeMetadataSize(int nFormatVersion){
212956+ if( nFormatVersion <= VECTOR_FORMAT_V2 ){
212957+ return (sizeof(u64) + sizeof(u16));
212958+ }else{
212959+ return (sizeof(u64) + sizeof(u64));
212960+ }
212961+ }
212962+
212963+ int edgeMetadataSize(int nFormatVersion){
212964+ return (sizeof(u64) + sizeof(u64));
212965+ }
212966+
212967+ int nodeEdgeOverhead(int nFormatVersion, int nEdgeVectorSize){
212968+ return nEdgeVectorSize + edgeMetadataSize(nFormatVersion);
212956212969}
212957212970
212958- int nodeOverhead(int nNodeVectorSize){
212959- return nNodeVectorSize + VECTOR_NODE_METADATA_SIZE ;
212971+ int nodeOverhead(int nFormatVersion, int nNodeVectorSize){
212972+ return nNodeVectorSize + nodeMetadataSize(nFormatVersion) ;
212960212973}
212961212974
212962212975int nodeEdgesMaxCount(const DiskAnnIndex *pIndex){
212963- unsigned int nMaxEdges = (pIndex->nBlockSize - nodeOverhead(pIndex->nNodeVectorSize)) / nodeEdgeOverhead(pIndex->nEdgeVectorSize);
212976+ unsigned int nMaxEdges = (pIndex->nBlockSize - nodeOverhead(pIndex->nFormatVersion, pIndex-> nNodeVectorSize)) / nodeEdgeOverhead(pIndex->nFormatVersion, pIndex->nEdgeVectorSize);
212964212977 assert( nMaxEdges > 0);
212965212978 return nMaxEdges;
212966212979}
212967212980
212968212981int nodeEdgesMetadataOffset(const DiskAnnIndex *pIndex){
212969212982 unsigned int offset;
212970212983 unsigned int nMaxEdges = nodeEdgesMaxCount(pIndex);
212971- offset = VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + nMaxEdges * pIndex->nEdgeVectorSize;
212984+ offset = nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + nMaxEdges * pIndex->nEdgeVectorSize;
212972212985 assert( offset <= pIndex->nBlockSize );
212973212986 return offset;
212974212987}
212975212988
212976212989void nodeBinInit(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, u64 nRowid, Vector *pVector){
212977- assert( VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize <= pBlobSpot->nBufferSize );
212990+ assert( nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize <= pBlobSpot->nBufferSize );
212978212991
212979212992 memset(pBlobSpot->pBuffer, 0, pBlobSpot->nBufferSize);
212980212993 writeLE64(pBlobSpot->pBuffer, nRowid);
212981212994 // neighbours count already zero after memset - no need to set it explicitly
212982212995
212983- vectorSerializeToBlob(pVector, pBlobSpot->pBuffer + VECTOR_NODE_METADATA_SIZE , pIndex->nNodeVectorSize);
212996+ vectorSerializeToBlob(pVector, pBlobSpot->pBuffer + nodeMetadataSize(pIndex->nFormatVersion) , pIndex->nNodeVectorSize);
212984212997}
212985212998
212986212999void nodeBinVector(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot, Vector *pVector) {
212987- assert( VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize <= pBlobSpot->nBufferSize );
213000+ assert( nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize <= pBlobSpot->nBufferSize );
212988213001
212989- vectorInitStatic(pVector, pIndex->nNodeVectorType, pIndex->nVectorDims, pBlobSpot->pBuffer + VECTOR_NODE_METADATA_SIZE );
213002+ vectorInitStatic(pVector, pIndex->nNodeVectorType, pIndex->nVectorDims, pBlobSpot->pBuffer + nodeMetadataSize(pIndex->nFormatVersion) );
212990213003}
212991213004
212992213005u16 nodeBinEdges(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot) {
212993- assert( VECTOR_NODE_METADATA_SIZE <= pBlobSpot->nBufferSize );
213006+ assert( nodeMetadataSize(pIndex->nFormatVersion) <= pBlobSpot->nBufferSize );
212994213007
212995213008 return readLE16(pBlobSpot->pBuffer + sizeof(u64));
212996213009}
@@ -213000,20 +213013,20 @@ void nodeBinEdge(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot, int iEdg
213000213013 int offset = nodeEdgesMetadataOffset(pIndex);
213001213014
213002213015 if( pRowid != NULL ){
213003- assert( offset + (iEdge + 1) * VECTOR_EDGE_METADATA_SIZE <= pBlobSpot->nBufferSize );
213004- *pRowid = readLE64(pBlobSpot->pBuffer + offset + iEdge * VECTOR_EDGE_METADATA_SIZE + sizeof(u64));
213016+ assert( offset + (iEdge + 1) * edgeMetadataSize(pIndex->nFormatVersion) <= pBlobSpot->nBufferSize );
213017+ *pRowid = readLE64(pBlobSpot->pBuffer + offset + iEdge * edgeMetadataSize(pIndex->nFormatVersion) + sizeof(u64));
213005213018 }
213006213019 if( pIndex->nFormatVersion != VECTOR_FORMAT_V1 && pDistance != NULL ){
213007- distance = readLE32(pBlobSpot->pBuffer + offset + iEdge * VECTOR_EDGE_METADATA_SIZE + sizeof(u32));
213020+ distance = readLE32(pBlobSpot->pBuffer + offset + iEdge * edgeMetadataSize(pIndex->nFormatVersion) + sizeof(u32));
213008213021 *pDistance = *((float*)&distance);
213009213022 }
213010213023 if( pVector != NULL ){
213011- assert( VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + iEdge * pIndex->nEdgeVectorSize < offset );
213024+ assert( nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + iEdge * pIndex->nEdgeVectorSize < offset );
213012213025 vectorInitStatic(
213013213026 pVector,
213014213027 pIndex->nEdgeVectorType,
213015213028 pIndex->nVectorDims,
213016- pBlobSpot->pBuffer + VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + iEdge * pIndex->nEdgeVectorSize
213029+ pBlobSpot->pBuffer + nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + iEdge * pIndex->nEdgeVectorSize
213017213030 );
213018213031 }
213019213032}
@@ -213050,11 +213063,11 @@ void nodeBinReplaceEdge(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, int iRe
213050213063 nEdges++;
213051213064 }
213052213065
213053- edgeVectorOffset = VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + iReplace * pIndex->nEdgeVectorSize;
213054- edgeMetaOffset = nodeEdgesMetadataOffset(pIndex) + iReplace * VECTOR_EDGE_METADATA_SIZE ;
213066+ edgeVectorOffset = nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + iReplace * pIndex->nEdgeVectorSize;
213067+ edgeMetaOffset = nodeEdgesMetadataOffset(pIndex) + iReplace * edgeMetadataSize(pIndex->nFormatVersion) ;
213055213068
213056213069 assert( edgeVectorOffset + pIndex->nEdgeVectorSize <= pBlobSpot->nBufferSize );
213057- assert( edgeMetaOffset + VECTOR_EDGE_METADATA_SIZE <= pBlobSpot->nBufferSize );
213070+ assert( edgeMetaOffset + edgeMetadataSize(pIndex->nFormatVersion) <= pBlobSpot->nBufferSize );
213058213071
213059213072 vectorSerializeToBlob(pVector, pBlobSpot->pBuffer + edgeVectorOffset, pIndex->nEdgeVectorSize);
213060213073 writeLE32(pBlobSpot->pBuffer + edgeMetaOffset + sizeof(u32), *((u32*)&distance));
@@ -213070,19 +213083,19 @@ void nodeBinDeleteEdge(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, int iDel
213070213083
213071213084 assert( 0 <= iDelete && iDelete < nEdges );
213072213085
213073- edgeVectorOffset = VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + iDelete * pIndex->nEdgeVectorSize;
213074- lastVectorOffset = VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + (nEdges - 1) * pIndex->nEdgeVectorSize;
213075- edgeMetaOffset = nodeEdgesMetadataOffset(pIndex) + iDelete * VECTOR_EDGE_METADATA_SIZE ;
213076- lastMetaOffset = nodeEdgesMetadataOffset(pIndex) + (nEdges - 1) * VECTOR_EDGE_METADATA_SIZE ;
213086+ edgeVectorOffset = nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + iDelete * pIndex->nEdgeVectorSize;
213087+ lastVectorOffset = nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + (nEdges - 1) * pIndex->nEdgeVectorSize;
213088+ edgeMetaOffset = nodeEdgesMetadataOffset(pIndex) + iDelete * edgeMetadataSize(pIndex->nFormatVersion) ;
213089+ lastMetaOffset = nodeEdgesMetadataOffset(pIndex) + (nEdges - 1) * edgeMetadataSize(pIndex->nFormatVersion) ;
213077213090
213078213091 assert( edgeVectorOffset + pIndex->nEdgeVectorSize <= pBlobSpot->nBufferSize );
213079213092 assert( lastVectorOffset + pIndex->nEdgeVectorSize <= pBlobSpot->nBufferSize );
213080- assert( edgeMetaOffset + VECTOR_EDGE_METADATA_SIZE <= pBlobSpot->nBufferSize );
213081- assert( lastMetaOffset + VECTOR_EDGE_METADATA_SIZE <= pBlobSpot->nBufferSize );
213093+ assert( edgeMetaOffset + edgeMetadataSize(pIndex->nFormatVersion) <= pBlobSpot->nBufferSize );
213094+ assert( lastMetaOffset + edgeMetadataSize(pIndex->nFormatVersion) <= pBlobSpot->nBufferSize );
213082213095
213083213096 if( edgeVectorOffset < lastVectorOffset ){
213084213097 memmove(pBlobSpot->pBuffer + edgeVectorOffset, pBlobSpot->pBuffer + lastVectorOffset, pIndex->nEdgeVectorSize);
213085- memmove(pBlobSpot->pBuffer + edgeMetaOffset, pBlobSpot->pBuffer + lastMetaOffset, VECTOR_EDGE_METADATA_SIZE );
213098+ memmove(pBlobSpot->pBuffer + edgeMetaOffset, pBlobSpot->pBuffer + lastMetaOffset, edgeMetadataSize(pIndex->nFormatVersion) );
213086213099 }
213087213100
213088213101 writeLE16(pBlobSpot->pBuffer + sizeof(u64), nEdges - 1);
@@ -213168,9 +213181,9 @@ int diskAnnCreateIndex(
213168213181 if( maxNeighborsParam == 0 ){
213169213182 // 3 D**(1/2) gives good recall values (90%+)
213170213183 // we also want to keep disk overhead at moderate level - 50x of the disk size increase is the current upper bound
213171- maxNeighborsParam = MIN(3 * ((int)(sqrt(dims)) + 1), (50 * nodeOverhead(vectorDataSize(type, dims))) / nodeEdgeOverhead(vectorDataSize(neighbours, dims)) + 1);
213184+ maxNeighborsParam = MIN(3 * ((int)(sqrt(dims)) + 1), (50 * nodeOverhead(VECTOR_FORMAT_DEFAULT, vectorDataSize(type, dims))) / nodeEdgeOverhead(VECTOR_FORMAT_DEFAULT, vectorDataSize(neighbours, dims)) + 1);
213172213185 }
213173- blockSizeBytes = nodeOverhead(vectorDataSize(type, dims)) + maxNeighborsParam * (u64)nodeEdgeOverhead(vectorDataSize(neighbours, dims));
213186+ blockSizeBytes = nodeOverhead(VECTOR_FORMAT_DEFAULT, vectorDataSize(type, dims)) + maxNeighborsParam * (u64)nodeEdgeOverhead(VECTOR_FORMAT_DEFAULT, vectorDataSize(neighbours, dims));
213174213187 if( blockSizeBytes > DISKANN_MAX_BLOCK_SZ ){
213175213188 return SQLITE_ERROR;
213176213189 }
0 commit comments