@@ -85673,8 +85673,8 @@ void blobSpotFree(BlobSpot *pBlobSpot);
8567385673
8567485674/*
8567585675 * Accessor for node binary format
85676- * - v1 format is the following:
85677- * [u64 nRowid] [u16 nEdges] [node vector] [edge vector] * nEdges [trash vector] * (nMaxEdges - nEdges) ([u64 legacyField ] [u64 edgeId]) * nEdges
85676+ * - default format is the following:
85677+ * [u64 nRowid] [u16 nEdges] [6 byte padding] [node vector] [edge vector] * nEdges [trash vector] * (nMaxEdges - nEdges) ([u32 unused] [f32 distance] [u64 edgeId]) * nEdges
8567885678 * Note that the node vector and the edge vectors can have different representations (an edge vector can be smaller in size than the node vector)
8567985679*/
8568085680int nodeEdgesMaxCount(const DiskAnnIndex *pIndex);
@@ -85713,9 +85713,11 @@ typedef u8 MetricType;
8571385713/*
8571485714 * 1 - v1 version; node block format: [node meta] [node vector] [edge vectors] ... [ [u64 unused ] [u64 edge rowid] ] ...
8571585715 * 2 - v2 version; node block format: [node meta] [node vector] [edge vectors] ... [ [u32 unused] [f32 distance] [u64 edge rowid] ] ...
85716+ * 3 - v3 version; node meta is padded to an 8-byte boundary: [u64 rowid] [u16 edge count] [6 byte padding], i.e. u64 + u64 bytes instead of u64 + u16; edge metadata is the same as in v2
8571685717*/
8571785718#define VECTOR_FORMAT_V1 1
85718- #define VECTOR_FORMAT_DEFAULT 2
85719+ #define VECTOR_FORMAT_V2 2
85720+ #define VECTOR_FORMAT_DEFAULT 3
8571985721
8572085722/* type of the vector index */
8572185723#define VECTOR_INDEX_TYPE_PARAM_ID 2
@@ -212727,8 +212729,6 @@ SQLITE_PRIVATE void sqlite3RegisterVectorFunctions(void){
212727212729*/
212728212730#define DISKANN_BLOCK_SIZE_SHIFT 9
212729212731
212730- #define VECTOR_NODE_METADATA_SIZE (sizeof(u64) + sizeof(u16))
212731- #define VECTOR_EDGE_METADATA_SIZE (sizeof(u64) + sizeof(u64))
212732212732
212733212733typedef struct VectorPair VectorPair;
212734212734typedef struct DiskAnnSearchCtx DiskAnnSearchCtx;
@@ -212951,46 +212951,58 @@ void blobSpotFree(BlobSpot *pBlobSpot) {
212951212951** Layout specific utilities
212952212952**************************************************************************/
212953212953
212954- int nodeEdgeOverhead(int nEdgeVectorSize){
212955- return nEdgeVectorSize + VECTOR_EDGE_METADATA_SIZE;
212954+ int nodeMetadataSize(int nFormatVersion){
212955+ if( nFormatVersion <= VECTOR_FORMAT_V2 ){
212956+ return (sizeof(u64) + sizeof(u16));
212957+ }else{
212958+ return (sizeof(u64) + sizeof(u64));
212959+ }
212960+ }
212961+
212962+ int edgeMetadataSize(int nFormatVersion){
212963+ return (sizeof(u64) + sizeof(u64));
212964+ }
212965+
212966+ int nodeEdgeOverhead(int nFormatVersion, int nEdgeVectorSize){
212967+ return nEdgeVectorSize + edgeMetadataSize(nFormatVersion);
212956212968}
212957212969
212958- int nodeOverhead(int nNodeVectorSize){
212959- return nNodeVectorSize + VECTOR_NODE_METADATA_SIZE ;
212970+ int nodeOverhead(int nFormatVersion, int nNodeVectorSize){
212971+ return nNodeVectorSize + nodeMetadataSize(nFormatVersion) ;
212960212972}
212961212973
212962212974int nodeEdgesMaxCount(const DiskAnnIndex *pIndex){
212963- unsigned int nMaxEdges = (pIndex->nBlockSize - nodeOverhead(pIndex->nNodeVectorSize)) / nodeEdgeOverhead(pIndex->nEdgeVectorSize);
212975+ unsigned int nMaxEdges = (pIndex->nBlockSize - nodeOverhead(pIndex->nFormatVersion, pIndex-> nNodeVectorSize)) / nodeEdgeOverhead(pIndex->nFormatVersion, pIndex->nEdgeVectorSize);
212964212976 assert( nMaxEdges > 0);
212965212977 return nMaxEdges;
212966212978}
212967212979
212968212980int nodeEdgesMetadataOffset(const DiskAnnIndex *pIndex){
212969212981 unsigned int offset;
212970212982 unsigned int nMaxEdges = nodeEdgesMaxCount(pIndex);
212971- offset = VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + nMaxEdges * pIndex->nEdgeVectorSize;
212983+ offset = nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + nMaxEdges * pIndex->nEdgeVectorSize;
212972212984 assert( offset <= pIndex->nBlockSize );
212973212985 return offset;
212974212986}
212975212987
212976212988void nodeBinInit(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, u64 nRowid, Vector *pVector){
212977- assert( VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize <= pBlobSpot->nBufferSize );
212989+ assert( nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize <= pBlobSpot->nBufferSize );
212978212990
212979212991 memset(pBlobSpot->pBuffer, 0, pBlobSpot->nBufferSize);
212980212992 writeLE64(pBlobSpot->pBuffer, nRowid);
212981212993 // neighbours count already zero after memset - no need to set it explicitly
212982212994
212983- vectorSerializeToBlob(pVector, pBlobSpot->pBuffer + VECTOR_NODE_METADATA_SIZE , pIndex->nNodeVectorSize);
212995+ vectorSerializeToBlob(pVector, pBlobSpot->pBuffer + nodeMetadataSize(pIndex->nFormatVersion) , pIndex->nNodeVectorSize);
212984212996}
212985212997
212986212998void nodeBinVector(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot, Vector *pVector) {
212987- assert( VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize <= pBlobSpot->nBufferSize );
212999+ assert( nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize <= pBlobSpot->nBufferSize );
212988213000
212989- vectorInitStatic(pVector, pIndex->nNodeVectorType, pIndex->nVectorDims, pBlobSpot->pBuffer + VECTOR_NODE_METADATA_SIZE );
213001+ vectorInitStatic(pVector, pIndex->nNodeVectorType, pIndex->nVectorDims, pBlobSpot->pBuffer + nodeMetadataSize(pIndex->nFormatVersion) );
212990213002}
212991213003
212992213004u16 nodeBinEdges(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot) {
212993- assert( VECTOR_NODE_METADATA_SIZE <= pBlobSpot->nBufferSize );
213005+ assert( nodeMetadataSize(pIndex->nFormatVersion) <= pBlobSpot->nBufferSize );
212994213006
212995213007 return readLE16(pBlobSpot->pBuffer + sizeof(u64));
212996213008}
@@ -213000,20 +213012,20 @@ void nodeBinEdge(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot, int iEdg
213000213012 int offset = nodeEdgesMetadataOffset(pIndex);
213001213013
213002213014 if( pRowid != NULL ){
213003- assert( offset + (iEdge + 1) * VECTOR_EDGE_METADATA_SIZE <= pBlobSpot->nBufferSize );
213004- *pRowid = readLE64(pBlobSpot->pBuffer + offset + iEdge * VECTOR_EDGE_METADATA_SIZE + sizeof(u64));
213015+ assert( offset + (iEdge + 1) * edgeMetadataSize(pIndex->nFormatVersion) <= pBlobSpot->nBufferSize );
213016+ *pRowid = readLE64(pBlobSpot->pBuffer + offset + iEdge * edgeMetadataSize(pIndex->nFormatVersion) + sizeof(u64));
213005213017 }
213006213018 if( pIndex->nFormatVersion != VECTOR_FORMAT_V1 && pDistance != NULL ){
213007- distance = readLE32(pBlobSpot->pBuffer + offset + iEdge * VECTOR_EDGE_METADATA_SIZE + sizeof(u32));
213019+ distance = readLE32(pBlobSpot->pBuffer + offset + iEdge * edgeMetadataSize(pIndex->nFormatVersion) + sizeof(u32));
213008213020 *pDistance = *((float*)&distance);
213009213021 }
213010213022 if( pVector != NULL ){
213011- assert( VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + iEdge * pIndex->nEdgeVectorSize < offset );
213023+ assert( nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + iEdge * pIndex->nEdgeVectorSize < offset );
213012213024 vectorInitStatic(
213013213025 pVector,
213014213026 pIndex->nEdgeVectorType,
213015213027 pIndex->nVectorDims,
213016- pBlobSpot->pBuffer + VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + iEdge * pIndex->nEdgeVectorSize
213028+ pBlobSpot->pBuffer + nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + iEdge * pIndex->nEdgeVectorSize
213017213029 );
213018213030 }
213019213031}
@@ -213050,11 +213062,11 @@ void nodeBinReplaceEdge(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, int iRe
213050213062 nEdges++;
213051213063 }
213052213064
213053- edgeVectorOffset = VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + iReplace * pIndex->nEdgeVectorSize;
213054- edgeMetaOffset = nodeEdgesMetadataOffset(pIndex) + iReplace * VECTOR_EDGE_METADATA_SIZE ;
213065+ edgeVectorOffset = nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + iReplace * pIndex->nEdgeVectorSize;
213066+ edgeMetaOffset = nodeEdgesMetadataOffset(pIndex) + iReplace * edgeMetadataSize(pIndex->nFormatVersion) ;
213055213067
213056213068 assert( edgeVectorOffset + pIndex->nEdgeVectorSize <= pBlobSpot->nBufferSize );
213057- assert( edgeMetaOffset + VECTOR_EDGE_METADATA_SIZE <= pBlobSpot->nBufferSize );
213069+ assert( edgeMetaOffset + edgeMetadataSize(pIndex->nFormatVersion) <= pBlobSpot->nBufferSize );
213058213070
213059213071 vectorSerializeToBlob(pVector, pBlobSpot->pBuffer + edgeVectorOffset, pIndex->nEdgeVectorSize);
213060213072 writeLE32(pBlobSpot->pBuffer + edgeMetaOffset + sizeof(u32), *((u32*)&distance));
@@ -213070,19 +213082,19 @@ void nodeBinDeleteEdge(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, int iDel
213070213082
213071213083 assert( 0 <= iDelete && iDelete < nEdges );
213072213084
213073- edgeVectorOffset = VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + iDelete * pIndex->nEdgeVectorSize;
213074- lastVectorOffset = VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + (nEdges - 1) * pIndex->nEdgeVectorSize;
213075- edgeMetaOffset = nodeEdgesMetadataOffset(pIndex) + iDelete * VECTOR_EDGE_METADATA_SIZE ;
213076- lastMetaOffset = nodeEdgesMetadataOffset(pIndex) + (nEdges - 1) * VECTOR_EDGE_METADATA_SIZE ;
213085+ edgeVectorOffset = nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + iDelete * pIndex->nEdgeVectorSize;
213086+ lastVectorOffset = nodeMetadataSize(pIndex->nFormatVersion) + pIndex->nNodeVectorSize + (nEdges - 1) * pIndex->nEdgeVectorSize;
213087+ edgeMetaOffset = nodeEdgesMetadataOffset(pIndex) + iDelete * edgeMetadataSize(pIndex->nFormatVersion) ;
213088+ lastMetaOffset = nodeEdgesMetadataOffset(pIndex) + (nEdges - 1) * edgeMetadataSize(pIndex->nFormatVersion) ;
213077213089
213078213090 assert( edgeVectorOffset + pIndex->nEdgeVectorSize <= pBlobSpot->nBufferSize );
213079213091 assert( lastVectorOffset + pIndex->nEdgeVectorSize <= pBlobSpot->nBufferSize );
213080- assert( edgeMetaOffset + VECTOR_EDGE_METADATA_SIZE <= pBlobSpot->nBufferSize );
213081- assert( lastMetaOffset + VECTOR_EDGE_METADATA_SIZE <= pBlobSpot->nBufferSize );
213092+ assert( edgeMetaOffset + edgeMetadataSize(pIndex->nFormatVersion) <= pBlobSpot->nBufferSize );
213093+ assert( lastMetaOffset + edgeMetadataSize(pIndex->nFormatVersion) <= pBlobSpot->nBufferSize );
213082213094
213083213095 if( edgeVectorOffset < lastVectorOffset ){
213084213096 memmove(pBlobSpot->pBuffer + edgeVectorOffset, pBlobSpot->pBuffer + lastVectorOffset, pIndex->nEdgeVectorSize);
213085- memmove(pBlobSpot->pBuffer + edgeMetaOffset, pBlobSpot->pBuffer + lastMetaOffset, VECTOR_EDGE_METADATA_SIZE );
213097+ memmove(pBlobSpot->pBuffer + edgeMetaOffset, pBlobSpot->pBuffer + lastMetaOffset, edgeMetadataSize(pIndex->nFormatVersion) );
213086213098 }
213087213099
213088213100 writeLE16(pBlobSpot->pBuffer + sizeof(u64), nEdges - 1);
@@ -213168,9 +213180,9 @@ int diskAnnCreateIndex(
213168213180 if( maxNeighborsParam == 0 ){
213169213181 // 3 D**(1/2) gives good recall values (90%+)
213170213182 // we also want to keep disk overhead at moderate level - 50x of the disk size increase is the current upper bound
213171- maxNeighborsParam = MIN(3 * ((int)(sqrt(dims)) + 1), (50 * nodeOverhead(vectorDataSize(type, dims))) / nodeEdgeOverhead(vectorDataSize(neighbours, dims)) + 1);
213183+ maxNeighborsParam = MIN(3 * ((int)(sqrt(dims)) + 1), (50 * nodeOverhead(VECTOR_FORMAT_DEFAULT, vectorDataSize(type, dims))) / nodeEdgeOverhead(VECTOR_FORMAT_DEFAULT, vectorDataSize(neighbours, dims)) + 1);
213172213184 }
213173- blockSizeBytes = nodeOverhead(vectorDataSize(type, dims)) + maxNeighborsParam * (u64)nodeEdgeOverhead(vectorDataSize(neighbours, dims));
213185+ blockSizeBytes = nodeOverhead(VECTOR_FORMAT_DEFAULT, vectorDataSize(type, dims)) + maxNeighborsParam * (u64)nodeEdgeOverhead(VECTOR_FORMAT_DEFAULT, vectorDataSize(neighbours, dims));
213174213186 if( blockSizeBytes > DISKANN_MAX_BLOCK_SZ ){
213175213187 return SQLITE_ERROR;
213176213188 }
0 commit comments