@@ -129,6 +129,10 @@ static inline u16 readLE16(const unsigned char *p){
129129 return (u16 )p [0 ] | (u16 )p [1 ] << 8 ;
130130}
131131
132+ static inline u32 readLE32 (const unsigned char * p ){
133+ return (u32 )p [0 ] | (u32 )p [1 ] << 8 | (u32 )p [2 ] << 16 | (u32 )p [3 ] << 24 ;
134+ }
135+
132136static inline u64 readLE64 (const unsigned char * p ){
133137 return (u64 )p [0 ]
134138 | (u64 )p [1 ] << 8
@@ -145,6 +149,13 @@ static inline void writeLE16(unsigned char *p, u16 v){
145149 p [1 ] = v >> 8 ;
146150}
147151
152+ static inline void writeLE32 (unsigned char * p , u32 v ){
153+ p [0 ] = v ;
154+ p [1 ] = v >> 8 ;
155+ p [2 ] = v >> 16 ;
156+ p [3 ] = v >> 24 ;
157+ }
158+
148159static inline void writeLE64 (unsigned char * p , u64 v ){
149160 p [0 ] = v ;
150161 p [1 ] = v >> 8 ;
@@ -333,13 +344,18 @@ u16 nodeBinEdges(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot) {
333344 return readLE16 (pBlobSpot -> pBuffer + sizeof (u64 ));
334345}
335346
336- void nodeBinEdge (const DiskAnnIndex * pIndex , const BlobSpot * pBlobSpot , int iEdge , u64 * pRowid , Vector * pVector ) {
347+ void nodeBinEdge (const DiskAnnIndex * pIndex , const BlobSpot * pBlobSpot , int iEdge , u64 * pRowid , float * pDistance , Vector * pVector ) {
348+ u32 distance ;
337349 int offset = nodeEdgesMetadataOffset (pIndex );
338350
339351 if ( pRowid != NULL ){
340352 assert ( offset + (iEdge + 1 ) * VECTOR_EDGE_METADATA_SIZE <= pBlobSpot -> nBufferSize );
341353 * pRowid = readLE64 (pBlobSpot -> pBuffer + offset + iEdge * VECTOR_EDGE_METADATA_SIZE + sizeof (u64 ));
342354 }
355+ if ( pIndex -> nFormatVersion != VECTOR_FORMAT_V1 && pDistance != NULL ){
356+ distance = readLE32 (pBlobSpot -> pBuffer + offset + iEdge * VECTOR_EDGE_METADATA_SIZE + sizeof (u32 ));
357+ * pDistance = * ((float * )& distance );
358+ }
343359 if ( pVector != NULL ){
344360 assert ( VECTOR_NODE_METADATA_SIZE + pIndex -> nNodeVectorSize + iEdge * pIndex -> nEdgeVectorSize < offset );
345361 vectorInitStatic (
@@ -356,7 +372,7 @@ int nodeBinEdgeFindIdx(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot, u6
356372 // todo: if edges were sorted by identifier we could use binary search here (although the speed-up would be visible only on heavily loaded nodes: >128 edges)
357373 for (i = 0 ; i < nEdges ; i ++ ){
358374 u64 edgeId ;
359- nodeBinEdge (pIndex , pBlobSpot , i , & edgeId , NULL );
375+ nodeBinEdge (pIndex , pBlobSpot , i , & edgeId , NULL , NULL );
360376 if ( edgeId == nRowid ){
361377 return i ;
362378 }
@@ -371,7 +387,7 @@ void nodeBinPruneEdges(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, int nPru
371387}
372388
373389// replace edge at position iReplace or add new one if iReplace == nEdges
374- void nodeBinReplaceEdge (const DiskAnnIndex * pIndex , BlobSpot * pBlobSpot , int iReplace , u64 nRowid , Vector * pVector ) {
390+ void nodeBinReplaceEdge (const DiskAnnIndex * pIndex , BlobSpot * pBlobSpot , int iReplace , u64 nRowid , float distance , Vector * pVector ) {
375391 int nMaxEdges = nodeEdgesMaxCount (pIndex );
376392 int nEdges = nodeBinEdges (pIndex , pBlobSpot );
377393 int edgeVectorOffset , edgeMetaOffset , itemsToMove ;
@@ -390,6 +406,7 @@ void nodeBinReplaceEdge(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, int iRe
390406 assert ( edgeMetaOffset + VECTOR_EDGE_METADATA_SIZE <= pBlobSpot -> nBufferSize );
391407
392408 vectorSerializeToBlob (pVector , pBlobSpot -> pBuffer + edgeVectorOffset , pIndex -> nEdgeVectorSize );
409+ writeLE32 (pBlobSpot -> pBuffer + edgeMetaOffset + sizeof (u32 ), * ((u32 * )& distance ));
393410 writeLE64 (pBlobSpot -> pBuffer + edgeMetaOffset + sizeof (u64 ), nRowid );
394411
395412 writeLE16 (pBlobSpot -> pBuffer + sizeof (u64 ), nEdges );
@@ -424,6 +441,7 @@ void nodeBinDebug(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot) {
424441#if defined(SQLITE_DEBUG ) && defined(SQLITE_VECTOR_TRACE )
425442 int nEdges , nMaxEdges , i ;
426443 u64 nRowid ;
444+ float distance = 0 ;
427445 Vector vector ;
428446
429447 nEdges = nodeBinEdges (pIndex , pBlobSpot );
@@ -434,8 +452,8 @@ void nodeBinDebug(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot) {
434452 DiskAnnTrace ((" nEdges=%d, nMaxEdges=%d, vector=" , nEdges , nMaxEdges ));
435453 vectorDump (& vector );
436454 for (i = 0 ; i < nEdges ; i ++ ){
437- nodeBinEdge (pIndex , pBlobSpot , i , & nRowid , & vector );
438- DiskAnnTrace ((" to=%lld, vector=" , nRowid , nRowid ));
455+ nodeBinEdge (pIndex , pBlobSpot , i , & nRowid , & distance , & vector );
456+ DiskAnnTrace ((" to=%lld, distance=%f, vector=" , nRowid , distance ));
439457 vectorDump (& vector );
440458 }
441459#endif
@@ -1126,7 +1144,8 @@ static int diskAnnReplaceEdgeIdx(
11261144 BlobSpot * pNodeBlob ,
11271145 u64 newRowid ,
11281146 VectorPair * pNewVector ,
1129- VectorPair * pPlaceholder
1147+ VectorPair * pPlaceholder ,
1148+ float * pNodeToNew
11301149) {
11311150 int i , nEdges , nMaxEdges , iReplace = -1 ;
11321151 Vector nodeVector , edgeVector ;
@@ -1139,19 +1158,23 @@ static int diskAnnReplaceEdgeIdx(
11391158
11401159 // we need to evaluate potentially approximate distance here in order to correctly compare it with edge distances
11411160 nodeToNew = diskAnnVectorDistance (pIndex , pPlaceholder -> pEdge , pNewVector -> pEdge );
1161+ * pNodeToNew = nodeToNew ;
11421162
11431163 for (i = nEdges - 1 ; i >= 0 ; i -- ){
11441164 u64 edgeRowid ;
11451165 float edgeToNew , nodeToEdge ;
11461166
1147- nodeBinEdge (pIndex , pNodeBlob , i , & edgeRowid , & edgeVector );
1167+ nodeBinEdge (pIndex , pNodeBlob , i , & edgeRowid , & nodeToEdge , & edgeVector );
11481168 if ( edgeRowid == newRowid ){
11491169 // deletes can leave "zombie" edges in the graph and we must override them and not store duplicate edges in the node
11501170 return i ;
11511171 }
11521172
1173+ if ( pIndex -> nFormatVersion == VECTOR_FORMAT_V1 ){
1174+ nodeToEdge = diskAnnVectorDistance (pIndex , pPlaceholder -> pEdge , & edgeVector );
1175+ }
1176+
11531177 edgeToNew = diskAnnVectorDistance (pIndex , & edgeVector , pNewVector -> pEdge );
1154- nodeToEdge = diskAnnVectorDistance (pIndex , pPlaceholder -> pEdge , & edgeVector );
11551178 if ( nodeToNew > pIndex -> pruningAlpha * edgeToNew ){
11561179 return -1 ;
11571180 }
@@ -1186,21 +1209,24 @@ static void diskAnnPruneEdges(const DiskAnnIndex *pIndex, BlobSpot *pNodeBlob, i
11861209 nodeBinDebug (pIndex , pNodeBlob );
11871210#endif
11881211
1189- nodeBinEdge (pIndex , pNodeBlob , iInserted , & hintRowid , & hintEdgeVector );
1212+ nodeBinEdge (pIndex , pNodeBlob , iInserted , & hintRowid , NULL , & hintEdgeVector );
11901213
11911214 // remove edges which are no longer interesting due to the addition of iInserted
11921215 i = 0 ;
11931216 while ( i < nEdges ){
11941217 Vector edgeVector ;
11951218 float nodeToEdge , hintToEdge ;
11961219 u64 edgeRowid ;
1197- nodeBinEdge (pIndex , pNodeBlob , i , & edgeRowid , & edgeVector );
1220+ nodeBinEdge (pIndex , pNodeBlob , i , & edgeRowid , & nodeToEdge , & edgeVector );
11981221
11991222 if ( hintRowid == edgeRowid ){
12001223 i ++ ;
12011224 continue ;
12021225 }
1203- nodeToEdge = diskAnnVectorDistance (pIndex , pPlaceholder -> pEdge , & edgeVector );
1226+ if ( pIndex -> nFormatVersion == VECTOR_FORMAT_V1 ){
1227+ nodeToEdge = diskAnnVectorDistance (pIndex , pPlaceholder -> pEdge , & edgeVector );
1228+ }
1229+
12041230 hintToEdge = diskAnnVectorDistance (pIndex , & hintEdgeVector , & edgeVector );
12051231 if ( nodeToEdge > pIndex -> pruningAlpha * hintToEdge ){
12061232 nodeBinDeleteEdge (pIndex , pNodeBlob , i );
@@ -1315,7 +1341,7 @@ static int diskAnnSearchInternal(DiskAnnIndex *pIndex, DiskAnnSearchCtx *pCtx, u
13151341 float edgeDistance ;
13161342 int iInsert ;
13171343 DiskAnnNode * pNewCandidate ;
1318- nodeBinEdge (pIndex , pCandidateBlob , i , & edgeRowid , & edgeVector );
1344+ nodeBinEdge (pIndex , pCandidateBlob , i , & edgeRowid , NULL , & edgeVector );
13191345 if ( diskAnnSearchCtxIsVisited (pCtx , edgeRowid ) || diskAnnSearchCtxHasCandidate (pCtx , edgeRowid ) ){
13201346 continue ;
13211347 }
@@ -1512,28 +1538,30 @@ int diskAnnInsert(
15121538 for (pVisited = ctx .visitedList ; pVisited != NULL ; pVisited = pVisited -> pNext ){
15131539 Vector nodeVector ;
15141540 int iReplace ;
1541+ float nodeToNew ;
15151542
15161543 nodeBinVector (pIndex , pVisited -> pBlobSpot , & nodeVector );
15171544 loadVectorPair (& vCandidate , & nodeVector );
15181545
1519- iReplace = diskAnnReplaceEdgeIdx (pIndex , pBlobSpot , pVisited -> nRowid , & vCandidate , & vInsert );
1546+ iReplace = diskAnnReplaceEdgeIdx (pIndex , pBlobSpot , pVisited -> nRowid , & vCandidate , & vInsert , & nodeToNew );
15201547 if ( iReplace == -1 ){
15211548 continue ;
15221549 }
1523- nodeBinReplaceEdge (pIndex , pBlobSpot , iReplace , pVisited -> nRowid , vCandidate .pEdge );
1550+ nodeBinReplaceEdge (pIndex , pBlobSpot , iReplace , pVisited -> nRowid , nodeToNew , vCandidate .pEdge );
15241551 diskAnnPruneEdges (pIndex , pBlobSpot , iReplace , & vInsert );
15251552 }
15261553
15271554 // second pass - add new node as a potential neighbour of all visited nodes
15281555 loadVectorPair (& vInsert , pVectorInRow -> pVector );
15291556 for (pVisited = ctx .visitedList ; pVisited != NULL ; pVisited = pVisited -> pNext ){
15301557 int iReplace ;
1558+ float nodeToNew ;
15311559
1532- iReplace = diskAnnReplaceEdgeIdx (pIndex , pVisited -> pBlobSpot , nNewRowid , & vInsert , & vCandidate );
1560+ iReplace = diskAnnReplaceEdgeIdx (pIndex , pVisited -> pBlobSpot , nNewRowid , & vInsert , & vCandidate , & nodeToNew );
15331561 if ( iReplace == -1 ){
15341562 continue ;
15351563 }
1536- nodeBinReplaceEdge (pIndex , pVisited -> pBlobSpot , iReplace , nNewRowid , vInsert .pEdge );
1564+ nodeBinReplaceEdge (pIndex , pVisited -> pBlobSpot , iReplace , nNewRowid , nodeToNew , vInsert .pEdge );
15371565 diskAnnPruneEdges (pIndex , pVisited -> pBlobSpot , iReplace , & vCandidate );
15381566
15391567 rc = blobSpotFlush (pIndex , pVisited -> pBlobSpot );
@@ -1598,7 +1626,7 @@ int diskAnnDelete(
15981626 nNeighbours = nodeBinEdges (pIndex , pNodeBlob );
15991627 for (i = 0 ; i < nNeighbours ; i ++ ){
16001628 u64 edgeRowid ;
1601- nodeBinEdge (pIndex , pNodeBlob , i , & edgeRowid , NULL );
1629+ nodeBinEdge (pIndex , pNodeBlob , i , & edgeRowid , NULL , NULL );
16021630 rc = blobSpotReload (pIndex , pEdgeBlob , edgeRowid , pIndex -> nBlockSize );
16031631 if ( rc == DISKANN_ROW_NOT_FOUND ){
16041632 continue ;
0 commit comments