Skip to content

Commit 0ec0147

Browse files
committed
extend binary format and store distance to edges in node blocks
1 parent eac5d90 commit 0ec0147

2 files changed

Lines changed: 51 additions & 21 deletions

File tree

libsql-sqlite3/src/vectorIndexInt.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,10 +73,10 @@ int nodeEdgesMetadataOffset(const DiskAnnIndex *pIndex);
7373
void nodeBinInit(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, u64 nRowid, Vector *pVector);
7474
void nodeBinVector(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot, Vector *pVector);
7575
u16 nodeBinEdges(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot);
76-
void nodeBinEdge(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot, int iEdge, u64 *pRowid, Vector *pVector);
76+
void nodeBinEdge(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot, int iEdge, u64 *pRowid, float *distance, Vector *pVector);
7777
int nodeBinEdgeFindIdx(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot, u64 nRowid);
7878
void nodeBinPruneEdges(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, int nPruned);
79-
void nodeBinReplaceEdge(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, int iReplace, u64 nRowid, Vector *pVector);
79+
void nodeBinReplaceEdge(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, int iReplace, u64 nRowid, float distance, Vector *pVector);
8080
void nodeBinDeleteEdge(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, int iDelete);
8181
void nodeBinDebug(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot);
8282

@@ -102,9 +102,11 @@ typedef u8 MetricType;
102102
/* format version, which makes it possible to upgrade the vector on-disk format without breaking older versions of the db */
103103
#define VECTOR_FORMAT_PARAM_ID 1
104104
/*
105-
* 1 - initial version
105+
* 1 - v1 version; node block format: [node meta] [node vector] [edge vectors] ... [ [u64 unused ] [u64 edge rowid] ] ...
106+
* 2 - v2 version; node block format: [node meta] [node vector] [edge vectors] ... [ [u32 unused] [f32 distance] [u64 edge rowid] ] ...
106107
*/
107-
#define VECTOR_FORMAT_DEFAULT 1
108+
#define VECTOR_FORMAT_V1 1
109+
#define VECTOR_FORMAT_DEFAULT 2
108110

109111
/* type of the vector index */
110112
#define VECTOR_INDEX_TYPE_PARAM_ID 2

libsql-sqlite3/src/vectordiskann.c

Lines changed: 45 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,10 @@ static inline u16 readLE16(const unsigned char *p){
129129
return (u16)p[0] | (u16)p[1] << 8;
130130
}
131131

132+
static inline u32 readLE32(const unsigned char *p){
133+
return (u32)p[0] | (u32)p[1] << 8 | (u32)p[2] << 16 | (u32)p[3] << 24;
134+
}
135+
132136
static inline u64 readLE64(const unsigned char *p){
133137
return (u64)p[0]
134138
| (u64)p[1] << 8
@@ -145,6 +149,13 @@ static inline void writeLE16(unsigned char *p, u16 v){
145149
p[1] = v >> 8;
146150
}
147151

152+
/*
** Encode v into the four bytes starting at p in little-endian byte order
** (the least significant byte is stored at p[0]).
*/
static inline void writeLE32(unsigned char *p, u32 v){
  int i;
  for(i = 0; i < 4; i++){
    /* conversion to unsigned char keeps only the low 8 bits of the shifted value */
    p[i] = (unsigned char)(v >> (8 * i));
  }
}
158+
148159
static inline void writeLE64(unsigned char *p, u64 v){
149160
p[0] = v;
150161
p[1] = v >> 8;
@@ -333,13 +344,18 @@ u16 nodeBinEdges(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot) {
333344
return readLE16(pBlobSpot->pBuffer + sizeof(u64));
334345
}
335346

336-
void nodeBinEdge(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot, int iEdge, u64 *pRowid, Vector *pVector) {
347+
void nodeBinEdge(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot, int iEdge, u64 *pRowid, float *pDistance, Vector *pVector) {
348+
u32 distance;
337349
int offset = nodeEdgesMetadataOffset(pIndex);
338350

339351
if( pRowid != NULL ){
340352
assert( offset + (iEdge + 1) * VECTOR_EDGE_METADATA_SIZE <= pBlobSpot->nBufferSize );
341353
*pRowid = readLE64(pBlobSpot->pBuffer + offset + iEdge * VECTOR_EDGE_METADATA_SIZE + sizeof(u64));
342354
}
355+
if( pIndex->nFormatVersion != VECTOR_FORMAT_V1 && pDistance != NULL ){
356+
distance = readLE32(pBlobSpot->pBuffer + offset + iEdge * VECTOR_EDGE_METADATA_SIZE + sizeof(u32));
357+
*pDistance = *((float*)&distance);
358+
}
343359
if( pVector != NULL ){
344360
assert( VECTOR_NODE_METADATA_SIZE + pIndex->nNodeVectorSize + iEdge * pIndex->nEdgeVectorSize < offset );
345361
vectorInitStatic(
@@ -356,7 +372,7 @@ int nodeBinEdgeFindIdx(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot, u6
356372
// todo: if edges will be sorted by identifiers we can use binary search here (although speed up will be visible only on pretty loaded nodes: >128 edges)
357373
for(i = 0; i < nEdges; i++){
358374
u64 edgeId;
359-
nodeBinEdge(pIndex, pBlobSpot, i, &edgeId, NULL);
375+
nodeBinEdge(pIndex, pBlobSpot, i, &edgeId, NULL, NULL);
360376
if( edgeId == nRowid ){
361377
return i;
362378
}
@@ -371,7 +387,7 @@ void nodeBinPruneEdges(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, int nPru
371387
}
372388

373389
// replace edge at position iReplace or add new one if iReplace == nEdges
374-
void nodeBinReplaceEdge(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, int iReplace, u64 nRowid, Vector *pVector) {
390+
void nodeBinReplaceEdge(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, int iReplace, u64 nRowid, float distance, Vector *pVector) {
375391
int nMaxEdges = nodeEdgesMaxCount(pIndex);
376392
int nEdges = nodeBinEdges(pIndex, pBlobSpot);
377393
int edgeVectorOffset, edgeMetaOffset, itemsToMove;
@@ -390,6 +406,7 @@ void nodeBinReplaceEdge(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, int iRe
390406
assert( edgeMetaOffset + VECTOR_EDGE_METADATA_SIZE <= pBlobSpot->nBufferSize );
391407

392408
vectorSerializeToBlob(pVector, pBlobSpot->pBuffer + edgeVectorOffset, pIndex->nEdgeVectorSize);
409+
writeLE32(pBlobSpot->pBuffer + edgeMetaOffset + sizeof(u32), *((u32*)&distance));
393410
writeLE64(pBlobSpot->pBuffer + edgeMetaOffset + sizeof(u64), nRowid);
394411

395412
writeLE16(pBlobSpot->pBuffer + sizeof(u64), nEdges);
@@ -424,6 +441,7 @@ void nodeBinDebug(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot) {
424441
#if defined(SQLITE_DEBUG) && defined(SQLITE_VECTOR_TRACE)
425442
int nEdges, nMaxEdges, i;
426443
u64 nRowid;
444+
float distance = 0;
427445
Vector vector;
428446

429447
nEdges = nodeBinEdges(pIndex, pBlobSpot);
@@ -434,8 +452,8 @@ void nodeBinDebug(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot) {
434452
DiskAnnTrace((" nEdges=%d, nMaxEdges=%d, vector=", nEdges, nMaxEdges));
435453
vectorDump(&vector);
436454
for(i = 0; i < nEdges; i++){
437-
nodeBinEdge(pIndex, pBlobSpot, i, &nRowid, &vector);
438-
DiskAnnTrace((" to=%lld, vector=", nRowid, nRowid));
455+
nodeBinEdge(pIndex, pBlobSpot, i, &nRowid, &distance, &vector);
456+
DiskAnnTrace((" to=%lld, distance=%f, vector=", nRowid, distance));
439457
vectorDump(&vector);
440458
}
441459
#endif
@@ -1126,7 +1144,8 @@ static int diskAnnReplaceEdgeIdx(
11261144
BlobSpot *pNodeBlob,
11271145
u64 newRowid,
11281146
VectorPair *pNewVector,
1129-
VectorPair *pPlaceholder
1147+
VectorPair *pPlaceholder,
1148+
float *pNodeToNew
11301149
) {
11311150
int i, nEdges, nMaxEdges, iReplace = -1;
11321151
Vector nodeVector, edgeVector;
@@ -1139,19 +1158,23 @@ static int diskAnnReplaceEdgeIdx(
11391158

11401159
// we need to evaluate potentially approximate distance here in order to correctly compare it with edge distances
11411160
nodeToNew = diskAnnVectorDistance(pIndex, pPlaceholder->pEdge, pNewVector->pEdge);
1161+
*pNodeToNew = nodeToNew;
11421162

11431163
for(i = nEdges - 1; i >= 0; i--){
11441164
u64 edgeRowid;
11451165
float edgeToNew, nodeToEdge;
11461166

1147-
nodeBinEdge(pIndex, pNodeBlob, i, &edgeRowid, &edgeVector);
1167+
nodeBinEdge(pIndex, pNodeBlob, i, &edgeRowid, &nodeToEdge, &edgeVector);
11481168
if( edgeRowid == newRowid ){
11491169
// deletes can leave "zombie" edges in the graph and we must override them and not store duplicate edges in the node
11501170
return i;
11511171
}
11521172

1173+
if( pIndex->nFormatVersion == VECTOR_FORMAT_V1 ){
1174+
nodeToEdge = diskAnnVectorDistance(pIndex, pPlaceholder->pEdge, &edgeVector);
1175+
}
1176+
11531177
edgeToNew = diskAnnVectorDistance(pIndex, &edgeVector, pNewVector->pEdge);
1154-
nodeToEdge = diskAnnVectorDistance(pIndex, pPlaceholder->pEdge, &edgeVector);
11551178
if( nodeToNew > pIndex->pruningAlpha * edgeToNew ){
11561179
return -1;
11571180
}
@@ -1186,21 +1209,24 @@ static void diskAnnPruneEdges(const DiskAnnIndex *pIndex, BlobSpot *pNodeBlob, i
11861209
nodeBinDebug(pIndex, pNodeBlob);
11871210
#endif
11881211

1189-
nodeBinEdge(pIndex, pNodeBlob, iInserted, &hintRowid, &hintEdgeVector);
1212+
nodeBinEdge(pIndex, pNodeBlob, iInserted, &hintRowid, NULL, &hintEdgeVector);
11901213

11911214
// remove edges which are no longer interesting due to the addition of iInserted
11921215
i = 0;
11931216
while( i < nEdges ){
11941217
Vector edgeVector;
11951218
float nodeToEdge, hintToEdge;
11961219
u64 edgeRowid;
1197-
nodeBinEdge(pIndex, pNodeBlob, i, &edgeRowid, &edgeVector);
1220+
nodeBinEdge(pIndex, pNodeBlob, i, &edgeRowid, &nodeToEdge, &edgeVector);
11981221

11991222
if( hintRowid == edgeRowid ){
12001223
i++;
12011224
continue;
12021225
}
1203-
nodeToEdge = diskAnnVectorDistance(pIndex, pPlaceholder->pEdge, &edgeVector);
1226+
if( pIndex->nFormatVersion == VECTOR_FORMAT_V1 ){
1227+
nodeToEdge = diskAnnVectorDistance(pIndex, pPlaceholder->pEdge, &edgeVector);
1228+
}
1229+
12041230
hintToEdge = diskAnnVectorDistance(pIndex, &hintEdgeVector, &edgeVector);
12051231
if( nodeToEdge > pIndex->pruningAlpha * hintToEdge ){
12061232
nodeBinDeleteEdge(pIndex, pNodeBlob, i);
@@ -1315,7 +1341,7 @@ static int diskAnnSearchInternal(DiskAnnIndex *pIndex, DiskAnnSearchCtx *pCtx, u
13151341
float edgeDistance;
13161342
int iInsert;
13171343
DiskAnnNode *pNewCandidate;
1318-
nodeBinEdge(pIndex, pCandidateBlob, i, &edgeRowid, &edgeVector);
1344+
nodeBinEdge(pIndex, pCandidateBlob, i, &edgeRowid, NULL, &edgeVector);
13191345
if( diskAnnSearchCtxIsVisited(pCtx, edgeRowid) || diskAnnSearchCtxHasCandidate(pCtx, edgeRowid) ){
13201346
continue;
13211347
}
@@ -1512,28 +1538,30 @@ int diskAnnInsert(
15121538
for(pVisited = ctx.visitedList; pVisited != NULL; pVisited = pVisited->pNext){
15131539
Vector nodeVector;
15141540
int iReplace;
1541+
float nodeToNew;
15151542

15161543
nodeBinVector(pIndex, pVisited->pBlobSpot, &nodeVector);
15171544
loadVectorPair(&vCandidate, &nodeVector);
15181545

1519-
iReplace = diskAnnReplaceEdgeIdx(pIndex, pBlobSpot, pVisited->nRowid, &vCandidate, &vInsert);
1546+
iReplace = diskAnnReplaceEdgeIdx(pIndex, pBlobSpot, pVisited->nRowid, &vCandidate, &vInsert, &nodeToNew);
15201547
if( iReplace == -1 ){
15211548
continue;
15221549
}
1523-
nodeBinReplaceEdge(pIndex, pBlobSpot, iReplace, pVisited->nRowid, vCandidate.pEdge);
1550+
nodeBinReplaceEdge(pIndex, pBlobSpot, iReplace, pVisited->nRowid, nodeToNew, vCandidate.pEdge);
15241551
diskAnnPruneEdges(pIndex, pBlobSpot, iReplace, &vInsert);
15251552
}
15261553

15271554
// second pass - add new node as a potential neighbour of all visited nodes
15281555
loadVectorPair(&vInsert, pVectorInRow->pVector);
15291556
for(pVisited = ctx.visitedList; pVisited != NULL; pVisited = pVisited->pNext){
15301557
int iReplace;
1558+
float nodeToNew;
15311559

1532-
iReplace = diskAnnReplaceEdgeIdx(pIndex, pVisited->pBlobSpot, nNewRowid, &vInsert, &vCandidate);
1560+
iReplace = diskAnnReplaceEdgeIdx(pIndex, pVisited->pBlobSpot, nNewRowid, &vInsert, &vCandidate, &nodeToNew);
15331561
if( iReplace == -1 ){
15341562
continue;
15351563
}
1536-
nodeBinReplaceEdge(pIndex, pVisited->pBlobSpot, iReplace, nNewRowid, vInsert.pEdge);
1564+
nodeBinReplaceEdge(pIndex, pVisited->pBlobSpot, iReplace, nNewRowid, nodeToNew, vInsert.pEdge);
15371565
diskAnnPruneEdges(pIndex, pVisited->pBlobSpot, iReplace, &vCandidate);
15381566

15391567
rc = blobSpotFlush(pIndex, pVisited->pBlobSpot);
@@ -1598,7 +1626,7 @@ int diskAnnDelete(
15981626
nNeighbours = nodeBinEdges(pIndex, pNodeBlob);
15991627
for(i = 0; i < nNeighbours; i++){
16001628
u64 edgeRowid;
1601-
nodeBinEdge(pIndex, pNodeBlob, i, &edgeRowid, NULL);
1629+
nodeBinEdge(pIndex, pNodeBlob, i, &edgeRowid, NULL, NULL);
16021630
rc = blobSpotReload(pIndex, pEdgeBlob, edgeRowid, pIndex->nBlockSize);
16031631
if( rc == DISKANN_ROW_NOT_FOUND ){
16041632
continue;

0 commit comments

Comments
 (0)