Skip to content

Commit fd1f5c4

Browse files
authored
Merge pull request #1661 from tursodatabase/vector-search-full-support-all-column-types
vector search: full support all column types
2 parents d86b5b7 + 8604065 commit fd1f5c4

15 files changed

Lines changed: 1371 additions & 669 deletions

File tree

libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c

Lines changed: 472 additions & 259 deletions
Large diffs are not rendered by default.

libsql-ffi/bundled/src/sqlite3.c

Lines changed: 472 additions & 259 deletions
Large diffs are not rendered by default.

libsql-sqlite3/Makefile.in

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ LIBOBJS0 = alter.lo analyze.lo attach.lo auth.lo \
195195
sqlite3session.lo select.lo sqlite3rbu.lo status.lo stmt.lo \
196196
table.lo threads.lo tokenize.lo treeview.lo trigger.lo \
197197
update.lo userauth.lo upsert.lo util.lo vacuum.lo \
198-
vector.lo vectorfloat32.lo vectorfloat64.lo vector1bit.lo \
198+
vector.lo vectorfloat32.lo vectorfloat64.lo vectorfloat1bit.lo \
199199
vectorIndex.lo vectordiskann.lo vectorvtab.lo \
200200
vdbe.lo vdbeapi.lo vdbeaux.lo vdbeblob.lo vdbemem.lo vdbesort.lo \
201201
vdbetrace.lo vdbevtab.lo \
@@ -302,8 +302,8 @@ SRC = \
302302
$(TOP)/src/util.c \
303303
$(TOP)/src/vacuum.c \
304304
$(TOP)/src/vector.c \
305-
$(TOP)/src/vector1bit.c \
306305
$(TOP)/src/vectorInt.h \
306+
$(TOP)/src/vectorfloat1bit.c \
307307
$(TOP)/src/vectorfloat32.c \
308308
$(TOP)/src/vectorfloat64.c \
309309
$(TOP)/src/vectorIndexInt.h \
@@ -1139,8 +1139,8 @@ vacuum.lo: $(TOP)/src/vacuum.c $(HDR)
11391139
vector.lo: $(TOP)/src/vector.c $(HDR)
11401140
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/vector.c
11411141

1142-
vector1bit.lo: $(TOP)/src/vector1bit.c $(HDR)
1143-
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/vector1bit.c
1142+
vectorfloat1bit.lo: $(TOP)/src/vectorfloat1bit.c $(HDR)
1143+
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/vectorfloat1bit.c
11441144

11451145
vectorfloat32.lo: $(TOP)/src/vectorfloat32.c $(HDR)
11461146
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/vectorfloat32.c

libsql-sqlite3/benchmark/workload.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ def recall_uniform(dim, n, q):
1010
print(f'CREATE TABLE queries ( emb FLOAT32({dim}) );')
1111
print(f'BEGIN TRANSACTION;')
1212
for i in range(n):
13-
vector = f"[{','.join(map(str, np.random.uniform(size=dim)))}]"
13+
vector = f"[{','.join(map(str, np.random.uniform(-1, 1, size=dim)))}]"
1414
print(f'INSERT INTO data VALUES ({i}, vector(\'{vector}\'));')
1515
for i in range(q):
16-
vector = f"[{','.join(map(str, np.random.uniform(size=dim)))}]"
16+
vector = f"[{','.join(map(str, np.random.uniform(-1, 1, size=dim)))}]"
1717
print(f'INSERT INTO queries VALUES (vector(\'{vector}\'));')
1818
print(f'COMMIT;')
1919
print('---insert everything')
@@ -29,7 +29,7 @@ def recall_normal(dim, n, q):
2929
vector = f"[{','.join(map(str, np.random.uniform(size=64)))}]"
3030
print(f'INSERT INTO data VALUES ({i}, \'{vector}\');')
3131
for i in range(q):
32-
vector = f"[{','.join(map(str, np.random.uniform(size=64)))}]"
32+
vector = f"[{','.join(map(str, np.random.uniform(-1, 1, size=64)))}]"
3333
print(f'INSERT INTO queries VALUES (\'{vector}\');')
3434
print(f'COMMIT;')
3535
print('---insert everything')
@@ -40,7 +40,7 @@ def no_vectors(n, q):
4040
print('PRAGMA journal_mode=WAL;')
4141
print(f'CREATE TABLE x ( id INTEGER PRIMARY KEY, value TEXT );')
4242
for i in range(n):
43-
vector = f"[{','.join(map(str, np.random.uniform(size=64)))}]"
43+
vector = f"[{','.join(map(str, np.random.uniform(-1, 1, size=64)))}]"
4444
print(f'INSERT INTO x VALUES ({i}, \'{vector}\');')
4545
print('---inserts')
4646
for i in range(q):
@@ -54,11 +54,11 @@ def bruteforce(dim, n, q):
5454
print('PRAGMA journal_mode=WAL;')
5555
print(f'CREATE TABLE x ( id INTEGER PRIMARY KEY, embedding FLOAT32({dim}) );')
5656
for i in range(n):
57-
vector = f"[{','.join(map(str, np.random.uniform(size=dim)))}]"
57+
vector = f"[{','.join(map(str, np.random.uniform(-1, 1, size=dim)))}]"
5858
print(f'INSERT INTO x VALUES ({i}, vector(\'{vector}\'));')
5959
print('---inserts')
6060
for i in range(q):
61-
vector = f"[{','.join(map(str, np.random.uniform(size=dim)))}]"
61+
vector = f"[{','.join(map(str, np.random.uniform(-1, 1, size=dim)))}]"
6262
print(f'SELECT id FROM x ORDER BY vector_distance_cos(embedding, vector(\'{vector}\')) LIMIT 1;')
6363
print('---search')
6464

@@ -68,13 +68,13 @@ def diskann(dim, n, q):
6868
q = int(q)
6969
print('PRAGMA journal_mode=WAL;')
7070
print(f'CREATE TABLE x ( id INTEGER PRIMARY KEY, embedding FLOAT32({dim}) );')
71-
print(f'CREATE INDEX x_idx ON x( libsql_vector_idx(embedding) );')
71+
print(f"CREATE INDEX x_idx ON x( libsql_vector_idx(embedding) );")
7272
for i in range(n):
73-
vector = f"[{','.join(map(str, np.random.uniform(size=dim)))}]"
73+
vector = f"[{','.join(map(str, np.random.uniform(-1, 1, size=dim)))}]"
7474
print(f'INSERT INTO x VALUES ({i}, vector(\'{vector}\'));')
7575
print('---inserts')
7676
for i in range(q):
77-
vector = f"[{','.join(map(str, np.random.uniform(size=dim)))}]"
77+
vector = f"[{','.join(map(str, np.random.uniform(-1, 1, size=dim)))}]"
7878
print(f'SELECT id FROM vector_top_k(\'x_idx\', vector(\'{vector}\'), 1);')
7979
print('---search')
8080

0 commit comments

Comments
 (0)