mirror of
https://github.com/tursodatabase/libsql.git
synced 2025-05-20 12:38:12 +00:00
feature in one commit
This commit is contained in:
libsql-sqlite3
Makefile.in
src
build.cinsert.cmain.cparse.yselect.csqliteInt.htest_libsql_diskann.cvdbe.cvdbeInt.hvdbeaux.cvector.cvectorIndex.cvectorIndexInt.hvectordiskann.cvectorvtab.c
test
tool
libsql/tests
vendored/sqlite3-parser/src/parser
@ -196,7 +196,7 @@ LIBOBJS0 = alter.lo analyze.lo attach.lo auth.lo \
|
||||
table.lo threads.lo tokenize.lo treeview.lo trigger.lo \
|
||||
update.lo userauth.lo upsert.lo util.lo vacuum.lo \
|
||||
vector.lo vectorfloat32.lo vectorfloat64.lo \
|
||||
vectordiskann.lo \
|
||||
vectorIndex.lo vectordiskann.lo vectorvtab.lo \
|
||||
vdbe.lo vdbeapi.lo vdbeaux.lo vdbeblob.lo vdbemem.lo vdbesort.lo \
|
||||
vdbetrace.lo vdbevtab.lo \
|
||||
wal.lo walker.lo wasmedge_bindings.lo where.lo wherecode.lo whereexpr.lo \
|
||||
@ -306,7 +306,9 @@ SRC = \
|
||||
$(TOP)/src/vectorfloat32.c \
|
||||
$(TOP)/src/vectorfloat64.c \
|
||||
$(TOP)/src/vectorIndexInt.h \
|
||||
$(TOP)/src/vectorIndex.c \
|
||||
$(TOP)/src/vectordiskann.c \
|
||||
$(TOP)/src/vectorvtab.c \
|
||||
$(TOP)/src/vdbe.c \
|
||||
$(TOP)/src/vdbe.h \
|
||||
$(TOP)/src/vdbeapi.c \
|
||||
@ -1122,9 +1124,15 @@ vectorfloat32.lo: $(TOP)/src/vectorfloat32.c $(HDR)
|
||||
vectorfloat64.lo: $(TOP)/src/vectorfloat64.c $(HDR)
|
||||
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/vectorfloat64.c
|
||||
|
||||
vectorIndex.lo: $(TOP)/src/vectorIndex.c $(HDR)
|
||||
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/vectorIndex.c
|
||||
|
||||
vectordiskann.lo: $(TOP)/src/vectordiskann.c $(HDR)
|
||||
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/vectordiskann.c
|
||||
|
||||
vectorvtab.lo: $(TOP)/src/vectorvtab.c $(HDR)
|
||||
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/vectorvtab.c
|
||||
|
||||
vdbe.lo: $(TOP)/src/vdbe.c $(HDR)
|
||||
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/vdbe.c
|
||||
|
||||
|
@ -23,6 +23,9 @@
|
||||
** ROLLBACK
|
||||
*/
|
||||
#include "sqliteInt.h"
|
||||
#ifndef SQLITE_OMIT_VECTOR
|
||||
#include "vectorIndexInt.h"
|
||||
#endif
|
||||
|
||||
#ifndef SQLITE_OMIT_SHARED_CACHE
|
||||
/*
|
||||
@ -830,7 +833,7 @@ static void SQLITE_NOINLINE deleteTable(sqlite3 *db, Table *pTable){
|
||||
for(pIndex = pTable->pIndex; pIndex; pIndex=pNext){
|
||||
pNext = pIndex->pNext;
|
||||
assert( pIndex->pSchema==pTable->pSchema
|
||||
|| (IsVirtual(pTable) && pIndex->idxType!=SQLITE_IDXTYPE_APPDEF) );
|
||||
|| (IsVirtual(pTable) && (pIndex->idxType&3)!=SQLITE_IDXTYPE_APPDEF) ); // '&3' is the LibSQL fix to treat VECTOR index as APPDEF
|
||||
if( db->pnBytesFreed==0 && !IsVirtual(pTable) ){
|
||||
char *zName = pIndex->zName;
|
||||
TESTONLY ( Index *pOld = ) sqlite3HashInsert(
|
||||
@ -1887,7 +1890,7 @@ void sqlite3AddPrimaryKey(
|
||||
#endif
|
||||
}else{
|
||||
sqlite3CreateIndex(pParse, 0, 0, 0, pList, onError, 0,
|
||||
0, sortOrder, 0, SQLITE_IDXTYPE_PRIMARYKEY);
|
||||
0, sortOrder, 0, SQLITE_IDXTYPE_PRIMARYKEY, 0);
|
||||
pList = 0;
|
||||
}
|
||||
|
||||
@ -2388,7 +2391,7 @@ static void convertToWithoutRowidTable(Parse *pParse, Table *pTab){
|
||||
assert( pParse->pNewTable==pTab );
|
||||
pTab->iPKey = -1;
|
||||
sqlite3CreateIndex(pParse, 0, 0, 0, pList, pTab->keyConf, 0, 0, 0, 0,
|
||||
SQLITE_IDXTYPE_PRIMARYKEY);
|
||||
SQLITE_IDXTYPE_PRIMARYKEY, 0);
|
||||
if( pParse->nErr ){
|
||||
pTab->tabFlags &= ~TF_WithoutRowid;
|
||||
return;
|
||||
@ -3319,9 +3322,23 @@ static void destroyTable(Parse *pParse, Table *pTab){
|
||||
*/
|
||||
Pgno iTab = pTab->tnum;
|
||||
Pgno iDestroyed = 0;
|
||||
Index *pIdx;
|
||||
|
||||
#ifndef SQLITE_OMIT_VECTOR
|
||||
/*
|
||||
* There are several places to delete vector index:
|
||||
* 1. We can add this capability in the OP_Destroy op code. The problem is that it operates with root pages and since we do not maintain them properly, there is a small risk that we could delete something unrelated to the vector index
|
||||
* 2. We can add this capability in the OP_DropIndex op code. The problem is that db schema is locked at this moment and we will not be able to execute sqlite3_exec required for vectorIndexDrop
|
||||
* 3. Delete index during the parsing stage (implemented variant) - it's hacky and dirty but seems to me as more safe way to delete only something that we really want
|
||||
*/
|
||||
for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
|
||||
if( IsVectorIndex(pIdx) ){
|
||||
vectorIndexDrop(pParse->db, pIdx->zName);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
while( 1 ){
|
||||
Index *pIdx;
|
||||
Pgno iLargest = 0;
|
||||
|
||||
if( iDestroyed==0 || iTab<iDestroyed ){
|
||||
@ -3808,9 +3825,21 @@ static void sqlite3RefillIndex(Parse *pParse, Index *pIndex, int memRootPage){
|
||||
sqlite3VdbeAddOp2(v, OP_Next, iTab, addr1+1); VdbeCoverage(v);
|
||||
sqlite3VdbeJumpHere(v, addr1);
|
||||
if( memRootPage<0 ) sqlite3VdbeAddOp2(v, OP_Clear, tnum, iDb);
|
||||
sqlite3VdbeAddOp4(v, OP_OpenWrite, iIdx, (int)tnum, iDb,
|
||||
(char *)pKey, P4_KEYINFO);
|
||||
#ifndef SQLITE_OMIT_VECTOR
|
||||
/*
|
||||
* Emit OP_OpenVectorIdx op code and set P5 to OPFLAG_FORDELETE if we are in the REINDEX phase and need to clear previous index
|
||||
*/
|
||||
if( IsVectorIndex(pIndex) ){
|
||||
sqlite3VdbeAddOp4(v, OP_OpenVectorIdx, iIdx, (int)tnum, iDb, (char *)pKey, P4_KEYINFO);
|
||||
sqlite3VdbeChangeP5(v, (memRootPage<0)?OPFLAG_FORDELETE:0);
|
||||
}else{
|
||||
sqlite3VdbeAddOp4(v, OP_OpenWrite, iIdx, (int)tnum, iDb, (char *)pKey, P4_KEYINFO);
|
||||
sqlite3VdbeChangeP5(v, OPFLAG_BULKCSR|((memRootPage>=0)?OPFLAG_P2ISREG:0));
|
||||
}
|
||||
#else
|
||||
sqlite3VdbeAddOp4(v, OP_OpenWrite, iIdx, (int)tnum, iDb, (char *)pKey, P4_KEYINFO);
|
||||
sqlite3VdbeChangeP5(v, OPFLAG_BULKCSR|((memRootPage>=0)?OPFLAG_P2ISREG:0));
|
||||
#endif
|
||||
|
||||
addr1 = sqlite3VdbeAddOp2(v, OP_SorterSort, iSorter, 0); VdbeCoverage(v);
|
||||
if( IsUniqueIndex(pIndex) ){
|
||||
@ -3841,7 +3870,14 @@ static void sqlite3RefillIndex(Parse *pParse, Index *pIndex, int memRootPage){
|
||||
** a different order from the main table.
|
||||
** See ticket: https://www.sqlite.org/src/info/bba7b69f9849b5bf
|
||||
*/
|
||||
sqlite3VdbeAddOp1(v, OP_SeekEnd, iIdx);
|
||||
#ifndef SQLITE_OMIT_VECTOR
|
||||
// This optimization makes no sense for a vector index, so OP_SeekEnd is not implemented for vector indexes and must be omitted here
|
||||
if( !IsVectorIndex(pIndex) ){
|
||||
sqlite3VdbeAddOp1(v, OP_SeekEnd, iIdx);
|
||||
}
|
||||
#else
|
||||
sqlite3VdbeAddOp1(v, OP_SeekEnd, iIdx);
|
||||
#endif
|
||||
}
|
||||
sqlite3VdbeAddOp2(v, OP_IdxInsert, iIdx, regRecord);
|
||||
sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT);
|
||||
@ -3933,7 +3969,8 @@ void sqlite3CreateIndex(
|
||||
Expr *pPIWhere, /* WHERE clause for partial indices */
|
||||
int sortOrder, /* Sort order of primary key when pList==NULL */
|
||||
int ifNotExist, /* Omit error if index already exists */
|
||||
u8 idxType /* The index type */
|
||||
u8 idxType, /* The index type */
|
||||
IdList *pUsing /* Using */
|
||||
){
|
||||
Table *pTab = 0; /* Table to be indexed */
|
||||
Index *pIndex = 0; /* The index to be created */
|
||||
@ -4258,6 +4295,14 @@ void sqlite3CreateIndex(
|
||||
pIndex->aSortOrder[i] = (u8)requestedSortOrder;
|
||||
}
|
||||
|
||||
|
||||
#ifndef SQLITE_OMIT_VECTOR
|
||||
if( vectorIndexCreate(pParse, pIndex, pUsing) != SQLITE_OK ) {
|
||||
goto exit_create_index;
|
||||
}
|
||||
idxType = pIndex->idxType; // vectorIndexCreate can update idxType to 4 (VECTOR INDEX)
|
||||
#endif
|
||||
|
||||
/* Append the table key to the end of the index. For WITHOUT ROWID
|
||||
** tables (when pPk!=0) this will be the declared PRIMARY KEY. For
|
||||
** normal tables (when pPk==0) this will be the rowid.
|
||||
@ -4604,11 +4649,22 @@ void sqlite3DropIndex(Parse *pParse, SrcList *pName, int ifExists){
|
||||
pParse->checkSchema = 1;
|
||||
goto exit_drop_index;
|
||||
}
|
||||
if( pIndex->idxType!=SQLITE_IDXTYPE_APPDEF ){
|
||||
if( (pIndex->idxType&3)!=SQLITE_IDXTYPE_APPDEF ){ // '&3' is a LibSQL fix to treat VECTOR index as APPDEF
|
||||
sqlite3ErrorMsg(pParse, "index associated with UNIQUE "
|
||||
"or PRIMARY KEY constraint cannot be dropped", 0);
|
||||
goto exit_drop_index;
|
||||
}
|
||||
#ifndef SQLITE_OMIT_VECTOR
|
||||
/*
|
||||
* There are several places to delete vector index:
|
||||
* 1. We can add this capability in the OP_Destroy op code. The problem is that it operates with root pages and since we do not maintain them properly, there is a small risk that we could delete something unrelated to the vector index
|
||||
* 2. We can add this capability in the OP_DropIndex op code. The problem is that db schema is locked at this moment and we will not be able to execute sqlite3_exec required for vectorIndexDrop
|
||||
* 3. Delete index during the parsing stage (implemented variant) - it's hacky and dirty but seems to me as more safe way to delete only something that we really want
|
||||
*/
|
||||
if( IsVectorIndex(pIndex) ){
|
||||
vectorIndexDrop(pParse->db, pIndex->zName);
|
||||
}
|
||||
#endif
|
||||
iDb = sqlite3SchemaToIndex(db, pIndex->pSchema);
|
||||
#ifndef SQLITE_OMIT_AUTHORIZATION
|
||||
{
|
||||
@ -5563,6 +5619,7 @@ KeyInfo *sqlite3KeyInfoOfIndex(Parse *pParse, Index *pIdx){
|
||||
}
|
||||
if( pKey ){
|
||||
assert( sqlite3KeyInfoIsWriteable(pKey) );
|
||||
pKey->zIndexName = sqlite3DbStrDup(pParse->db, pIdx->zName); // LibSQL patch: necessary fix for vector search to make it work
|
||||
for(i=0; i<nCol; i++){
|
||||
const char *zColl = pIdx->azColl[i];
|
||||
pKey->aColl[i] = zColl==sqlite3StrBINARY ? 0 :
|
||||
|
@ -2692,9 +2692,26 @@ int sqlite3OpenTableAndIndices(
|
||||
p5 = 0;
|
||||
}
|
||||
if( aToOpen==0 || aToOpen[i+1] ){
|
||||
/*
|
||||
** sqlite3OpenTableAndIndices is called for 'PRAGMA integrity_check' and we can't emit OP_OpenVectorIdx command for this operation;
|
||||
** As vector index creates empty B-tree index - it's safe to issue OP_OpenRead command for it
|
||||
** TODO: with current implementation, integrity_check will output error for vector index as rows will be missed in it
|
||||
** It's better to remove this error in future - but for now it's unclear how to do that with minimal code changes
|
||||
*/
|
||||
#ifndef SQLITE_OMIT_VECTOR
|
||||
if( IsVectorIndex(pIdx) && op == OP_OpenWrite ){
|
||||
sqlite3VdbeAddOp3(v, OP_OpenVectorIdx, iIdxCur, pIdx->tnum, iDb);
|
||||
sqlite3VdbeSetP4KeyInfo(pParse, pIdx);
|
||||
}else{
|
||||
sqlite3VdbeAddOp3(v, op, iIdxCur, pIdx->tnum, iDb);
|
||||
sqlite3VdbeSetP4KeyInfo(pParse, pIdx);
|
||||
sqlite3VdbeChangeP5(v, p5);
|
||||
}
|
||||
#else
|
||||
sqlite3VdbeAddOp3(v, op, iIdxCur, pIdx->tnum, iDb);
|
||||
sqlite3VdbeSetP4KeyInfo(pParse, pIdx);
|
||||
sqlite3VdbeChangeP5(v, p5);
|
||||
#endif
|
||||
VdbeComment((v, "%s", pIdx->zName));
|
||||
}
|
||||
}
|
||||
@ -2730,6 +2747,11 @@ static int xferCompatibleIndex(Index *pDest, Index *pSrc){
|
||||
int i;
|
||||
assert( pDest && pSrc );
|
||||
assert( pDest->pTable!=pSrc->pTable );
|
||||
#ifndef SQLITE_OMIT_VECTOR
|
||||
if( IsVectorIndex(pSrc) || IsVectorIndex(pDest) ){
|
||||
return 0; /* Vector index is not intended for xferOptimization */
|
||||
}
|
||||
#endif
|
||||
if( pDest->nKeyCol!=pSrc->nKeyCol || pDest->nColumn!=pSrc->nColumn ){
|
||||
return 0; /* Different number of columns */
|
||||
}
|
||||
|
@ -44,6 +44,9 @@ static int sqlite3TestExtInit(sqlite3 *db){
|
||||
#ifdef SQLITE_ENABLE_FTS5
|
||||
int sqlite3Fts5Init(sqlite3*);
|
||||
#endif
|
||||
#ifndef SQLITE_OMIT_VECTOR
|
||||
int vectorVtabInit(sqlite3*);
|
||||
#endif
|
||||
#ifdef SQLITE_ENABLE_STMTVTAB
|
||||
int sqlite3StmtVtabInit(sqlite3*);
|
||||
#endif
|
||||
@ -61,6 +64,9 @@ static int (*const sqlite3BuiltinExtensions[])(sqlite3*) = {
|
||||
#ifdef SQLITE_ENABLE_FTS5
|
||||
sqlite3Fts5Init,
|
||||
#endif
|
||||
#ifndef SQLITE_OMIT_VECTOR
|
||||
vectorVtabInit,
|
||||
#endif
|
||||
#if defined(SQLITE_ENABLE_ICU) || defined(SQLITE_ENABLE_ICU_COLLATIONS)
|
||||
sqlite3IcuInit,
|
||||
#endif
|
||||
|
@ -399,7 +399,7 @@ ccons ::= NOT NULL onconf(R). {sqlite3AddNotNull(pParse, R);}
|
||||
ccons ::= PRIMARY KEY sortorder(Z) onconf(R) autoinc(I).
|
||||
{sqlite3AddPrimaryKey(pParse,0,R,I,Z);}
|
||||
ccons ::= UNIQUE onconf(R). {sqlite3CreateIndex(pParse,0,0,0,0,R,0,0,0,0,
|
||||
SQLITE_IDXTYPE_UNIQUE);}
|
||||
SQLITE_IDXTYPE_UNIQUE,0);}
|
||||
ccons ::= CHECK LP(A) expr(X) RP(B). {sqlite3AddCheckConstraint(pParse,X,A.z,B.z);}
|
||||
ccons ::= REFERENCES nm(T) eidlist_opt(TA) refargs(R).
|
||||
{sqlite3CreateForeignKey(pParse,0,&T,TA,R);}
|
||||
@ -453,7 +453,7 @@ tcons ::= PRIMARY KEY LP sortlist(X) autoinc(I) RP onconf(R).
|
||||
{sqlite3AddPrimaryKey(pParse,X,R,I,0);}
|
||||
tcons ::= UNIQUE LP sortlist(X) RP onconf(R).
|
||||
{sqlite3CreateIndex(pParse,0,0,0,X,R,0,0,0,0,
|
||||
SQLITE_IDXTYPE_UNIQUE);}
|
||||
SQLITE_IDXTYPE_UNIQUE,0);}
|
||||
tcons ::= CHECK LP(A) expr(E) RP(B) onconf.
|
||||
{sqlite3AddCheckConstraint(pParse,E,A.z,B.z);}
|
||||
tcons ::= FOREIGN KEY LP eidlist(FA) RP
|
||||
@ -1448,11 +1448,15 @@ paren_exprlist(A) ::= LP exprlist(X) RP. {A = X;}
|
||||
|
||||
///////////////////////////// The CREATE INDEX command ///////////////////////
|
||||
//
|
||||
cmd ::= createkw(S) uniqueflag(U) INDEX ifnotexists(NE) nm(X) dbnm(D)
|
||||
cmd ::= createkw(S) uniqueflag(U) INDEX ifnotexists(NE) nm(X) dbnm(D) indextype(T)
|
||||
ON nm(Y) LP sortlist(Z) RP where_opt(W). {
|
||||
u8 idxType = SQLITE_IDXTYPE_APPDEF;
|
||||
if( T.pUsing!=0 ){
|
||||
idxType = SQLITE_IDXTYPE_VECTOR;
|
||||
}
|
||||
sqlite3CreateIndex(pParse, &X, &D,
|
||||
sqlite3SrcListAppend(pParse,0,&Y,0), Z, U,
|
||||
&S, W, SQLITE_SO_ASC, NE, SQLITE_IDXTYPE_APPDEF);
|
||||
&S, W, SQLITE_SO_ASC, NE, idxType, T.pUsing);
|
||||
if( IN_RENAME_OBJECT && pParse->pNewIndex ){
|
||||
sqlite3RenameTokenMap(pParse, pParse->pNewIndex->zName, &Y);
|
||||
}
|
||||
@ -1462,6 +1466,9 @@ cmd ::= createkw(S) uniqueflag(U) INDEX ifnotexists(NE) nm(X) dbnm(D)
|
||||
uniqueflag(A) ::= UNIQUE. {A = OE_Abort;}
|
||||
uniqueflag(A) ::= . {A = OE_None;}
|
||||
|
||||
%type indextype {OnOrUsing}
|
||||
indextype(T) ::= USING idlist(L). {T.pOn = 0; T.pUsing = L;}
|
||||
indextype(T) ::= . {T.pOn = 0; T.pUsing = 0;}
|
||||
|
||||
// The eidlist non-terminal (Expression Id List) generates an ExprList
|
||||
// from a list of identifiers. The identifier names are in ExprList.a[].zName.
|
||||
|
@ -1512,6 +1512,7 @@ KeyInfo *sqlite3KeyInfoAlloc(sqlite3 *db, int N, int X){
|
||||
p->enc = ENC(db);
|
||||
p->db = db;
|
||||
p->nRef = 1;
|
||||
p->zIndexName = NULL; // LibSQL patch: necessary fix for vector search to make it work
|
||||
memset(&p[1], 0, nExtra);
|
||||
}else{
|
||||
return (KeyInfo*)sqlite3OomFault(db);
|
||||
@ -1527,7 +1528,10 @@ void sqlite3KeyInfoUnref(KeyInfo *p){
|
||||
assert( p->db!=0 );
|
||||
assert( p->nRef>0 );
|
||||
p->nRef--;
|
||||
if( p->nRef==0 ) sqlite3DbNNFreeNN(p->db, p);
|
||||
if( p->nRef==0 ){
|
||||
sqlite3DbFree(p->db, p->zIndexName); // LibSQL patch: necessary fix for vector search to make it work
|
||||
sqlite3DbNNFreeNN(p->db, p);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2636,6 +2636,7 @@ struct FKey {
|
||||
** for the rowid at the end.
|
||||
*/
|
||||
struct KeyInfo {
|
||||
char *zIndexName; /* Name of the index. Might be NULL */
|
||||
u32 nRef; /* Number of references to this KeyInfo object */
|
||||
u8 enc; /* Text encoding - one of the SQLITE_UTF* values */
|
||||
u16 nKeyField; /* Number of key columns in the index */
|
||||
@ -2702,7 +2703,6 @@ struct UnpackedRecord {
|
||||
u8 eqSeen; /* True if an equality comparison has been seen */
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
** Each SQL index is represented in memory by an
|
||||
** instance of the following structure.
|
||||
@ -2766,7 +2766,7 @@ struct Index {
|
||||
u16 nKeyCol; /* Number of columns forming the key */
|
||||
u16 nColumn; /* Number of columns stored in the index */
|
||||
u8 onError; /* OE_Abort, OE_Ignore, OE_Replace, or OE_None */
|
||||
unsigned idxType:2; /* 0:Normal 1:UNIQUE, 2:PRIMARY KEY, 3:IPK */
|
||||
unsigned idxType:3; /* 0:Normal 1:UNIQUE, 2:PRIMARY KEY, 3:IPK, 4:VECTOR INDEX */
|
||||
unsigned bUnordered:1; /* Use this index for == or IN queries only */
|
||||
unsigned uniqNotNull:1; /* True if UNIQUE and NOT NULL for all columns */
|
||||
unsigned isResized:1; /* True if resizeIndexObject() has been called */
|
||||
@ -2797,6 +2797,7 @@ struct Index {
|
||||
#define SQLITE_IDXTYPE_UNIQUE 1 /* Implements a UNIQUE constraint */
|
||||
#define SQLITE_IDXTYPE_PRIMARYKEY 2 /* Is the PRIMARY KEY for the table */
|
||||
#define SQLITE_IDXTYPE_IPK 3 /* INTEGER PRIMARY KEY index */
|
||||
#define SQLITE_IDXTYPE_VECTOR 4 /* Vector index */
|
||||
|
||||
/* Return true if index X is a PRIMARY KEY index */
|
||||
#define IsPrimaryKeyIndex(X) ((X)->idxType==SQLITE_IDXTYPE_PRIMARYKEY)
|
||||
@ -2804,6 +2805,9 @@ struct Index {
|
||||
/* Return true if index X is a UNIQUE index */
|
||||
#define IsUniqueIndex(X) ((X)->onError!=OE_None)
|
||||
|
||||
/* Return true if index X is a vector index */
|
||||
#define IsVectorIndex(X) ((X)->idxType==SQLITE_IDXTYPE_VECTOR)
|
||||
|
||||
/* The Index.aiColumn[] values are normally positive integer. But
|
||||
** there are some negative values that have special meaning:
|
||||
*/
|
||||
@ -4948,7 +4952,7 @@ void sqlite3ClearOnOrUsing(sqlite3*, OnOrUsing*);
|
||||
void sqlite3SrcListDelete(sqlite3*, SrcList*);
|
||||
Index *sqlite3AllocateIndexObject(sqlite3*,i16,int,char**);
|
||||
void sqlite3CreateIndex(Parse*,Token*,Token*,SrcList*,ExprList*,int,Token*,
|
||||
Expr*, int, int, u8);
|
||||
Expr*, int, int, u8, IdList*);
|
||||
void sqlite3DropIndex(Parse*, SrcList*, int);
|
||||
int sqlite3Select(Parse*, Select*, SelectDest*);
|
||||
Select *sqlite3SelectNew(Parse*,ExprList*,SrcList*,Expr*,ExprList*,
|
||||
|
@ -27,7 +27,7 @@ int main() {
|
||||
.zName = "t_idx",
|
||||
.zShadow = "t_idx_shadow",
|
||||
.nFormatVersion = 1,
|
||||
.nDistanceFunc = 0,
|
||||
.nDistanceFunc = VECTOR_METRIC_TYPE_COS,
|
||||
.nBlockSize = TEST_BLOCK_SIZE,
|
||||
.nVectorDims = 1,
|
||||
.nNodeVectorType = VECTOR_TYPE_FLOAT32,
|
||||
|
@ -23,6 +23,9 @@
|
||||
#ifdef LIBSQL_ENABLE_WASM_RUNTIME
|
||||
#include "ext/udf/wasm_bindings.h"
|
||||
#endif
|
||||
#ifndef SQLITE_OMIT_VECTOR
|
||||
#include "vectorIndexInt.h"
|
||||
#endif
|
||||
|
||||
/*
|
||||
** Invoke this macro on memory cells just prior to changing the
|
||||
@ -239,6 +242,9 @@ static void test_trace_breakpoint(int pc, Op *pOp, Vdbe *v){
|
||||
/* Return true if the cursor was opened using the OP_OpenSorter opcode. */
|
||||
#define isSorter(x) ((x)->eCurType==CURTYPE_SORTER)
|
||||
|
||||
/* Return true if the cursor is of type CURTYPE_VECTOR_IDX. */
|
||||
#define isVectorIdx(x) ((x)->eCurType==CURTYPE_VECTOR_IDX)
|
||||
|
||||
/*
|
||||
** Allocate VdbeCursor number iCur. Return a pointer to it. Return NULL
|
||||
** if we run out of memory.
|
||||
@ -4207,6 +4213,46 @@ case OP_SetCookie: {
|
||||
break;
|
||||
}
|
||||
|
||||
#ifndef SQLITE_OMIT_VECTOR
|
||||
/* Opcode: OpenVectorIdx
|
||||
** Synopsis: root=P2 iDb=P3
|
||||
*/
|
||||
case OP_OpenVectorIdx: {
  /*
  ** Open a vector index cursor in cursor slot P1. P2 is recorded as the
  ** root page of the cursor and P3 as its database number. The index itself
  ** is located by the name stored in the P4 KeyInfo (zIndexName).
  ** If P5 equals OPFLAG_FORDELETE the index is cleared before it is opened.
  */
  // TODO: can we implement this right inside OP_OpenIdx?
  // TODO: Can we simplify this similar to OP_SorterOpen?
  KeyInfo *pKeyInfo = 0;
  VectorIdxCursor *cursor = 0;
  VdbeCursor *pCur;
  int nField = 0;
  if( pOp->p4type==P4_KEYINFO ){
    pKeyInfo = pOp->p4.pKeyInfo;
    assert( pKeyInfo->enc==ENC(db) );
    assert( pKeyInfo->db==db );
    nField = pKeyInfo->nAllField;
  }else if( pOp->p4type==P4_INT32 ){
    nField = pOp->p4.i;
  }
  /* The index name is only reachable through the KeyInfo, so fail with an
  ** error instead of dereferencing a NULL pointer below. */
  if( pKeyInfo==0 ){
    rc = SQLITE_INTERNAL;
    goto abort_due_to_error;
  }
  if( pOp->p5 == OPFLAG_FORDELETE ){
    vectorIndexClear(db, pKeyInfo->zIndexName);
  }
  rc = vectorIndexCursorInit(db, &cursor, pKeyInfo->zIndexName);
  if( rc ){
    goto abort_due_to_error;
  }
  // Once the VdbeCursor is allocated the Vdbe records it and will try to close it at disposal,
  // so no error may occur after a successful VdbeCursor allocation
  pCur = allocateCursor(p, pOp->p1, nField, CURTYPE_VECTOR_IDX);
  if( pCur==0 ){
    /* The vector cursor is not yet owned by any VdbeCursor: release it here,
    ** otherwise nothing would ever close it. */
    vectorIndexCursorClose(db, cursor);
    goto no_mem;
  }
  pCur->iDb = pOp->p3;
  pCur->nullRow = 1;
  pCur->isOrdered = 1;
  pCur->pgnoRoot = pOp->p2;
  pCur->pKeyInfo = pKeyInfo;
  pCur->isTable = 0;
  pCur->uc.pVecIdx = cursor;
  break;
}
|
||||
#endif
|
||||
|
||||
/* Opcode: OpenRead P1 P2 P3 P4 P5
|
||||
** Synopsis: root=P2 iDb=P3
|
||||
**
|
||||
@ -6518,6 +6564,44 @@ case OP_IdxInsert: { /* in2 */
|
||||
assert( (pIn2->flags & MEM_Blob) || (pOp->p5 & OPFLAG_PREFORMAT) );
|
||||
if( pOp->p5 & OPFLAG_NCHANGE ) p->nChange++;
|
||||
if (!pC->isEphemeral) inc_row_written(p, 1);
|
||||
#ifndef SQLITE_OMIT_VECTOR
|
||||
if( isVectorIdx(pC) ) {
|
||||
UnpackedRecord idxKeyStatic;
|
||||
UnpackedRecord *pIdxKey = NULL;
|
||||
int i;
|
||||
rc = ExpandBlob(pIn2);
|
||||
if( rc ) goto abort_due_to_error;
|
||||
x.nKey = pIn2->n;
|
||||
x.pKey = pIn2->z;
|
||||
x.aMem = aMem + pOp->p3;
|
||||
x.nMem = (u16)pOp->p4.i;
|
||||
/*
|
||||
* Key can be provided in packed format (only pKey and nKey are set) to the btree (for example, during the REINDEX)
|
||||
* So we need to unpack it in some cases (x.nMem == 0 condition branch)
|
||||
*/
|
||||
assert( x.nMem > 0 || x.nKey > 0 );
|
||||
if( x.nMem == 0 ){
|
||||
pIdxKey = sqlite3VdbeAllocUnpackedRecord(pC->pKeyInfo);
|
||||
if( pIdxKey==0 ) goto no_mem;
|
||||
sqlite3VdbeRecordUnpack(pC->pKeyInfo, x.nKey, x.pKey, pIdxKey);
|
||||
rc = vectorIndexInsert(pC->uc.pVecIdx, pIdxKey, &p->zErrMsg);
|
||||
/*
|
||||
* vectorIndexInsert can allocate additional memory for sqlite3_value (usually during sqlite3_value_text/sqlite3_value_blob calls)
|
||||
* so, we need to explicitly clear it before freeing whole UnpackedRecord with single free(...) call
|
||||
*/
|
||||
for(i = 0; i < pIdxKey->nField; i++){
|
||||
sqlite3VdbeMemRelease(pIdxKey->aMem + i);
|
||||
}
|
||||
sqlite3DbFreeNN(db, pIdxKey);
|
||||
}else {
|
||||
idxKeyStatic.nField = x.nMem;
|
||||
idxKeyStatic.aMem = x.aMem;
|
||||
rc = vectorIndexInsert(pC->uc.pVecIdx, &idxKeyStatic, &p->zErrMsg);
|
||||
}
|
||||
if( rc ) goto abort_due_to_error;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
assert( pC->eCurType==CURTYPE_BTREE );
|
||||
assert( pC->isTable==0 );
|
||||
rc = ExpandBlob(pIn2);
|
||||
@ -6587,6 +6671,18 @@ case OP_IdxDelete: {
|
||||
assert( pOp->p1>=0 && pOp->p1<p->nCursor );
|
||||
pC = p->apCsr[pOp->p1];
|
||||
assert( pC!=0 );
|
||||
#ifndef SQLITE_OMIT_VECTOR
|
||||
if( isVectorIdx(pC) ) {
|
||||
sqlite3VdbeIncrWriteCounter(p, pC);
|
||||
r.pKeyInfo = pC->pKeyInfo;
|
||||
r.nField = (u16)pOp->p3;
|
||||
r.default_rc = 0;
|
||||
r.aMem = &aMem[pOp->p2];
|
||||
rc = vectorIndexDelete(pC->uc.pVecIdx, &r, &p->zErrMsg);
|
||||
if( rc ) goto abort_due_to_error;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
assert( pC->eCurType==CURTYPE_BTREE );
|
||||
sqlite3VdbeIncrWriteCounter(p, pC);
|
||||
pCrsr = pC->uc.pCursor;
|
||||
|
@ -59,12 +59,21 @@ typedef struct AuxData AuxData;
|
||||
/* A cache of large TEXT or BLOB values in a VdbeCursor */
|
||||
typedef struct VdbeTxtBlbCache VdbeTxtBlbCache;
|
||||
|
||||
#ifndef SQLITE_OMIT_VECTOR
|
||||
/* Opaque type used in code in vectorIndex.c */
|
||||
typedef struct VectorIdxCursor VectorIdxCursor;
|
||||
#endif
|
||||
|
||||
/* Types of VDBE cursors */
|
||||
#define CURTYPE_BTREE 0
|
||||
#define CURTYPE_SORTER 1
|
||||
#define CURTYPE_VTAB 2
|
||||
#define CURTYPE_PSEUDO 3
|
||||
|
||||
#ifndef SQLITE_OMIT_VECTOR
|
||||
#define CURTYPE_VECTOR_IDX 64
|
||||
#endif
|
||||
|
||||
/*
|
||||
** A VdbeCursor is an superclass (a wrapper) for various cursor objects:
|
||||
**
|
||||
@ -117,6 +126,9 @@ struct VdbeCursor {
|
||||
BtCursor *pCursor; /* CURTYPE_BTREE or _PSEUDO. Btree cursor */
|
||||
sqlite3_vtab_cursor *pVCur; /* CURTYPE_VTAB. Vtab cursor */
|
||||
VdbeSorter *pSorter; /* CURTYPE_SORTER. Sorter object */
|
||||
#ifndef SQLITE_OMIT_VECTOR
|
||||
VectorIdxCursor *pVecIdx; /* CURTYPE_VECTOR_IDX. Vector index cursor */
|
||||
#endif
|
||||
} uc;
|
||||
KeyInfo *pKeyInfo; /* Info about index keys needed by index cursors */
|
||||
u32 iHdrOffset; /* Offset to next unparsed byte of the header */
|
||||
|
@ -15,6 +15,10 @@
|
||||
#include "sqliteInt.h"
|
||||
#include "vdbeInt.h"
|
||||
|
||||
#ifndef SQLITE_OMIT_VECTOR
|
||||
#include "vectorIndexInt.h"
|
||||
#endif
|
||||
|
||||
/* Forward references */
|
||||
static void freeEphemeralFunction(sqlite3 *db, FuncDef *pDef);
|
||||
static void vdbeFreeOpArray(sqlite3 *, Op *, int);
|
||||
@ -2745,6 +2749,12 @@ void sqlite3VdbeFreeCursorNN(Vdbe *p, VdbeCursor *pCx){
|
||||
pModule->xClose(pVCur);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
#ifndef SQLITE_OMIT_VECTOR
|
||||
case CURTYPE_VECTOR_IDX: {
|
||||
vectorIndexCursorClose(p->db, pCx->uc.pVecIdx);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
@ -575,6 +575,14 @@ out_free:
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Marker function which is used in index creation syntax: CREATE INDEX idx ON t(libsql_vector_idx(emb));
|
||||
*/
|
||||
static void libsqlVectorIdx(sqlite3_context *context, int argc, sqlite3_value **argv){
  // The function is registered as variadic, so it can be invoked without arguments;
  // in that case there is no argv[0] to forward and the result is NULL
  if( argc < 1 ){
    sqlite3_result_null(context);
    return;
  }
  // it's important for this function to be a no-op, as sqlite applies it to the column before feeding the value to the index
  sqlite3_result_value(context, argv[0]);
}
|
||||
|
||||
/*
|
||||
** Register vector functions.
|
||||
*/
|
||||
@ -585,6 +593,8 @@ void sqlite3RegisterVectorFunctions(void){
|
||||
FUNCTION(vector64, 1, 0, 0, vector64Func),
|
||||
FUNCTION(vector_extract, 1, 0, 0, vectorExtractFunc),
|
||||
FUNCTION(vector_distance_cos, 2, 0, 0, vectorDistanceCosFunc),
|
||||
|
||||
FUNCTION(libsql_vector_idx, -1, 0, 0, libsqlVectorIdx),
|
||||
};
|
||||
sqlite3InsertBuiltinFuncs(aVectorFuncs, ArraySize(aVectorFuncs));
|
||||
}
|
||||
|
917
libsql-sqlite3/src/vectorIndex.c
Normal file
917
libsql-sqlite3/src/vectorIndex.c
Normal file
@ -0,0 +1,917 @@
|
||||
/*
|
||||
** 2024-03-18
|
||||
**
|
||||
** Copyright 2024 the libSQL authors
|
||||
**
|
||||
** Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
** this software and associated documentation files (the "Software"), to deal in
|
||||
** the Software without restriction, including without limitation the rights to
|
||||
** use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
** the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
** subject to the following conditions:
|
||||
**
|
||||
** The above copyright notice and this permission notice shall be included in all
|
||||
** copies or substantial portions of the Software.
|
||||
**
|
||||
** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
** FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
** COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
** IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
** libSQL vector search.
|
||||
*/
|
||||
#ifndef SQLITE_OMIT_VECTOR
|
||||
#include "sqlite3.h"
|
||||
#include "vdbeInt.h"
|
||||
#include "sqliteInt.h"
|
||||
#include "vectorIndexInt.h"
|
||||
|
||||
/**************************************************************************
|
||||
** VectorIdxParams utilities
|
||||
****************************************************************************/
|
||||
|
||||
/*
** Initialize pParams with a serialized size of nBinSize bytes.
** When pBinBuf is not NULL, nBinSize bytes are copied from it into the
** parameter buffer; when it is NULL only the size field is set and the
** buffer contents are left untouched.
*/
void vectorIdxParamsInit(VectorIdxParams *pParams, u8 *pBinBuf, int nBinSize) {
  assert( nBinSize <= VECTOR_INDEX_PARAMS_BUF_SIZE );

  if( pBinBuf != NULL ){
    memcpy(pParams->pBinBuf, pBinBuf, nBinSize);
  }
  pParams->nBinSize = nBinSize;
}
|
||||
|
||||
u64 vectorIdxParamsGetU64(const VectorIdxParams *pParams, char tag) {
|
||||
int i, offset;
|
||||
u64 value = 0;
|
||||
for (i = 0; i + 9 <= pParams->nBinSize; i += 9){
|
||||
if( pParams->pBinBuf[i] != tag ){
|
||||
continue;
|
||||
}
|
||||
// choose latest value from the VectorIdxParams bin
|
||||
value = 0;
|
||||
for(offset = 0; offset < 8; offset++){
|
||||
value |= ((u64)(pParams->pBinBuf[i + 1 + offset]) << (u64)(8 * offset));
|
||||
}
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
/*
** Append a 9-byte record (tag byte + little-endian 64-bit value) to the
** serialized parameters. Returns 0 on success and -1 if the record does
** not fit into VECTOR_INDEX_PARAMS_BUF_SIZE bytes (nothing is written then).
*/
int vectorIdxParamsPutU64(VectorIdxParams *pParams, char tag, u64 value) {
  int base = pParams->nBinSize;
  int k;
  if( base + 9 > VECTOR_INDEX_PARAMS_BUF_SIZE ){
    return -1;
  }
  pParams->pBinBuf[base] = tag;
  for(k = 0; k < 8; k++){
    pParams->pBinBuf[base + 1 + k] = (value >> (8 * k)) & 0xff;
  }
  pParams->nBinSize = base + 9;
  return 0;
}
|
||||
|
||||
double vectorIdxParamsGetF64(const VectorIdxParams *pParams, char tag) {
|
||||
u64 value = vectorIdxParamsGetU64(pParams, tag);
|
||||
return *((double*)&value);
|
||||
}
|
||||
|
||||
/*
** Store the bit pattern of the double 'value' under 'tag'.
** The bits are copied with memcpy: reading a double object through a u64*
** violates strict aliasing and is undefined behavior.
** Returns whatever vectorIdxParamsPutU64 returns for the resulting record.
*/
int vectorIdxParamsPutF64(VectorIdxParams *pParams, char tag, double value) {
  u64 bits;
  assert( sizeof(bits) == sizeof(value) );
  memcpy(&bits, &value, sizeof(bits));
  return vectorIdxParamsPutU64(pParams, tag, bits);
}
|
||||
|
||||
/**************************************************************************
|
||||
** VectorIdxKey utilities
|
||||
****************************************************************************/
|
||||
|
||||
/*
** Fill pKey with the description of the key that identifies a row of pTable.
**
** For a table with a rowid the key is a single INTEGER column with BINARY
** collation. Otherwise the key mirrors the columns of the primary key
** index: their affinities are taken from the table columns and their
** collations from the index.
**
** Returns 0 on success. Returns -1 and sets *pzErrMsg to a static string
** when the primary key has more than VECTOR_INDEX_MAX_KEY_COLUMNS columns.
*/
int vectorIdxKeyGet(Table *pTable, VectorIdxKey *pKey, const char **pzErrMsg) {
  Index *pPk;
  int iCol;

  if( HasRowid(pTable) ){
    pKey->nKeyColumns = 1;
    pKey->aKeyAffinity[0] = SQLITE_AFF_INTEGER;
    pKey->azKeyCollation[0] = "BINARY";
    return 0;
  }

  pPk = sqlite3PrimaryKeyIndex(pTable);
  if( pPk->nKeyCol > VECTOR_INDEX_MAX_KEY_COLUMNS ){
    *pzErrMsg = "exceeded limit for composite columns in primary key index";
    return -1;
  }
  pKey->nKeyColumns = pPk->nKeyCol;
  for(iCol = 0; iCol < pPk->nKeyCol; iCol++){
    pKey->aKeyAffinity[iCol] = pTable->aCol[pPk->aiColumn[iCol]].affinity;
    pKey->azKeyCollation[iCol] = pPk->azColl[iCol];
  }
  return 0;
}
|
||||
|
||||
int vectorIdxKeyColumnRender(const VectorIdxKey *pKey, const char *prefix, char *pBuf, int nBufSize) {
|
||||
static const char * const azType[] = {
|
||||
/* SQLITE_AFF_BLOB */ " BLOB",
|
||||
/* SQLITE_AFF_TEXT */ " TEXT",
|
||||
/* SQLITE_AFF_NUMERIC */ " NUMERIC",
|
||||
/* SQLITE_AFF_INTEGER */ " INTEGER",
|
||||
/* SQLITE_AFF_REAL */ " REAL",
|
||||
/* SQLITE_AFF_FLEXNUM */ " NUMERIC",
|
||||
};
|
||||
int i, size;
|
||||
for(i = 0; i < pKey->nKeyColumns && nBufSize > 0; i++){
|
||||
const char *collation = pKey->azKeyCollation[i];
|
||||
if( sqlite3_strnicmp(collation, "BINARY", 6) == 0 ){
|
||||
collation = "";
|
||||
}
|
||||
if( i == 0 ){
|
||||
size = snprintf(pBuf, nBufSize, "%s %s %s", prefix, azType[pKey->aKeyAffinity[i] - SQLITE_AFF_BLOB], collation);
|
||||
}else {
|
||||
size = snprintf(pBuf, nBufSize, ",%s%d %s %s", prefix, i, azType[pKey->aKeyAffinity[i] - SQLITE_AFF_BLOB], collation);
|
||||
}
|
||||
if( size < 0 ){
|
||||
return -1;
|
||||
}
|
||||
pBuf += size;
|
||||
nBufSize -= size;
|
||||
}
|
||||
if( nBufSize <= 0 ){
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
** Render the names of nKeyColumns key columns into pBuf as a comma
** separated list: 'prefix' for the first column and 'prefix<i>' for column
** number i (i >= 1), e.g. "k,k1,k2".
**
** Returns 0 on success (pBuf is always NUL-terminated then, even for zero
** columns). Returns -1 if nBufSize is not positive, if the output including
** the terminating NUL does not fit into nBufSize bytes, or if snprintf fails.
*/
int vectorIdxKeyPlaceholderRender(int nKeyColumns, const char *prefix, char *pBuf, int nBufSize) {
  int i, size;

  if( nBufSize <= 0 ){
    return -1;
  }
  pBuf[0] = '\0';
  for(i = 0; i < nKeyColumns; i++){
    if( i == 0 ){
      size = snprintf(pBuf, nBufSize, "%s", prefix);
    }else{
      size = snprintf(pBuf, nBufSize, ",%s%d", prefix, i);
    }
    // size >= nBufSize means the output was truncated: stop before moving pBuf past the end of the buffer
    if( size < 0 || size >= nBufSize ){
      return -1;
    }
    pBuf += size;
    nBufSize -= size;
  }
  return 0;
}
|
||||
|
||||
/**************************************************************************
|
||||
** VectorInRow utilities
|
||||
****************************************************************************/
|
||||
|
||||
sqlite3_value* vectorInRowKey(const VectorInRow *pVectorInRow, int iKey) {
|
||||
assert( 0 <= iKey && iKey < pVectorInRow->nKeys );
|
||||
return pVectorInRow->pKeyValues + iKey;
|
||||
}
|
||||
|
||||
i64 vectorInRowLegacyId(const VectorInRow *pVectorInRow) {
|
||||
if( pVectorInRow->nKeys == 1 && sqlite3_value_type(pVectorInRow->pKeyValues + 0) == SQLITE_INTEGER ){
|
||||
return sqlite3_value_int64(pVectorInRow->pKeyValues);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
** Store the key of the row in *nRowid if the row has exactly one key and
** this key is an INTEGER.
**
** Returns 0 on success and -1 (leaving *nRowid untouched) otherwise.
*/
int vectorInRowTryGetRowid(const VectorInRow *pVectorInRow, u64 *nRowid) {
  int bSingleIntegerKey =
    pVectorInRow->nKeys == 1
    && sqlite3_value_type(vectorInRowKey(pVectorInRow, 0)) == SQLITE_INTEGER;

  if( !bSingleIntegerKey ){
    return -1;
  }
  *nRowid = sqlite3_value_int64(vectorInRowKey(pVectorInRow, 0));
  return 0;
}
|
||||
|
||||
int vectorInRowPlaceholderRender(const VectorInRow *pVectorInRow, char *pBuf, int nBufSize) {
|
||||
int i;
|
||||
assert( pVectorInRow->nKeys > 0 );
|
||||
if( nBufSize < 2 * pVectorInRow->nKeys ){
|
||||
return -1;
|
||||
}
|
||||
for(i = 0; i < pVectorInRow->nKeys; i++){
|
||||
*(pBuf++) = '?';
|
||||
*(pBuf++) = ',';
|
||||
}
|
||||
*(pBuf - 1) = '\0';
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
** Initialize pVectorInRow from the unpacked index record pRecord: field 0 of
** the record is the vector value, all remaining fields are key values.
**
** The key values are NOT copied - pVectorInRow->pKeyValues points into
** pRecord->aMem. The vector is allocated with vectorAlloc and must be
** released with vectorInRowFree.
**
** If the vector value is NULL the function succeeds and leaves
** pVectorInRow->pVector equal to NULL.
**
** Returns SQLITE_OK on success. On failure returns an error code and
** guarantees that pVectorInRow->pVector is NULL, so the structure never
** holds a pointer to an already released vector.
*/
int vectorInRowAlloc(sqlite3 *db, const UnpackedRecord *pRecord, VectorInRow *pVectorInRow, char **pzErrMsg) {
  int rc = SQLITE_OK;
  int type, dims;
  struct sqlite3_value *pVectorValue = pRecord->aMem + 0;
  pVectorInRow->pKeyValues = pRecord->aMem + 1;
  pVectorInRow->nKeys = pRecord->nField - 1;
  pVectorInRow->pVector = NULL;

  if( pVectorInRow->nKeys <= 0 ){
    rc = SQLITE_ERROR;
    goto out;
  }

  if( sqlite3_value_type(pVectorValue)==SQLITE_NULL ){
    rc = SQLITE_OK;
    goto out;
  }

  if( detectVectorParameters(pVectorValue, VECTOR_TYPE_FLOAT32, &type, &dims, pzErrMsg) != 0 ){
    rc = SQLITE_ERROR;
    goto out;
  }

  pVectorInRow->pVector = vectorAlloc(type, dims);
  if( pVectorInRow->pVector == NULL ){
    rc = SQLITE_NOMEM_BKPT;
    goto out;
  }

  if( sqlite3_value_type(pVectorValue) == SQLITE_BLOB ){
    vectorInitFromBlob(pVectorInRow->pVector, sqlite3_value_blob(pVectorValue), sqlite3_value_bytes(pVectorValue));
  } else if( sqlite3_value_type(pVectorValue) == SQLITE_TEXT ){
    // users can put strings (e.g. '[1,2,3]') in the table and we should process them correctly
    if( vectorParse(pVectorValue, pVectorInRow->pVector, pzErrMsg) != 0 ){
      rc = SQLITE_ERROR;
      goto out;
    }
  }
  rc = SQLITE_OK;
out:
  if( rc != SQLITE_OK ){
    vectorFree(pVectorInRow->pVector);
    // do not leave dangling pointer in the output structure
    pVectorInRow->pVector = NULL;
  }
  return rc;
}
|
||||
|
||||
/*
** Release the vector owned by pVectorInRow. The key values are not owned by
** the structure and are left untouched.
**
** The pointer is reset to NULL so calling the function twice is harmless.
*/
void vectorInRowFree(sqlite3 *db, VectorInRow *pVectorInRow) {
  vectorFree(pVectorInRow->pVector);
  pVectorInRow->pVector = NULL;
}
|
||||
|
||||
/**************************************************************************
|
||||
** VectorOutRows utilities
|
||||
****************************************************************************/
|
||||
|
||||
/*
** Prepare pRows to hold nRows x nCols result cells.
**
** A single column with INTEGER affinity is stored compactly in the aRowids
** array; every other shape is stored as a zeroed array of sqlite3_value
** pointers in ppValues. Exactly one of the two arrays is allocated.
**
** Returns SQLITE_OK on success and SQLITE_NOMEM if the amount of cells
** exceeds VECTOR_OUT_ROWS_MAX_CELLS or if the allocation fails.
*/
int vectorOutRowsAlloc(sqlite3 *db, VectorOutRows *pRows, int nRows, int nCols, char firstColumnAff){
  int bRowidsOnly;

  assert( nCols > 0 && nRows >= 0 );
  pRows->nRows = nRows;
  pRows->nCols = nCols;
  pRows->aRowids = NULL;
  pRows->ppValues = NULL;

  if( (u64)nRows * (u64)nCols > VECTOR_OUT_ROWS_MAX_CELLS ){
    return SQLITE_NOMEM_BKPT;
  }

  bRowidsOnly = ( nCols == 1 && firstColumnAff == SQLITE_AFF_INTEGER );
  if( bRowidsOnly ){
    pRows->aRowids = sqlite3DbMallocRaw(db, nRows * sizeof(i64));
    if( pRows->aRowids == NULL ){
      return SQLITE_NOMEM_BKPT;
    }
    return SQLITE_OK;
  }

  pRows->ppValues = sqlite3DbMallocZero(db, nRows * nCols * sizeof(sqlite3_value*));
  if( pRows->ppValues == NULL ){
    return SQLITE_NOMEM_BKPT;
  }
  return SQLITE_OK;
}
|
||||
|
||||
/*
** Store a value in the cell (iRow, iCol) of pRows.
**
** If pRows uses the compact rowid storage the value is taken from *pInt
** when pInt is not NULL and from the INTEGER pValue otherwise. For the
** generic storage a private copy of pValue is created and stored.
**
** Returns SQLITE_OK on success and SQLITE_NOMEM if the copy of pValue
** can't be allocated.
*/
int vectorOutRowsPut(VectorOutRows *pRows, int iRow, int iCol, const u64 *pInt, sqlite3_value *pValue) {
  assert( 0 <= iRow && iRow < pRows->nRows );
  assert( 0 <= iCol && iCol < pRows->nCols );
  assert( pRows->aRowids != NULL || pRows->ppValues != NULL );
  assert( pInt == NULL || pRows->aRowids != NULL );
  assert( pInt != NULL || pValue != NULL );

  if( pRows->aRowids == NULL ){
    // pValue can be unprotected and the rows must own their values - so a copy is stored
    sqlite3_value *pOwned = sqlite3_value_dup(pValue);
    if( pOwned == NULL ){
      return SQLITE_NOMEM_BKPT;
    }
    pRows->ppValues[iRow * pRows->nCols + iCol] = pOwned;
    return SQLITE_OK;
  }

  assert( pRows->nCols == 1 );
  if( pInt != NULL ){
    pRows->aRowids[iRow] = *pInt;
  }else{
    assert( sqlite3_value_type(pValue) == SQLITE_INTEGER );
    pRows->aRowids[iRow] = sqlite3_value_int64(pValue);
  }
  return SQLITE_OK;
}
|
||||
|
||||
/*
** Set the cell (iRow, iCol) of pRows as the result of the SQL function
** call described by context.
*/
void vectorOutRowsGet(sqlite3_context *context, const VectorOutRows *pRows, int iRow, int iCol) {
  assert( 0 <= iRow && iRow < pRows->nRows );
  assert( 0 <= iCol && iCol < pRows->nCols );
  assert( pRows->aRowids != NULL || pRows->ppValues != NULL );

  if( pRows->aRowids == NULL ){
    sqlite3_result_value(context, pRows->ppValues[iRow * pRows->nCols + iCol]);
    return;
  }
  assert( pRows->nCols == 1 );
  sqlite3_result_int64(context, pRows->aRowids[iRow]);
}
|
||||
|
||||
/*
** Release everything owned by pRows: either the rowid array or every
** stored sqlite3_value copy together with the array of pointers.
*/
void vectorOutRowsFree(sqlite3 *db, VectorOutRows *pRows) {
  int iCell;

  // both arrays are NULL if processing failed before the rows were allocated
  assert( pRows->aRowids == NULL || pRows->ppValues == NULL );

  if( pRows->aRowids != NULL ){
    sqlite3DbFree(db, pRows->aRowids);
    return;
  }
  if( pRows->ppValues == NULL ){
    return;
  }
  for(iCell = 0; iCell < pRows->nRows * pRows->nCols; iCell++){
    if( pRows->ppValues[iCell] == NULL ){
      continue;
    }
    sqlite3_value_free(pRows->ppValues[iCell]);
  }
  sqlite3DbFree(db, pRows->ppValues);
}
|
||||
|
||||
/*
|
||||
* Internal type to represent VECTOR_COLUMN_TYPES array
|
||||
* We support both FLOATNN and FNN_BLOB type names for the following reasons:
|
||||
* 1. FLOATNN is easy to type for humans and generally OK to use for column type names
|
||||
* 2. FNN_BLOB is aligned with SQLite affinity rules and can be used in cases where compatibility with type affinity rules is important
|
||||
* (for example, before loading some third-party extensions or analysing the DB file with tools from the SQLite ecosystem)
|
||||
*/
|
||||
/* Single recognized column type name of the vector column */
struct VectorColumnType {
  const char *zName; /* type name without the dimension suffix, e.g. "FLOAT32" */
  int nBits;         /* size of single vector component in bits (32 or 64) */
};

/* All column type names which are accepted for the indexed vector column */
static struct VectorColumnType VECTOR_COLUMN_TYPES[] = {
  { .zName = "FLOAT32",  .nBits = 32 },
  { .zName = "FLOAT64",  .nBits = 64 },
  { .zName = "F32_BLOB", .nBits = 32 },
  { .zName = "F64_BLOB", .nBits = 64 }
};
|
||||
|
||||
/*
|
||||
* Internal type to represent VECTOR_PARAM_NAMES array with recognized parameters for index creation
|
||||
* For example, libsql_vector_idx(embedding, 'type=diskann', 'metric=cosine')
|
||||
*/
|
||||
struct VectorParamName {
|
||||
const char *zName;
|
||||
int tag;
|
||||
int type; // 0 - enum, 1 - integer, 2 - float
|
||||
const char *zValueStr;
|
||||
u64 value;
|
||||
};
|
||||
|
||||
static struct VectorParamName VECTOR_PARAM_NAMES[] = {
|
||||
{ "type", VECTOR_INDEX_TYPE_PARAM_ID, 0, "diskann", VECTOR_INDEX_TYPE_DISKANN },
|
||||
{ "metric", VECTOR_METRIC_TYPE_PARAM_ID, 0, "cosine", VECTOR_METRIC_TYPE_COS },
|
||||
{ "alpha", VECTOR_PRUNING_ALPHA_PARAM_ID, 2, 0, 0 },
|
||||
{ "search_l", VECTOR_SEARCH_L_PARAM_ID, 1, 0, 0 },
|
||||
{ "insert_l", VECTOR_INSERT_L_PARAM_ID, 2, 0, 0 },
|
||||
};
|
||||
|
||||
/*
** Parse single "key=value" parameter string and serialize it into pParams.
**
** The key must match (case insensitive) the whole name of one of the
** VECTOR_PARAM_NAMES entries. The value is interpreted according to the type
** of the entry:
**   - enum:    must be equal (case insensitive) to the zValueStr of the entry
**   - integer: must be a positive integer
**   - float:   must be a valid floating point literal
**
** Returns 0 on success. Returns -1 and stores a static string with the
** description of the problem in *pErrMsg otherwise.
*/
static int parseVectorIdxParam(const char *zParam, VectorIdxParams *pParams, const char **pErrMsg) {
  int i, iDelimiter = 0, nValueLen = 0;
  const char *zValue;
  while( zParam[iDelimiter] && zParam[iDelimiter] != '=' ){
    iDelimiter++;
  }
  if( zParam[iDelimiter] != '=' ){
    *pErrMsg = "unexpected parameter format";
    return -1;
  }
  zValue = zParam + iDelimiter + 1;
  nValueLen = sqlite3Strlen30(zValue);
  for(i = 0; i < ArraySize(VECTOR_PARAM_NAMES); i++){
    // the key must match the whole name: comparing only first iDelimiter characters
    // would accept any prefix of the name (including the empty key)
    if( sqlite3Strlen30(VECTOR_PARAM_NAMES[i].zName) != iDelimiter
     || sqlite3_strnicmp(VECTOR_PARAM_NAMES[i].zName, zParam, iDelimiter) != 0 ){
      continue;
    }
    if( VECTOR_PARAM_NAMES[i].type == 1 ){
      // sqlite3Atoi returns signed int: negative value must not be silently converted to huge u64
      int value = sqlite3Atoi(zValue);
      if( value <= 0 ){
        *pErrMsg = "invalid representation of integer vector index parameter";
        return -1;
      }
      if( vectorIdxParamsPutU64(pParams, VECTOR_PARAM_NAMES[i].tag, (u64)value) != 0 ){
        *pErrMsg = "unable to serialize integer vector index parameter";
        return -1;
      }
      return 0;
    }else if( VECTOR_PARAM_NAMES[i].type == 2 ){
      double value;
      // sqlite3AtoF returns value >= 1 if string is valid float
      if( sqlite3AtoF(zValue, &value, nValueLen, SQLITE_UTF8) <= 0 ){
        *pErrMsg = "invalid representation of floating point vector index parameter";
        return -1;
      }
      if( vectorIdxParamsPutF64(pParams, VECTOR_PARAM_NAMES[i].tag, value) != 0 ){
        *pErrMsg = "unable to serialize floating point vector index parameter";
        return -1;
      }
      return 0;
    }else if( VECTOR_PARAM_NAMES[i].type == 0
           // the value must match the whole enum string (empty value or prefix are not enough)
           && sqlite3Strlen30(VECTOR_PARAM_NAMES[i].zValueStr) == nValueLen
           && sqlite3_strnicmp(VECTOR_PARAM_NAMES[i].zValueStr, zValue, nValueLen) == 0 ){
      if( vectorIdxParamsPutU64(pParams, VECTOR_PARAM_NAMES[i].tag, VECTOR_PARAM_NAMES[i].value) != 0 ){
        *pErrMsg = "unable to serialize vector index parameter";
        return -1;
      }
      return 0;
    }else{
      *pErrMsg = "unexpected parameter type";
      return -1;
    }
  }
  *pErrMsg = "unexpected parameter key";
  return -1;
}
|
||||
|
||||
/*
** Serialize all parameters of the vector index into pParams: the format
** version, the vector type and dimensions, followed by every "key=value"
** string from pArgList.
**
** pArgList must point to the first parameter string (the arguments which
** precede the parameters must be skipped by the caller) and nArgs is the
** amount of parameter strings.
**
** Returns SQLITE_OK on success. Returns SQLITE_ERROR and sets the error
** message of pParse otherwise.
*/
int parseVectorIdxParams(Parse *pParse, VectorIdxParams *pParams, int type, int dims, struct ExprList_item *pArgList, int nArgs) {
  int i;
  const char *pErrMsg;
  if( vectorIdxParamsPutU64(pParams, VECTOR_FORMAT_PARAM_ID, VECTOR_FORMAT_DEFAULT) != 0 ){
    sqlite3ErrorMsg(pParse, "unable to serialize vector index parameter: format");
    return SQLITE_ERROR;
  }
  if( vectorIdxParamsPutU64(pParams, VECTOR_TYPE_PARAM_ID, type) != 0 ){
    sqlite3ErrorMsg(pParse, "unable to serialize vector index parameter: type");
    return SQLITE_ERROR;
  }
  if( vectorIdxParamsPutU64(pParams, VECTOR_DIM_PARAM_ID, dims) != 0 ){
    sqlite3ErrorMsg(pParse, "unable to serialize vector index parameter: dim");
    return SQLITE_ERROR;
  }
  // start from zero: the column argument is already excluded from pArgList by the caller,
  // so starting from one would silently ignore the first parameter
  for(i = 0; i < nArgs; i++){
    Expr *pArgExpr = pArgList[i].pExpr;
    if( pArgExpr->op != TK_STRING ){
      sqlite3ErrorMsg(pParse, "all arguments after first must be strings");
      return SQLITE_ERROR;
    }
    if( parseVectorIdxParam(pArgExpr->u.zToken, pParams, &pErrMsg) != 0 ){
      sqlite3ErrorMsg(pParse, "invalid vector index parameter '%s': %s", pArgExpr->u.zToken, pErrMsg);
      return SQLITE_ERROR;
    }
  }
  return SQLITE_OK;
}
|
||||
|
||||
/**************************************************************************
|
||||
** Vector index cursor implementations
|
||||
****************************************************************************/
|
||||
|
||||
/*
|
||||
** A VectorIdxCursor is a special cursor to perform vector index lookups.
|
||||
*/
|
||||
struct VectorIdxCursor {
|
||||
sqlite3 *db; /* Database connection (also used to allocate and free the cursor itself) */
|
||||
DiskAnnIndex *index; /* DiskANN index on disk (opened by diskAnnOpenIndex, closed by diskAnnCloseIndex) */
|
||||
};
|
||||
|
||||
/**
|
||||
** Parses a type string such as `FLOAT32(3)` and set number of dimensions and bits
|
||||
**
|
||||
** Returns 0 on success and sets correct values in both pDims and pType pointers
|
||||
** Returns -1 if the type string is not a valid vector type for index and set pErrMsg to static string with error description in this case
|
||||
**/
|
||||
int vectorIdxParseColumnType(const char *zType, int *pType, int *pDims, char **pErrMsg){
|
||||
int dimensions = 0;
|
||||
int i;
|
||||
for(i = 0; i < ArraySize(VECTOR_COLUMN_TYPES); i++){
|
||||
const char* name = VECTOR_COLUMN_TYPES[i].zName;
|
||||
const char* zTypePtr = zType + strlen(name);
|
||||
if( sqlite3_strnicmp(zType, name, strlen(name)) != 0 ){
|
||||
continue;
|
||||
}
|
||||
if( *zTypePtr != '(' ) {
|
||||
break;
|
||||
}
|
||||
zTypePtr++;
|
||||
|
||||
while( *zTypePtr && *zTypePtr != ')' ){
|
||||
if( !sqlite3Isdigit(*zTypePtr) ){
|
||||
*pErrMsg = "non digit symbol in vector column parameter";
|
||||
return -1;
|
||||
}
|
||||
dimensions = dimensions*10 + (*zTypePtr - '0');
|
||||
if( dimensions > MAX_VECTOR_SZ ) {
|
||||
*pErrMsg = "max vector dimension exceeded";
|
||||
return -1;
|
||||
}
|
||||
zTypePtr++;
|
||||
}
|
||||
if( *zTypePtr != ')' ){
|
||||
*pErrMsg = "missed closing brace for vector column type";
|
||||
return -1;
|
||||
}
|
||||
zTypePtr++;
|
||||
|
||||
if( *zTypePtr ) {
|
||||
*pErrMsg = "extra data after dimension parameter for vector column type";
|
||||
return -1;
|
||||
}
|
||||
|
||||
if( dimensions <= 0 ){
|
||||
*pErrMsg = "vector column must have non-zero dimension for index";
|
||||
return -1;
|
||||
}
|
||||
|
||||
*pDims = dimensions;
|
||||
if( VECTOR_COLUMN_TYPES[i].nBits == 32 ) {
|
||||
*pType = VECTOR_TYPE_FLOAT32;
|
||||
} else if( VECTOR_COLUMN_TYPES[i].nBits == 64 ) {
|
||||
*pType = VECTOR_TYPE_FLOAT64;
|
||||
} else {
|
||||
*pErrMsg = "unsupported vector type";
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
*pErrMsg = "unexpected vector column type";
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
** Create the global metadata table of vector indices if it does not exist.
** Returns the result code of sqlite3_exec.
*/
int initVectorIndexMetaTable(sqlite3* db) {
  static const char zCreateSql[] =
    "CREATE TABLE IF NOT EXISTS " VECTOR_INDEX_GLOBAL_META_TABLE " ( name TEXT, metadata BLOB );";
  int rc = sqlite3_exec(db, zCreateSql, NULL, NULL, NULL);
  return rc;
}
|
||||
|
||||
/*
** Insert the serialized parameters of the index zName into the global
** metadata table.
**
** Returns SQLITE_OK on success, the error code of prepare/bind if one of
** them fails and SQLITE_ERROR if the statement can't be executed.
*/
int insertIndexParameters(sqlite3* db, const char *zName, VectorIdxParams *pParameters) {
  static const char zInsertSql[] = "INSERT INTO " VECTOR_INDEX_GLOBAL_META_TABLE " VALUES (?, ?)";
  sqlite3_stmt *pStmt = NULL;
  int rc;

  rc = sqlite3_prepare_v2(db, zInsertSql, -1, &pStmt, NULL);
  if( rc == SQLITE_OK ){
    rc = sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
  }
  if( rc == SQLITE_OK ){
    rc = sqlite3_bind_blob(pStmt, 2, pParameters->pBinBuf, pParameters->nBinSize, SQLITE_TRANSIENT);
  }
  if( rc == SQLITE_OK ){
    rc = ( sqlite3_step(pStmt) == SQLITE_DONE ) ? SQLITE_OK : SQLITE_ERROR;
  }
  // finalize of NULL statement is a harmless no-op
  sqlite3_finalize(pStmt);
  return rc;
}
|
||||
|
||||
/*
** Delete the parameters of the index zName from the global metadata table.
**
** Returns SQLITE_OK on success, the error code of prepare/bind if one of
** them fails and SQLITE_ERROR if the statement can't be executed.
*/
int removeIndexParameters(sqlite3* db, const char *zName) {
  static const char zDeleteSql[] = "DELETE FROM " VECTOR_INDEX_GLOBAL_META_TABLE " WHERE name = ?";
  sqlite3_stmt *pStmt = NULL;
  int rc;

  rc = sqlite3_prepare_v2(db, zDeleteSql, -1, &pStmt, NULL);
  if( rc == SQLITE_OK ){
    rc = sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
  }
  if( rc == SQLITE_OK ){
    rc = ( sqlite3_step(pStmt) == SQLITE_DONE ) ? SQLITE_OK : SQLITE_ERROR;
  }
  // finalize of NULL statement is a harmless no-op
  sqlite3_finalize(pStmt);
  return rc;
}
|
||||
|
||||
int vectorIndexGetParameters(
|
||||
sqlite3 *db,
|
||||
const char *zIndexName,
|
||||
VectorIdxParams *pParams
|
||||
) {
|
||||
int rc = SQLITE_OK;
|
||||
sqlite3_stmt *pStmt = NULL;
|
||||
int nBinSize;
|
||||
|
||||
static const char* zSelectSql = "SELECT metadata FROM " VECTOR_INDEX_GLOBAL_META_TABLE " WHERE name = ?";
|
||||
static const char* zSelectSqlOld = "SELECT vector_type, block_size, dims, distance_ops FROM libsql_vector_index WHERE type = ? AND name = ?";
|
||||
rc = sqlite3_prepare_v2(db, zSelectSql, -1, &pStmt, 0);
|
||||
if( rc == SQLITE_OK ) {
|
||||
sqlite3_bind_text(pStmt, 1, zIndexName, -1, SQLITE_STATIC);
|
||||
if( sqlite3_step(pStmt)==SQLITE_ROW ){
|
||||
nBinSize = sqlite3_column_bytes(pStmt, 0);
|
||||
if( nBinSize > VECTOR_INDEX_PARAMS_BUF_SIZE ){
|
||||
rc = SQLITE_ERROR;
|
||||
goto out_free;
|
||||
}
|
||||
vectorIdxParamsInit(pParams, (u8*)sqlite3_column_blob(pStmt, 0), nBinSize);
|
||||
goto out_free;
|
||||
}
|
||||
}
|
||||
if( pStmt ){
|
||||
sqlite3_finalize(pStmt);
|
||||
pStmt = NULL;
|
||||
}
|
||||
|
||||
rc = sqlite3_prepare_v2(db, zSelectSqlOld, -1, &pStmt, 0);
|
||||
if( rc!=SQLITE_OK ){
|
||||
goto out_free;
|
||||
}
|
||||
sqlite3_bind_text(pStmt, 1, "diskann", -1, SQLITE_STATIC);
|
||||
sqlite3_bind_text(pStmt, 2, zIndexName, -1, SQLITE_STATIC);
|
||||
if( sqlite3_step(pStmt)!=SQLITE_ROW ){
|
||||
rc = SQLITE_ERROR;
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
vectorIdxParamsPutU64(pParams, VECTOR_FORMAT_PARAM_ID, 1);
|
||||
vectorIdxParamsPutU64(pParams, VECTOR_INDEX_TYPE_PARAM_ID, VECTOR_INDEX_TYPE_DISKANN);
|
||||
vectorIdxParamsPutU64(pParams, VECTOR_TYPE_PARAM_ID, VECTOR_TYPE_FLOAT32);
|
||||
vectorIdxParamsPutU64(pParams, VECTOR_DIM_PARAM_ID, sqlite3_column_int(pStmt, 2));
|
||||
vectorIdxParamsPutU64(pParams, VECTOR_METRIC_TYPE_PARAM_ID, VECTOR_METRIC_TYPE_COS);
|
||||
if( vectorIdxParamsPutU64(pParams, VECTOR_BLOCK_SIZE_PARAM_ID, sqlite3_column_int(pStmt, 1)) != 0 ){
|
||||
rc = SQLITE_ERROR;
|
||||
}
|
||||
out_free:
|
||||
if( pStmt != NULL ){
|
||||
sqlite3_finalize(pStmt);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
/*
** Drop the index zIdxName: first the DiskANN index itself and, only if that
** succeeds, its row in the global metadata table.
*/
int vectorIndexDrop(sqlite3 *db, const char *zIdxName) {
  int rcDrop = diskAnnDropIndex(db, zIdxName);
  if( rcDrop == SQLITE_OK ){
    return removeIndexParameters(db, zIdxName);
  }
  return rcDrop;
}
|
||||
|
||||
/*
** Clear the index zIdxName; returns the result of diskAnnClearIndex.
*/
int vectorIndexClear(sqlite3 *db, const char *zIdxName) {
  int rcClear = diskAnnClearIndex(db, zIdxName);
  return rcClear;
}
|
||||
|
||||
/*
** Inspect the index pIdx which is being created and, if it is a vector index
** (single expression wrapped into the VECTOR_INDEX_MARKER_FUNCTION function),
** create the DiskANN index and store its parameters in the global metadata
** table.
**
** Returns SQLITE_OK if the index is a valid vector index (pIdx->idxType is
** set to SQLITE_IDXTYPE_VECTOR in this case) or if the index is not a vector
** index at all (pIdx is left untouched).
** Returns SQLITE_ERROR and sets the error message of pParse if the index
** uses vector index syntax but can't be created.
*/
int vectorIndexCreate(Parse *pParse, Index *pIdx, IdList *pUsing) {
  int i;
  sqlite3 *db = pParse->db;
  Table *pTable = pIdx->pTable;
  struct ExprList_item *pListItem;
  ExprList *pArgsList;
  int iEmbeddingColumn;
  char* zEmbeddingColumnTypeName;
  int dims;
  int type;
  char *pErrMsg;
  const char *zKeyErrMsg;
  int hasLibsqlVectorIdxFn = 0;
  int hasCollation = 0;
  VectorIdxKey idxKey;
  VectorIdxParams idxParams;
  vectorIdxParamsInit(&idxParams, NULL, 0);

  // backward compatibility: preserve old indices with deprecated syntax but forbid creation of new indices with this syntax
  if( pParse->db->init.busy == 0 && pUsing != 0 ){
    if( pIdx->zName != 0 && pTable->zName != 0 && pIdx->nKeyCol == 1 && pIdx->aiColumn != 0 && pIdx->aiColumn[0] < pTable->nCol ){
      sqlite3ErrorMsg(pParse, "USING syntax is deprecated, please use plain CREATE INDEX: CREATE INDEX %s ON %s ( " VECTOR_INDEX_MARKER_FUNCTION "(%s) )", pIdx->zName, pTable->zName, pTable->aCol[pIdx->aiColumn[0]].zCnName);
    } else {
      sqlite3ErrorMsg(pParse, "USING syntax is deprecated, please use plain CREATE INDEX: CREATE INDEX xxx ON yyy ( " VECTOR_INDEX_MARKER_FUNCTION "(zzz) )");
    }
    goto failed;
  }
  if( pParse->db->init.busy == 1 && pUsing != 0 ){
    goto succeed;
  }

  // vector index must have expressions over column
  if( pIdx->aColExpr == 0 ) {
    goto ignored;
  }

  pListItem = pIdx->aColExpr->a;
  for(i=0; i<pIdx->aColExpr->nExpr; i++, pListItem++){
    Expr* pExpr = pListItem->pExpr;
    while( pExpr->op==TK_COLLATE ){
      pExpr = pExpr->pLeft;
      hasCollation = 1;
    }
    if( pExpr->op == TK_FUNCTION && sqlite3StrICmp(pExpr->u.zToken, VECTOR_INDEX_MARKER_FUNCTION) == 0 ) {
      hasLibsqlVectorIdxFn = 1;
    }
  }
  if( !hasLibsqlVectorIdxFn ) {
    goto ignored;
  }
  if( hasCollation ){
    sqlite3ErrorMsg(pParse, "vector index can't have collation");
    goto failed;
  }
  if( pIdx->aColExpr->nExpr != 1 ) {
    sqlite3ErrorMsg(pParse, "vector index must contain exactly one column wrapped into the " VECTOR_INDEX_MARKER_FUNCTION " function");
    goto failed;
  }
  if( pIdx->pPartIdxWhere != 0 ) {
    sqlite3ErrorMsg(pParse, "partial vector index is not supported");
    goto failed;
  }

  // argument list is NULL (not an empty list) if function is called without arguments
  pArgsList = pIdx->aColExpr->a[0].pExpr->x.pList;
  if( pArgsList == 0 || pArgsList->nExpr < 1 ){
    sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " must contain at least one argument");
    goto failed;
  }
  pListItem = pArgsList->a;

  if( pListItem[0].pExpr->op != TK_COLUMN ) {
    sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " first argument must be a column token");
    goto failed;
  }
  iEmbeddingColumn = pListItem[0].pExpr->iColumn;
  if( iEmbeddingColumn < 0 ) {
    sqlite3ErrorMsg(pParse, VECTOR_INDEX_MARKER_FUNCTION " first argument must be column with vector type");
    goto failed;
  }
  assert( iEmbeddingColumn >= 0 && iEmbeddingColumn < pTable->nCol );

  zEmbeddingColumnTypeName = sqlite3ColumnType(&pTable->aCol[iEmbeddingColumn], "");
  if( vectorIdxParseColumnType(zEmbeddingColumnTypeName, &type, &dims, &pErrMsg) != 0 ){
    sqlite3ErrorMsg(pParse, "%s: %s", pErrMsg, zEmbeddingColumnTypeName);
    goto failed;
  }

  if( vectorIdxKeyGet(pTable, &idxKey, &zKeyErrMsg) != 0 ){
    sqlite3ErrorMsg(pParse, "failed to detect underlying table key: %s", zKeyErrMsg);
    goto failed;
  }
  if( idxKey.nKeyColumns != 1 ){
    sqlite3ErrorMsg(pParse, "vector index for tables without ROWID and composite primary key are not supported");
    goto failed;
  }

  // we deliberately ignore return code as SQLite will execute CREATE INDEX command twice and exec will fail on second attempt
  // todo: actually, it looks pretty fragile - maybe we should avoid sqlite3_exec calls from the inside of SQLite internals...
  if( initVectorIndexMetaTable(db) != 0 ){
    goto succeed;
  }
  if( parseVectorIdxParams(pParse, &idxParams, type, dims, pListItem + 1, pArgsList->nExpr - 1) != 0 ){
    goto failed;
  }
  if( diskAnnCreateIndex(db, pIdx->zName, &idxKey, &idxParams) != 0 ){
    sqlite3ErrorMsg(pParse, "unable to initialize diskann vector index");
    goto failed;
  }
  if( insertIndexParameters(db, pIdx->zName, &idxParams) != 0 ){
    sqlite3ErrorMsg(pParse, "unable to update global metadata table");
    goto failed;
  }
succeed:
  pIdx->idxType = SQLITE_IDXTYPE_VECTOR;
  return SQLITE_OK;
ignored:
  return SQLITE_OK;
failed:
  return SQLITE_ERROR;
}
|
||||
|
||||
/*
** Search k nearest neighbours of the vector argv[1] in the vector index with
** the name argv[0]; argv[2] holds k. The result is stored in pRows.
**
** Returns SQLITE_OK on success. On failure returns an error code; if
** *pzErrMsg is set by this function it always points to a string allocated
** with sqlite3_mprintf (never to a static string). The query vector is
** released on every path.
*/
int vectorIndexSearch(sqlite3 *db, int argc, sqlite3_value **argv, VectorOutRows *pRows, char **pzErrMsg) {
  const char *zIdxName;
  const char *zKeyErrMsg = NULL;
  Vector *pVector = NULL;
  Index *pIndex;
  int type, dims, k;
  int rc = SQLITE_ERROR;
  DiskAnnIndex *pDiskAnn;
  VectorIdxKey pKey;
  VectorIdxParams idxParams;
  vectorIdxParamsInit(&idxParams, NULL, 0);

  if( argc != 3 ){
    *pzErrMsg = sqlite3_mprintf("vector search must have exactly 3 parameters");
    goto out;
  }
  if( detectVectorParameters(argv[1], VECTOR_TYPE_FLOAT32, &type, &dims, pzErrMsg) != 0 ){
    goto out;
  }
  if( type != VECTOR_TYPE_FLOAT32 ){
    *pzErrMsg = sqlite3_mprintf("only f32 vectors are supported");
    goto out;
  }
  pVector = vectorAlloc(type, dims);
  if( pVector == NULL ){
    rc = SQLITE_NOMEM_BKPT;
    goto out;
  }
  if( vectorParse(argv[1], pVector, pzErrMsg) != 0 ){
    goto out;
  }
  if( sqlite3_value_type(argv[2]) != SQLITE_INTEGER ){
    *pzErrMsg = sqlite3_mprintf("vector search third parameter (k) must be an integer");
    goto out;
  }
  k = sqlite3_value_int(argv[2]);
  if( k < 0 ){
    *pzErrMsg = sqlite3_mprintf("k must be a non-negative integer");
    goto out;
  }
  if( sqlite3_value_type(argv[0]) != SQLITE_TEXT ){
    *pzErrMsg = sqlite3_mprintf("vector search first parameter (index) must be a string");
    goto out;
  }
  zIdxName = (const char*)sqlite3_value_text(argv[0]);
  if( vectorIndexGetParameters(db, zIdxName, &idxParams) != 0 ){
    *pzErrMsg = sqlite3_mprintf("failed to parse vector index parameters");
    goto out;
  }
  pIndex = sqlite3FindIndex(db, zIdxName, db->aDb[0].zDbSName);
  if( pIndex == NULL ){
    *pzErrMsg = sqlite3_mprintf("vector index not found");
    goto out;
  }
  if( vectorIdxKeyGet(pIndex->pTable, &pKey, &zKeyErrMsg) != 0 ){
    // vectorIdxKeyGet reports static string: copy it as all other messages are allocated with sqlite3_mprintf
    *pzErrMsg = sqlite3_mprintf("%s", zKeyErrMsg);
    goto out;
  }
  if( diskAnnOpenIndex(db, zIdxName, &idxParams, &pDiskAnn) != 0 ){
    *pzErrMsg = sqlite3_mprintf("failed to open diskann index");
    goto out;
  }
  if( diskAnnSearch(pDiskAnn, pVector, k, &pKey, pRows, pzErrMsg) == 0 ){
    rc = SQLITE_OK;
  }
  diskAnnCloseIndex(pDiskAnn);
out:
  if( pVector != NULL ){
    vectorFree(pVector);
  }
  return rc;
}
|
||||
|
||||
int vectorIndexInsert(
|
||||
VectorIdxCursor *pCur,
|
||||
const UnpackedRecord *pX,
|
||||
char **pzErrMsg
|
||||
){
|
||||
VectorInRow vectorInRow;
|
||||
int rc;
|
||||
|
||||
if( vectorInRowAlloc(pCur->db, pX, &vectorInRow, pzErrMsg) != 0 ){
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
if( vectorInRow.pVector == NULL ){
|
||||
return SQLITE_OK;
|
||||
}
|
||||
rc = diskAnnInsert(pCur->index, &vectorInRow, pzErrMsg);
|
||||
vectorInRowFree(pCur->db, &vectorInRow);
|
||||
return rc;
|
||||
}
|
||||
|
||||
int vectorIndexDelete(
|
||||
VectorIdxCursor *pCur,
|
||||
const UnpackedRecord *r,
|
||||
char **pzErrMsg
|
||||
){
|
||||
VectorInRow payload;
|
||||
payload.pVector = NULL;
|
||||
payload.nKeys = r->nField - 1;
|
||||
payload.pKeyValues = r->aMem + 1;
|
||||
return diskAnnDelete(pCur->index, &payload, pzErrMsg);
|
||||
}
|
||||
|
||||
int vectorIndexCursorInit(
|
||||
sqlite3 *db,
|
||||
VectorIdxCursor **ppCursor,
|
||||
const char *zIndexName
|
||||
){
|
||||
const char *zDbPath;
|
||||
VectorIdxCursor* pCursor;
|
||||
VectorIdxParams params;
|
||||
vectorIdxParamsInit(¶ms, NULL, 0);
|
||||
|
||||
if( vectorIndexGetParameters(db, zIndexName, ¶ms) != 0 ){
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
pCursor = sqlite3DbMallocZero(db, sizeof(VectorIdxCursor));
|
||||
if( pCursor == 0 ){
|
||||
return SQLITE_NOMEM_BKPT;
|
||||
}
|
||||
if( diskAnnOpenIndex(db, zIndexName, ¶ms, &pCursor->index) != 0 ){
|
||||
sqlite3DbFree(db, pCursor);
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
pCursor->db = db;
|
||||
*ppCursor = pCursor;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
** Close the DiskANN index of the cursor and release the cursor itself.
*/
void vectorIndexCursorClose(sqlite3 *db, VectorIdxCursor *pCursor){
  DiskAnnIndex *pOpened = pCursor->index;
  diskAnnCloseIndex(pOpened);
  sqlite3DbFree(db, pCursor);
}
|
||||
|
||||
#endif /* !defined(SQLITE_OMIT_VECTOR) */
|
@ -15,18 +15,21 @@ typedef struct BlobSpot BlobSpot;
|
||||
* Main type which holds all necessary index information and will be passed as a first argument in all index-related operations
|
||||
*/
|
||||
struct DiskAnnIndex {
|
||||
sqlite3 *db; /* Database connection */
|
||||
char *zDb; /* Database name */
|
||||
char *zName; /* Index name */
|
||||
char *zShadow; /* Shadow table name */
|
||||
int nFormatVersion; /* DiskAnn format version */
|
||||
int nDistanceFunc; /* Distance function */
|
||||
int nBlockSize; /* Size of the block which stores all data for single node */
|
||||
int nVectorDims; /* Vector dimensions */
|
||||
int nNodeVectorType; /* Vector type of each node */
|
||||
int nEdgeVectorType; /* Vector type of each node */
|
||||
int nNodeVectorSize; /* Vector size of each node in bytes */
|
||||
int nEdgeVectorSize; /* Vector size of each edge in bytes */
|
||||
sqlite3 *db; /* Database connection */
|
||||
char *zDb; /* Database name */
|
||||
char *zName; /* Index name */
|
||||
char *zShadow; /* Shadow table name */
|
||||
int nFormatVersion; /* DiskAnn format version */
|
||||
int nDistanceFunc; /* Distance function */
|
||||
int nBlockSize; /* Size of the block which stores all data for single node */
|
||||
int nVectorDims; /* Vector dimensions */
|
||||
int nNodeVectorType; /* Vector type of each node */
|
||||
int nEdgeVectorType; /* Vector type of each node */
|
||||
int nNodeVectorSize; /* Vector size of each node in bytes */
|
||||
int nEdgeVectorSize; /* Vector size of each edge in bytes */
|
||||
float pruningAlpha; /* Alpha parameter for edge pruning during INSERT operation */
|
||||
int insertL; /* Max size of candidate set (L) visited during INSERT operation */
|
||||
int searchL; /* Max size of candidate set (L) visited during SEARCH operation (can be overriden from query in future) */
|
||||
};
|
||||
|
||||
/*
|
||||
@ -73,9 +76,164 @@ void nodeBinInsertEdge(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, int iIns
|
||||
void nodeBinDeleteEdge(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, int iDelete);
|
||||
void nodeBinDebug(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot);
|
||||
|
||||
/**************************************************************************
|
||||
** Vector index utilities
|
||||
****************************************************************************/
|
||||
|
||||
/* Vector index utility objects */
|
||||
typedef struct VectorIdxKey VectorIdxKey;
|
||||
typedef struct VectorIdxParams VectorIdxParams;
|
||||
typedef struct VectorInRow VectorInRow;
|
||||
typedef struct VectorOutRows VectorOutRows;
|
||||
|
||||
typedef u8 IndexType;
|
||||
typedef u8 MetricType;
|
||||
|
||||
/*
|
||||
* All vector index parameters must be known to the vectorIndex module, although their interpretation is up to the specific implementation of the index (so there is no validation of parameter values in the vectorIndex module - all this work must be delegated to the specific implementation).
|
||||
* All enum-like type constants start with 1 to make 0 an "unset" value placeholder
|
||||
*/
|
||||
|
||||
/* format version which can help to upgrade vector on-disk format without breaking older version of the db */
|
||||
#define VECTOR_FORMAT_PARAM_ID 1
|
||||
/*
|
||||
* 1 - initial version
|
||||
*/
|
||||
#define VECTOR_FORMAT_DEFAULT 1
|
||||
|
||||
/* type of the vector index */
|
||||
#define VECTOR_INDEX_TYPE_PARAM_ID 2
|
||||
#define VECTOR_INDEX_TYPE_DISKANN 1
|
||||
|
||||
/* type of the underlying vector for the vector index */
|
||||
#define VECTOR_TYPE_PARAM_ID 3
|
||||
/* dimension of the underlying vector for the vector index */
|
||||
#define VECTOR_DIM_PARAM_ID 4
|
||||
|
||||
/* metric type used for comparing two vectors */
|
||||
#define VECTOR_METRIC_TYPE_PARAM_ID 5
|
||||
#define VECTOR_METRIC_TYPE_COS 1
|
||||
|
||||
/* block size */
|
||||
#define VECTOR_BLOCK_SIZE_PARAM_ID 6
|
||||
#define VECTOR_BLOCK_SIZE_DEFAULT 128
|
||||
|
||||
#define VECTOR_PRUNING_ALPHA_PARAM_ID 7
|
||||
#define VECTOR_PRUNING_ALPHA_DEFAULT 1.2
|
||||
|
||||
#define VECTOR_INSERT_L_PARAM_ID 8
|
||||
#define VECTOR_INSERT_L_DEFAULT 70
|
||||
|
||||
#define VECTOR_SEARCH_L_PARAM_ID 9
|
||||
#define VECTOR_SEARCH_L_DEFAULT 200
|
||||
|
||||
/* total amount of vector index parameters */
|
||||
#define VECTOR_PARAM_IDS_COUNT 9
|
||||
|
||||
/*
|
||||
* Vector index parameters are stored in simple binary format (1 byte tag + 8 byte u64 integer)
|
||||
* This will allow us to add parameters in future versions more easily as we have full control over the format (compared to the "rigid" SQL schema)
|
||||
* For now, VectorIdxParams is allocated on the stack and has a 128 byte hard limit (so far we have 9 parameters, so 81 bytes are enough for us)
|
||||
*/
|
||||
#define VECTOR_INDEX_PARAMS_BUF_SIZE 128
|
||||
struct VectorIdxParams {
|
||||
u8 pBinBuf[VECTOR_INDEX_PARAMS_BUF_SIZE];
|
||||
int nBinSize;
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* Structure which holds information about primary key of the base table for vector index
|
||||
* For tables with ROWID this structure will have information about single column with INTEGER affinity and BINARY collation
|
||||
* For now, VectorIdxKey is allocated on the stack and has a hard limit of 16 columns (for now we do not support composite primary keys due to the limitations of virtual tables)
|
||||
*/
|
||||
/* Maximum number of primary key columns which VectorIdxKey can describe */
#define VECTOR_INDEX_MAX_KEY_COLUMNS 16

/*
** Description of the base table primary key: column count plus per-column
** affinity and collation, each stored in a fixed-size array.
*/
struct VectorIdxKey {
  int nKeyColumns;                                      /* number of key columns described by the arrays below */
  char aKeyAffinity[VECTOR_INDEX_MAX_KEY_COLUMNS];      /* affinity of every key column */
  /*
  ** Collation name of every key column. The strings are owned by the caller:
  ** this structure never reclaims the collation string resources.
  */
  const char *azKeyCollation[VECTOR_INDEX_MAX_KEY_COLUMNS];
};
|
||||
|
||||
/*
|
||||
* Structure which holds information about input payload for vector index (for INSERT/DELETE operations)
|
||||
* pVector must be NULL for DELETE operation
|
||||
*
|
||||
* Resources must be reclaimed with vectorInRowFree(...) method
|
||||
*/
|
||||
struct VectorInRow {
|
||||
Vector *pVector;
|
||||
int nKeys;
|
||||
sqlite3_value *pKeyValues;
|
||||
};
|
||||
|
||||
/*
|
||||
* Structure which holds information about result set of SEARCH operation
|
||||
* It has a special optimization for the case when a single INTEGER primary key is used - in this case the aRowids array stores all primary key values
|
||||
* In all other cases the generic ppValues array stores all column information
|
||||
*
|
||||
* Resources must be reclaimed with vectorOutRowsFree(...) method
|
||||
*/
|
||||
#define VECTOR_OUT_ROWS_MAX_CELLS (1<<30)
|
||||
struct VectorOutRows {
|
||||
int nRows;
|
||||
int nCols;
|
||||
i64 *aRowids;
|
||||
sqlite3_value **ppValues;
|
||||
};
|
||||
|
||||
void vectorIdxParamsInit(VectorIdxParams *, u8 *, int);
|
||||
u64 vectorIdxParamsGetU64(const VectorIdxParams *, char);
|
||||
double vectorIdxParamsGetF64(const VectorIdxParams *, char);
|
||||
int vectorIdxParamsPutU64(VectorIdxParams *, char, u64);
|
||||
int vectorIdxParamsPutF64(VectorIdxParams *, char, double);
|
||||
|
||||
int vectorIdxKeyGet(Table*, VectorIdxKey *, const char **);
|
||||
int vectorIdxKeyColumnRender(const VectorIdxKey *, const char *, char *, int);
|
||||
int vectorIdxKeyPlaceholderRender(int, const char *, char *, int);
|
||||
|
||||
int vectorInRowAlloc(sqlite3 *, const UnpackedRecord *, VectorInRow *, char **);
|
||||
sqlite3_value* vectorInRowKey(const VectorInRow *, int);
|
||||
int vectorInRowTryGetRowid(const VectorInRow *, u64 *);
|
||||
i64 vectorInRowLegacyId(const VectorInRow *);
|
||||
int vectorInRowPlaceholderRender(const VectorInRow *, char *, int);
|
||||
void vectorInRowFree(sqlite3 *, VectorInRow *);
|
||||
|
||||
int vectorOutRowsAlloc(sqlite3 *, VectorOutRows *, int, int, char);
|
||||
int vectorOutRowsPut(VectorOutRows *, int, int, const u64 *, sqlite3_value *);
|
||||
void vectorOutRowsGet(sqlite3_context *, const VectorOutRows *, int, int);
|
||||
void vectorOutRowsFree(sqlite3 *, VectorOutRows *);
|
||||
|
||||
int diskAnnCreateIndex(sqlite3 *, const char *, const VectorIdxKey *, VectorIdxParams *);
|
||||
int diskAnnDeleteIndex(sqlite3 *, const char *);
|
||||
int diskAnnClearIndex(sqlite3 *, const char *);
|
||||
int diskAnnDropIndex(sqlite3 *, const char *);
|
||||
int diskAnnOpenIndex(sqlite3 *, const char *, const VectorIdxParams *, DiskAnnIndex **);
|
||||
void diskAnnCloseIndex(DiskAnnIndex *);
|
||||
int diskAnnInsert(const DiskAnnIndex *, const VectorInRow *, char **);
|
||||
int diskAnnDelete(const DiskAnnIndex *, const VectorInRow *, char **);
|
||||
int diskAnnSearch(const DiskAnnIndex *, const Vector *, int, const VectorIdxKey *, VectorOutRows *, char **);
|
||||
|
||||
typedef struct VectorIdxCursor VectorIdxCursor;
|
||||
|
||||
#define VECTOR_INDEX_VTAB_NAME "vector_top_k"
|
||||
#define VECTOR_INDEX_GLOBAL_META_TABLE "libsql_vector_meta_shadow"
|
||||
#define VECTOR_INDEX_MARKER_FUNCTION "libsql_vector_idx"
|
||||
|
||||
int vectorIdxParseColumnType(const char *, int *, int *, char **);
|
||||
|
||||
int vectorIndexCreate(Parse*, Index*, IdList*);
|
||||
int vectorIndexClear(sqlite3 *, const char *);
|
||||
int vectorIndexDrop(sqlite3 *, const char *);
|
||||
int vectorIndexCursorInit(sqlite3 *, VectorIdxCursor **, const char *);
|
||||
void vectorIndexCursorClose(sqlite3 *, VectorIdxCursor *);
|
||||
int vectorIndexInsert(VectorIdxCursor *, const UnpackedRecord *, char **);
|
||||
int vectorIndexDelete(VectorIdxCursor *, const UnpackedRecord *, char **);
|
||||
int vectorIndexSearch(sqlite3 *, int, sqlite3_value **, VectorOutRows *, char **);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* end of the 'extern "C"' block */
|
||||
#endif
|
||||
|
||||
#endif /* _VECTOR_INDEX_H */
|
||||
#endif /* _VECTOR_H */
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
212
libsql-sqlite3/src/vectorvtab.c
Normal file
212
libsql-sqlite3/src/vectorvtab.c
Normal file
@ -0,0 +1,212 @@
|
||||
/*
|
||||
** 2024-04-25
|
||||
**
|
||||
** Copyright 2024 the libSQL authors
|
||||
**
|
||||
** Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
** this software and associated documentation files (the "Software"), to deal in
|
||||
** the Software without restriction, including without limitation the rights to
|
||||
** use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
** the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
** subject to the following conditions:
|
||||
**
|
||||
** The above copyright notice and this permission notice shall be included in all
|
||||
** copies or substantial portions of the Software.
|
||||
**
|
||||
** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
** FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
** COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
** IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
** libSQL vector search.
|
||||
*/
|
||||
#if !defined(SQLITE_OMIT_VECTOR) && !defined(SQLITE_OMIT_VIRTUALTABLE)
|
||||
#include "sqlite3.h"
|
||||
#include "vdbeInt.h"
|
||||
#include "vectorIndexInt.h"
|
||||
|
||||
typedef struct vectorVtab vectorVtab;
|
||||
struct vectorVtab {
|
||||
sqlite3_vtab base; /* Base class - must be first */
|
||||
sqlite3 *db; /* Database connection */
|
||||
};
|
||||
|
||||
typedef struct vectorVtab_cursor vectorVtab_cursor;
|
||||
struct vectorVtab_cursor {
|
||||
sqlite3_vtab_cursor base; /* Base class - must be first */
|
||||
VectorOutRows rows;
|
||||
int iRow;
|
||||
};
|
||||
|
||||
/* Column numbers */
|
||||
#define VECTOR_COLUMN_IDX 0
|
||||
#define VECTOR_COLUMN_VECTOR 1
|
||||
#define VECTOR_COLUMN_K 2
|
||||
#define VECTOR_COLUMN_OFFSET 3
|
||||
|
||||
static int vectorVtabConnect(
|
||||
sqlite3 *db,
|
||||
void *pAux,
|
||||
int argc, const char *const *argv,
|
||||
sqlite3_vtab **ppVtab,
|
||||
char **pzErr
|
||||
){
|
||||
vectorVtab *pVtab;
|
||||
int rc;
|
||||
rc = sqlite3_declare_vtab(db, "CREATE TABLE x(idx hidden, vector hidden, k hidden, id);");
|
||||
if( rc!=SQLITE_OK ){
|
||||
return rc;
|
||||
}
|
||||
pVtab = sqlite3_malloc( sizeof(*pVtab) );
|
||||
if( pVtab==0 ){
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
memset(pVtab, 0, sizeof(*pVtab));
|
||||
pVtab->db = db;
|
||||
*ppVtab = (sqlite3_vtab*)pVtab;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
** xDisconnect method: release the object allocated by vectorVtabConnect().
** Always returns SQLITE_OK.
*/
static int vectorVtabDisconnect(sqlite3_vtab *pVtab){
  vectorVtab *pSelf = (vectorVtab*)pVtab;
  sqlite3_free(pSelf);
  return SQLITE_OK;
}
|
||||
|
||||
/*
** xOpen method: allocate a zero-initialized cursor.
**
** The cursor starts with an empty result set (all fields are zero), so it is
** safe to close it even if xFilter was never called.
**
** Returns SQLITE_NOMEM if the allocation fails and SQLITE_OK otherwise (in
** which case *ppCursor receives the new cursor).
*/
static int vectorVtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
  vectorVtab_cursor *pCur;
  (void)p; /* the table object is not needed to create a cursor */
  pCur = sqlite3_malloc( sizeof(*pCur) );
  if( pCur==0 ) return SQLITE_NOMEM;
  memset(pCur, 0, sizeof(*pCur));
  *ppCursor = &pCur->base;
  return SQLITE_OK;
}
|
||||
|
||||
/*
** xClose method: release the rows held by the cursor and then the cursor
** itself. Always returns SQLITE_OK.
*/
static int vectorVtabClose(sqlite3_vtab_cursor *cur){
  vectorVtab_cursor *pCursor = (vectorVtab_cursor*)cur;
  sqlite3 *db = ((vectorVtab *)cur->pVtab)->db;
  vectorOutRowsFree(db, &pCursor->rows);
  sqlite3_free(pCursor);
  return SQLITE_OK;
}
|
||||
|
||||
/*
** xNext method: advance the cursor to the next row of the result set.
** Always returns SQLITE_OK.
*/
static int vectorVtabNext(sqlite3_vtab_cursor *cur){
  ((vectorVtab_cursor*)cur)->iRow += 1;
  return SQLITE_OK;
}
|
||||
|
||||
/*
** xEof method: return 1 once the cursor has moved past the last row of the
** result set and 0 while it still points at a valid row.
*/
static int vectorVtabEof(sqlite3_vtab_cursor *cur){
  const vectorVtab_cursor *pCursor = (const vectorVtab_cursor*)cur;
  if( pCursor->iRow < pCursor->rows.nRows ){
    return 0;
  }
  return 1;
}
|
||||
|
||||
static int vectorVtabColumn(
|
||||
sqlite3_vtab_cursor *cur, /* The cursor */
|
||||
sqlite3_context *context, /* First argument to sqlite3_result_...() */
|
||||
int iCol /* Which column to return */
|
||||
){
|
||||
vectorVtab_cursor *pCur = (vectorVtab_cursor*)cur;
|
||||
vectorOutRowsGet(context, &pCur->rows, pCur->iRow, iCol - VECTOR_COLUMN_OFFSET);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
** xRowid method: report the rowid of the current row.
**
** If the result set carries an aRowids array then the stored value is used;
** otherwise the position of the row inside the result set is reported.
** Always returns SQLITE_OK.
*/
static int vectorVtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
  vectorVtab_cursor *pCursor = (vectorVtab_cursor*)cur;
  int iCurrent = pCursor->iRow;
  *pRowid = pCursor->rows.aRowids != NULL ? pCursor->rows.aRowids[iCurrent] : iCurrent;
  return SQLITE_OK;
}
|
||||
|
||||
static int vectorVtabFilter(
|
||||
sqlite3_vtab_cursor *pVtabCursor,
|
||||
int idxNum, const char *idxStr,
|
||||
int argc, sqlite3_value **argv
|
||||
){
|
||||
vectorVtab_cursor *pCur = (vectorVtab_cursor *)pVtabCursor;
|
||||
vectorVtab *pVTab = (vectorVtab *)pVtabCursor->pVtab;
|
||||
pCur->rows.aRowids = NULL;
|
||||
pCur->rows.ppValues = NULL;
|
||||
|
||||
if( vectorIndexSearch(pVTab->db, argc, argv, &pCur->rows, &pVTab->base.zErrMsg) != 0 ){
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
|
||||
assert( pCur->rows.nRows >= 0 );
|
||||
assert( pCur->rows.nCols > 0 );
|
||||
pCur->iRow = 0;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
static int vectorVtabBestIndex(
|
||||
sqlite3_vtab *tab,
|
||||
sqlite3_index_info *pIdxInfo
|
||||
){
|
||||
const struct sqlite3_index_constraint *pConstraint;
|
||||
int i;
|
||||
|
||||
pIdxInfo->estimatedCost = (double)1;
|
||||
pIdxInfo->estimatedRows = 100;
|
||||
pIdxInfo->idxNum = 1;
|
||||
|
||||
pConstraint = pIdxInfo->aConstraint;
|
||||
for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){
|
||||
if( pConstraint->usable==0 ) continue;
|
||||
if( pConstraint->op!=SQLITE_INDEX_CONSTRAINT_EQ ) continue;
|
||||
switch( pConstraint->iColumn ){
|
||||
case VECTOR_COLUMN_IDX:
|
||||
pIdxInfo->aConstraintUsage[i].argvIndex = 1;
|
||||
pIdxInfo->aConstraintUsage[i].omit = 1;
|
||||
break;
|
||||
case VECTOR_COLUMN_VECTOR:
|
||||
pIdxInfo->aConstraintUsage[i].argvIndex = 2;
|
||||
pIdxInfo->aConstraintUsage[i].omit = 1;
|
||||
break;
|
||||
case VECTOR_COLUMN_K:
|
||||
pIdxInfo->aConstraintUsage[i].argvIndex = 3;
|
||||
pIdxInfo->aConstraintUsage[i].omit = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
static sqlite3_module vectorModule = {
|
||||
/* iVersion */ 0,
|
||||
/* xCreate */ 0,
|
||||
/* xConnect */ vectorVtabConnect,
|
||||
/* xBestIndex */ vectorVtabBestIndex,
|
||||
/* xDisconnect */ vectorVtabDisconnect,
|
||||
/* xDestroy */ 0,
|
||||
/* xOpen */ vectorVtabOpen,
|
||||
/* xClose */ vectorVtabClose,
|
||||
/* xFilter */ vectorVtabFilter,
|
||||
/* xNext */ vectorVtabNext,
|
||||
/* xEof */ vectorVtabEof,
|
||||
/* xColumn */ vectorVtabColumn,
|
||||
/* xRowid */ vectorVtabRowid,
|
||||
/* xUpdate */ 0,
|
||||
/* xBegin */ 0,
|
||||
/* xSync */ 0,
|
||||
/* xCommit */ 0,
|
||||
/* xRollback */ 0,
|
||||
/* xFindMethod */ 0,
|
||||
/* xRename */ 0,
|
||||
/* xSavepoint */ 0,
|
||||
/* xRelease */ 0,
|
||||
/* xRollbackTo */ 0,
|
||||
/* xShadowName */ 0,
|
||||
/* xIntegrity */ 0
|
||||
};
|
||||
|
||||
/*
** Register the vector search module under the name VECTOR_INDEX_VTAB_NAME for
** the connection db. Returns the result code of sqlite3_create_module().
*/
int vectorVtabInit(sqlite3 *db){
  int rc = sqlite3_create_module(db, VECTOR_INDEX_VTAB_NAME, &vectorModule, 0);
  return rc;
}
|
||||
#endif /* !defined(SQLITE_OMIT_VECTOR) && !defined(SQLITE_OMIT_VIRTUALTABLE) */
|
@ -41,7 +41,9 @@ run_test_suite pcache90
|
||||
run_test_suite pcache100
|
||||
run_test_suite prepare
|
||||
run_test_suite mmap
|
||||
run_test suite libsql_alter
|
||||
run_test suite libsql_vector
|
||||
run_test suite libsql_vector_index
|
||||
|
||||
if {$::tcl_platform(platform)=="unix"} {
|
||||
ifcapable !default_autovacuum {
|
||||
|
223
libsql-sqlite3/test/libsql_vector_index.test
Normal file
223
libsql-sqlite3/test/libsql_vector_index.test
Normal file
@ -0,0 +1,223 @@
|
||||
# 2024-06-12
|
||||
#
|
||||
# Copyright 2024 the libSQL authors
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
# this software and associated documentation files (the "Software"), to deal in
|
||||
# the Software without restriction, including without limitation the rights to
|
||||
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
# the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
# subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
#***********************************************************************
|
||||
# This file implements regression tests for libSQL library. The
|
||||
# focus of this file is vector search.
|
||||
|
||||
|
||||
set testdir [file dirname $argv0]
|
||||
source $testdir/tester.tcl
|
||||
set testprefix vector
|
||||
|
||||
sqlite3_db_config_lookaside db 0 0 0
|
||||
|
||||
do_execsql_test vector-integrity {
|
||||
CREATE TABLE t_integrity( v FLOAT32(3) );
|
||||
CREATE INDEX t_integrity_idx ON t_integrity( libsql_vector_idx(v) );
|
||||
INSERT INTO t_integrity VALUES (vector('[1,2,3]'));
|
||||
PRAGMA integrity_check;
|
||||
} {{row 1 missing from index t_integrity_idx} {wrong # of entries in index t_integrity_idx}}
|
||||
|
||||
do_execsql_test vector-backfill {
|
||||
CREATE TABLE t_backfill( v FLOAT32(3) );
|
||||
INSERT INTO t_backfill VALUES (vector('[1,2,3]'));
|
||||
INSERT INTO t_backfill VALUES (vector('[2,3,4]'));
|
||||
INSERT INTO t_backfill VALUES (vector('[3,4,5]'));
|
||||
INSERT INTO t_backfill VALUES (vector('[4,5,6]'));
|
||||
CREATE INDEX t_backfill_idx ON t_backfill( libsql_vector_idx(v) );
|
||||
SELECT rowid FROM vector_top_k('t_backfill_idx', vector('[3,4,5]'), 4);
|
||||
} {3 4 2 1}
|
||||
|
||||
do_execsql_test vector-reindex {
|
||||
CREATE TABLE t_reindex( v FLOAT32(3) );
|
||||
CREATE INDEX t_reindex_idx ON t_reindex( libsql_vector_idx(v) );
|
||||
INSERT INTO t_reindex VALUES (vector('[1,2,3]'));
|
||||
INSERT INTO t_reindex VALUES (vector('[2,3,4]'));
|
||||
INSERT INTO t_reindex VALUES (vector('[3,4,5]'));
|
||||
INSERT INTO t_reindex VALUES (vector('[4,5,6]'));
|
||||
REINDEX t_reindex_idx;
|
||||
SELECT rowid FROM vector_top_k('t_reindex_idx', vector('[3,4,5]'), 4);
|
||||
} {3 4 2 1}
|
||||
|
||||
do_execsql_test vector-text-pk {
|
||||
CREATE TABLE t_text_pk( email TEXT PRIMARY KEY, v FLOAT32(3) );
|
||||
CREATE INDEX t_text_pk_idx ON t_text_pk( libsql_vector_idx(v) );
|
||||
INSERT INTO t_text_pk VALUES ('e-1', vector('[1,2,3]'));
|
||||
INSERT INTO t_text_pk VALUES ('e-2', vector('[2,3,4]'));
|
||||
INSERT INTO t_text_pk VALUES ('e-3', vector('[3,4,5]'));
|
||||
INSERT INTO t_text_pk VALUES ('e-4', vector('[4,5,6]'));
|
||||
SELECT rowid FROM vector_top_k('t_text_pk_idx', vector('[3,4,5]'), 4);
|
||||
} {3 4 2 1}
|
||||
|
||||
do_execsql_test vector-text-pk-norow {
|
||||
CREATE TABLE t_text_pk_norow( email TEXT PRIMARY KEY, v FLOAT32(3) ) WITHOUT ROWID;
|
||||
CREATE INDEX t_text_pk_norow_idx ON t_text_pk_norow( libsql_vector_idx(v) );
|
||||
INSERT INTO t_text_pk_norow VALUES ('e-1', vector('[1,2,3]'));
|
||||
INSERT INTO t_text_pk_norow VALUES ('e-2', vector('[2,3,4]'));
|
||||
INSERT INTO t_text_pk_norow VALUES ('e-3', vector('[3,4,5]'));
|
||||
INSERT INTO t_text_pk_norow VALUES ('e-4', vector('[4,5,6]'));
|
||||
SELECT id FROM vector_top_k('t_text_pk_norow_idx', vector('[3,4,5]'), 4);
|
||||
} {e-3 e-4 e-2 e-1}
|
||||
|
||||
do_execsql_test vector-delete {
|
||||
CREATE TABLE t_delete( id INTEGER PRIMARY KEY, v FLOAT32(3) );
|
||||
CREATE INDEX t_delete_idx ON t_delete( libsql_vector_idx(v) );
|
||||
INSERT INTO t_delete VALUES (10, vector('[1,1,1]'));
|
||||
INSERT INTO t_delete VALUES (20, vector('[-1,-1,-1]'));
|
||||
SELECT rowid FROM vector_top_k('t_delete_idx', vector('[1,1,1]'), 1);
|
||||
DELETE FROM t_delete WHERE id = 10;
|
||||
SELECT rowid FROM vector_top_k('t_delete_idx', vector('[1,1,1]'), 1);
|
||||
} {10 20}
|
||||
|
||||
do_execsql_test vector-update {
|
||||
CREATE TABLE t_update( id INTEGER PRIMARY KEY, v FLOAT32(3) );
|
||||
CREATE INDEX t_update_idx ON t_update( libsql_vector_idx(v) );
|
||||
INSERT INTO t_update VALUES (10, vector('[1,1,1]'));
|
||||
INSERT INTO t_update VALUES (20, vector('[-1,-1,-1]'));
|
||||
UPDATE t_update SET v = vector('[-1,-1,-1]') WHERE id = 10;
|
||||
UPDATE t_update SET v = vector('[1,1,1]') WHERE id = 20;
|
||||
SELECT rowid FROM vector_top_k('t_update_idx', vector('[1,1,1]'), 2);
|
||||
} {20 10}
|
||||
|
||||
do_execsql_test vector-simple {
|
||||
CREATE TABLE t_simple( v FLOAT32(3));
|
||||
CREATE INDEX t_simple_idx ON t_simple( libsql_vector_idx(v) );
|
||||
INSERT INTO t_simple VALUES(vector('[1,2,3]'));
|
||||
INSERT INTO t_simple VALUES(vector('[2,3,4]'));
|
||||
INSERT INTO t_simple VALUES(vector('[5,6,7]'));
|
||||
SELECT * FROM vector_top_k('t_simple_idx', '[1,2,3]', 1);
|
||||
SELECT * FROM vector_top_k('t_simple_idx', '[5,6,7]', 1);
|
||||
SELECT * FROM vector_top_k('t_simple_idx', vector('[1,2,3]'), 1);
|
||||
} {{1} {3} {1}}
|
||||
|
||||
do_execsql_test vector-empty {
|
||||
CREATE TABLE t_empty( v FLOAT32(3));
|
||||
CREATE INDEX t_empty_idx ON t_empty( libsql_vector_idx(v) );
|
||||
SELECT * FROM vector_top_k('t_empty_idx', '[1,2,3]', 1);
|
||||
INSERT INTO t_empty VALUES(vector('[1,2,3]'));
|
||||
DELETE FROM t_empty WHERE rowid = 1;
|
||||
SELECT * FROM vector_top_k('t_empty_idx', '[5,6,7]', 1);
|
||||
} {}
|
||||
|
||||
|
||||
do_execsql_test vector-null {
|
||||
CREATE TABLE t_null( v FLOAT32(3));
|
||||
CREATE INDEX t_null_idx ON t_null( libsql_vector_idx(v) );
|
||||
INSERT INTO t_null VALUES(vector('[1,2,3]'));
|
||||
INSERT INTO t_null VALUES(NULL);
|
||||
INSERT INTO t_null VALUES(vector('[2,3,4]'));
|
||||
SELECT * FROM vector_top_k('t_null_idx', '[1,2,3]', 2);
|
||||
} {1 3}
|
||||
|
||||
do_execsql_test vector-sql {
|
||||
CREATE TABLE t_sql( v FLOAT32(3));
|
||||
CREATE INDEX t_sql_idx ON t_sql( libsql_vector_idx(v) );
|
||||
INSERT INTO t_sql VALUES(vector('[1,2,3]')), (vector('[2,3,4]'));
|
||||
SELECT sql FROM sqlite_master WHERE name LIKE '%t_sql%';
|
||||
SELECT name FROM libsql_vector_meta_shadow WHERE name = 't_sql_idx';
|
||||
} {{CREATE TABLE t_sql( v FLOAT32(3))} {CREATE TABLE t_sql_idx_shadow (index_key INTEGER , data BLOB, PRIMARY KEY (index_key))} {CREATE INDEX t_sql_idx ON t_sql( libsql_vector_idx(v) )} {t_sql_idx}}
|
||||
|
||||
do_execsql_test vector-drop-index {
|
||||
CREATE TABLE t_index_drop( v FLOAT32(3));
|
||||
CREATE INDEX t_index_drop_idx ON t_index_drop( libsql_vector_idx(v) );
|
||||
INSERT INTO t_index_drop VALUES(vector('[1,2,3]')), (vector('[2,3,4]'));
|
||||
DROP INDEX t_index_drop_idx;
|
||||
SELECT sql FROM sqlite_master WHERE name LIKE '%t_index_drop%';
|
||||
SELECT name FROM libsql_vector_meta_shadow WHERE name = 't_index_drop_idx';
|
||||
} {{CREATE TABLE t_index_drop( v FLOAT32(3))}}
|
||||
|
||||
do_execsql_test vector-drop-table {
|
||||
CREATE TABLE t_table_drop( v FLOAT32(3));
|
||||
CREATE INDEX t_table_drop_idx ON t_table_drop( libsql_vector_idx(v) );
|
||||
INSERT INTO t_table_drop VALUES(vector('[1,2,3]')), (vector('[2,3,4]'));
|
||||
DROP table t_table_drop;
|
||||
SELECT sql FROM sqlite_master WHERE name LIKE '%t_table_drop%';
|
||||
SELECT name FROM libsql_vector_meta_shadow WHERE name = 't_table_drop_idx';
|
||||
} {}
|
||||
|
||||
do_execsql_test vector-mixed-format {
|
||||
CREATE TABLE t_mixed( v FLOAT32(3));
|
||||
INSERT INTO t_mixed VALUES('[1,2,3]');
|
||||
INSERT INTO t_mixed VALUES(vector('[2,3,4]'));
|
||||
INSERT INTO t_mixed VALUES('[5,6,7]');
|
||||
CREATE INDEX t_mixed_idx ON t_mixed( libsql_vector_idx(v) );
|
||||
SELECT * FROM vector_top_k('t_mixed_idx', '[1,2,3]', 1);
|
||||
SELECT * FROM vector_top_k('t_mixed_idx', '[5,6,7]', 1);
|
||||
SELECT * FROM vector_top_k('t_mixed_idx', vector('[1,2,3]'), 1);
|
||||
} {1 3 1}
|
||||
|
||||
do_execsql_test vector-alter-column {
|
||||
CREATE TABLE t_vec_alter( v BLOB );
|
||||
INSERT INTO t_vec_alter VALUES('[1,2,3]');
|
||||
INSERT INTO t_vec_alter VALUES(vector('[2,3,4]'));
|
||||
INSERT INTO t_vec_alter VALUES('[5,6,7]');
|
||||
ALTER TABLE t_vec_alter ALTER COLUMN v TO v FLOAT32(3);
|
||||
CREATE INDEX t_vec_alter_idx ON t_vec_alter( libsql_vector_idx(v) );
|
||||
SELECT * FROM vector_top_k('t_vec_alter_idx', '[1,2,3]', 1);
|
||||
SELECT * FROM vector_top_k('t_vec_alter_idx', '[5,6,7]', 1);
|
||||
SELECT * FROM vector_top_k('t_vec_alter_idx', vector('[1,2,3]'), 1);
|
||||
} {1 3 1}
|
||||
|
||||
# Prepare, step and finalize the statement $sql against the "db" handle and
# return the error message reported by the connection afterwards.
# Any Tcl error raised while running the statement is swallowed: only the
# message from sqlite3_errmsg is of interest to the caller.
proc error_messages {sql} {
  catch {
    set stmt [sqlite3_prepare db $sql -1 dummy]
    sqlite3_step $stmt
    sqlite3_finalize $stmt
  }
  return [sqlite3_errmsg db]
}
|
||||
|
||||
do_test vector-errors {
|
||||
set ret [list]
|
||||
lappend ret [error_messages {CREATE INDEX t_no_idx ON t_no( libsql_vector_idx(v) )}]
|
||||
sqlite3_exec db { CREATE TABLE t_err ( a INTEGER, b BLOB, c FLOAT32(-1), d FLOAT32(0), e FLOAT32(1) ) }
|
||||
sqlite3_exec db { CREATE TABLE t_err2 ( a, b, v FLOAT32(4), PRIMARY KEY (a, b) ) WITHOUT ROWID }
|
||||
lappend ret [error_messages {CREATE INDEX t_err_idx ON t_err( libsql_vector_idx(v) )}]
|
||||
lappend ret [error_messages {CREATE INDEX t_err_idx ON t_err( libsql_vector(e) )}]
|
||||
lappend ret [error_messages {CREATE INDEX t_err_idx ON t_err( libsql_vector_idx(a) )}]
|
||||
lappend ret [error_messages {CREATE INDEX t_err_idx ON t_err( libsql_vector_idx(b) )}]
|
||||
lappend ret [error_messages {CREATE INDEX t_err_idx ON t_err( libsql_vector_idx(c) )}]
|
||||
lappend ret [error_messages {CREATE INDEX t_err_idx ON t_err( libsql_vector_idx(d) )}]
|
||||
lappend ret [error_messages {CREATE INDEX t_err_idx ON t_err2( libsql_vector_idx(v) )}]
|
||||
sqlite3_exec db { CREATE TABLE t_err3 ( e FLOAT32(4) ) }
|
||||
sqlite3_exec db { CREATE INDEX t_err3_idx ON t_err3 (libsql_vector_idx(e)) }
|
||||
lappend ret [error_messages {INSERT INTO t_err3 VALUES (vector('[1]'))}]
|
||||
lappend ret [error_messages {INSERT INTO t_err3 VALUES (vector('[1, 2, 3, 4, 5]'))}]
|
||||
lappend ret [error_messages {INSERT INTO t_err3 VALUES (vector64('[1,2,3,4]'))}]
|
||||
sqlite3_exec db { CREATE TABLE t_mixed_t( v FLOAT32(3)); }
|
||||
sqlite3_exec db { INSERT INTO t_mixed_t VALUES('[1]'); }
|
||||
lappend ret [error_messages {CREATE INDEX t_mixed_t_idx ON t_mixed_t( libsql_vector_idx(v) )}]
|
||||
} [list {*}{
|
||||
{no such table: main.t_no}
|
||||
{no such column: v}
|
||||
{no such function: libsql_vector}
|
||||
{unexpected vector column type: INTEGER}
|
||||
{unexpected vector column type: BLOB}
|
||||
{non digit symbol in vector column parameter: FLOAT32(-1)}
|
||||
{vector column must have non-zero dimension for index: FLOAT32(0)}
|
||||
{vector index for tables without ROWID and composite primary key are not supported}
|
||||
{dimensions are different: 1 != 4}
|
||||
{dimensions are different: 5 != 4}
|
||||
{only f32 vectors are supported}
|
||||
{dimensions are different: 1 != 3}
|
||||
}]
|
88
libsql-sqlite3/tool/dump_vectoridx.py
Executable file
88
libsql-sqlite3/tool/dump_vectoridx.py
Executable file
@ -0,0 +1,88 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import struct
|
||||
|
||||
def vector_size(vector_dims):
    """Return the size in bytes of a serialized vector with `vector_dims` components.

    The size is 4 bytes plus 4 bytes for every component.
    """
    prefix_bytes = 4
    component_bytes = 4
    return prefix_bytes + vector_dims * component_bytes
|
||||
|
||||
def neighbour_metadata_offset(block_size, vector_dims):
    """Return the offset inside a block at which the neighbour metadata starts.

    A block begins with a 10 byte prefix (8 + 2 bytes) and the node vector,
    followed by room for the maximum number of neighbour vectors; the metadata
    (16 bytes per neighbour) is placed right after that area.
    """
    node_vector_sz = vector_size(vector_dims)
    edge_vector_sz = vector_size(vector_dims)
    # how many (vector, 16 byte metadata) pairs fit into the rest of the block
    max_neighbours = int((block_size - 8 - 2 - node_vector_sz) / (edge_vector_sz + 16))
    return 8 + 2 + node_vector_sz + edge_vector_sz * max_neighbours
|
||||
|
||||
def parse_vector(file, blocksize):
    """Read one block of `blocksize` bytes from `file` and print its content.

    Printed fields, in order: node id, neighbour count, the node vector, the
    vector of every neighbour and finally the (id, offset) metadata pair of
    every neighbour.

    Returns False when nothing could be read from the file (end of file) and
    True after a block was printed.
    """
    raw = file.read(blocksize)
    if not raw:
        return False

    def field(fmt, at, width):
        # decode a single little-endian value stored at raw[at:at + width]
        return struct.unpack(fmt, raw[at:at + width])[0]

    off = 0
    id = field("<q", off, 8)
    off += 8
    print(f"ID: {id}")
    num_neighbours = field("<h", off, 2)
    off += 2
    print(f"Num neighbours: {num_neighbours}")
    vector_len = field("<l", off, 4)
    off += 4
    print(f"Vector length: {vector_len}")
    for i in range(vector_len):
        vector = field("<f", off, 4)
        off += 4
        print(f"Vector[{i}]: {vector}")
    for i in range(num_neighbours):
        neighbour_vector_len = field("<l", off, 4)
        off += 4
        print(f"Neighbour {i} vector length: {neighbour_vector_len}")
        for j in range(neighbour_vector_len):
            vector = field("<f", off, 4)
            off += 4
            print(f"Neighbour {i} vector[{j}]: {vector}")
    # metadata lives at a fixed position which depends only on the block layout
    off = neighbour_metadata_offset(blocksize, vector_len)
    print(f"Neighbour metadata offset: {off}")
    for i in range(num_neighbours):
        id = field("<q", off, 8)
        off += 8
        print(f"Neighbour {i} ID: {id}")
        offset = field("<q", off, 8)
        off += 8
        print(f"Neighbour {i} offset: {offset}")
    return True
|
||||
|
||||
def parse_header(file):
    """Read the index header from `file` and return its fields as a dict.

    The first 32 bytes hold the header fields; the rest of the header block
    (whose size is the stored block size value shifted left by 9 bits) is
    consumed and discarded so that the file is positioned at the next block.

    Note that "block_size" in the returned dict is the raw stored value, not
    the size in bytes.
    """
    (
        magic,
        block_size_raw,
        vector_type,
        vector_dims,
        similarity_func,
        entry_vector_offset,
        first_free_offset,
    ) = struct.unpack("<qhhhhqq", file.read(32))
    # skip the remainder of the header block
    file.read((block_size_raw << 9) - 32)
    return {
        "magic": magic,
        "block_size": block_size_raw,
        "vector_type": vector_type,
        "vector_dims": vector_dims,
        "similarity_func": similarity_func,
        "entry_vector_offset": entry_vector_offset,
        "first_free_offset": first_free_offset,
    }
|
||||
|
||||
def parse(filename):
    """Dump the header and every block of the index file `filename` to stdout."""
    with open(filename, 'rb') as file:
        header = parse_header(file)
        print("==========================")
        print(f"Magic: {hex(header['magic'])}")
        print(f"Block size: {header['block_size']}")
        print(f"Vector type: {header['vector_type']}")
        print(f"Vector dimensions: {header['vector_dims']}")
        print(f"Similarity function: {header['similarity_func']}")
        print(f"Entry vector offset: {header['entry_vector_offset']}")
        print(f"First free offset: {header['first_free_offset']}")
        print("==========================")
        # the header stores the block size shifted right by 9 bits
        blocksize = header['block_size'] << 9
        while True:
            if not parse_vector(file, blocksize):
                break
            print("==========================")
|
||||
|
||||
# Command line entry point: the only argument is the path of the index file to dump.
parser = argparse.ArgumentParser()
parser.add_argument('filename')
args = parser.parse_args()

parse(args.filename)
|
@ -470,7 +470,9 @@ set flist {
|
||||
vector.c
|
||||
vectorfloat32.c
|
||||
vectorfloat64.c
|
||||
vectorIndex.c
|
||||
vectordiskann.c
|
||||
vectorvtab.c
|
||||
rtree.c
|
||||
icu.c
|
||||
fts3_icu.c
|
||||
|
80
libsql-sqlite3/tool/vectoridx_graphviz.py
Executable file
80
libsql-sqlite3/tool/vectoridx_graphviz.py
Executable file
@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import re
|
||||
import struct
|
||||
import argparse
|
||||
import graphviz
|
||||
import libsql_client
|
||||
from dataclasses import dataclass
|
||||
|
||||
def vector_size(v_type, v_dims):
    """Return the size in bytes of `v_dims` components of struct type `v_type`.

    `v_type` is a struct format character: "f" (4 bytes per component) or
    "d" (8 bytes per component). Any other value raises KeyError.
    """
    bytes_per_component = {"f": 4, "d": 8}
    return v_dims * bytes_per_component[v_type]
|
||||
|
||||
def neighbour_metadata_offset(block_size, v_type, v_dims):
    """Return the offset inside a block at which the neighbour metadata starts.

    A block begins with a 10 byte prefix (8 + 2 bytes) and the node vector,
    followed by room for the maximum number of neighbour vectors; the metadata
    (16 bytes per neighbour) is placed right after that area.
    """
    node_vector_sz = vector_size(v_type, v_dims)
    edge_vector_sz = vector_size(v_type, v_dims)
    # how many (vector, 16 byte metadata) pairs fit into the rest of the block
    max_neighbours = int((block_size - 8 - 2 - node_vector_sz) / (edge_vector_sz + 16))
    return 8 + 2 + node_vector_sz + edge_vector_sz * max_neighbours
|
||||
|
||||
def unpack(buffer, offset, format):
    """Decode `format` from `buffer` starting at `offset`.

    Returns a pair (next_offset, values): the offset right after the decoded
    data and the tuple of decoded values.
    """
    values = struct.unpack_from(format, buffer, offset)
    next_offset = offset + struct.calcsize(format)
    return next_offset, values
|
||||
|
||||
@dataclass
class Block:
    """Decoded content of a single index block (one graph node)."""

    # identifier of the node stored in the block
    id: int
    # components of the node vector (variable length, hence the ellipsis)
    vector: tuple[float, ...]

    # number of neighbours stored in the block
    n_count: int
    # vector of every neighbour, in storage order
    n_vectors: list[tuple[float, ...]]
    # identifier of every neighbour, in storage order
    n_ids: list[int]
|
||||
|
||||
def parse_block(block, v_type, v_dims, block_size=65536):
    """Decode the binary `block` into a Block instance.

    Parameters:
        block: bytes-like content of the block.
        v_type: struct format character of a vector component ("f" or "d").
        v_dims: number of components in every vector.
        block_size: size of the block in bytes, used to locate the neighbour
            metadata. The default (65536) keeps the previous hard-coded
            behaviour; pass another value for indices created with a
            different block size.

    Returns:
        Block with the node id, the node vector and the vectors/ids of its
        neighbours.
    """
    vector_format = "<" + v_type * v_dims
    offset = 0
    offset, (node_id, n_count) = unpack(block, offset, "<qh")
    offset, vector = unpack(block, offset, vector_format)
    n_vectors, n_ids = [], []
    for _ in range(n_count):
        offset, n_vector = unpack(block, offset, vector_format)
        n_vectors.append(n_vector)
    # neighbour metadata lives at a fixed position which depends only on the block layout
    offset = neighbour_metadata_offset(block_size, v_type, v_dims)
    for _ in range(n_count):
        # every metadata entry is a pair of 8 byte integers; the second one is used as the neighbour id
        offset, (_first, n_id) = unpack(block, offset, "<qq")
        n_ids.append(n_id)
    return Block(id=node_id, vector=vector, n_count=n_count, n_vectors=n_vectors, n_ids=n_ids)
|
||||
|
||||
|
||||
def parse(filename, shadow_idx):
    """Print the Graphviz source of the index graph stored in `shadow_idx`.

    `filename` is the database file and `shadow_idx` the name of the shadow
    table of the index (it must end with "_shadow"). The vector type and
    dimension are taken from the first vector column found in the DDL of the
    base table.

    Raises Exception if the shadow table name has no "_shadow" suffix or if no
    vector column can be found in the DDL of the base table.
    """
    suffix = '_shadow'
    if not shadow_idx.endswith(suffix):
        raise Exception(f'unexpected shadow table name: {shadow_idx}')
    struct_types = {"FLOAT32": "f", "F32_BLOB": "f", "FLOAT64": "d", "F64_BLOB": "d"}
    with libsql_client.create_client_sync('file:' + filename) as client:
        # index name -> base table name -> DDL of the base table
        index_name = shadow_idx[:-len(suffix)]
        table_name = client.execute('SELECT tbl_name FROM sqlite_master WHERE name = ?', [index_name]).rows[0][0]
        table_ddl = client.execute('SELECT sql FROM sqlite_master WHERE name = ?', [table_name]).rows[0][0]
        vector_column = re.search('(FLOAT32|FLOAT64|F32_BLOB|F64_BLOB)\\((\\d+)\\)', str(table_ddl))
        if not vector_column:
            raise Exception(f'unexpected vector column type name: {table_ddl}')
        v_type = struct_types[vector_column.group(1)]
        v_dims = int(vector_column.group(2))

        result = client.execute(f'SELECT rowid, data FROM {shadow_idx}')
        dot = graphviz.Digraph(comment='Index Graph')
        for row in result:
            block = parse_block(row['data'], v_type, v_dims)
            dot.node(f'{block.id}')
            for n_id in block.n_ids:
                dot.edge(f'{n_id}', f'{block.id}')
        print(dot.source)
|
||||
|
||||
def main():
    """Command-line entry point: read two positional arguments and run ``parse``.

    Positional arguments, in order: ``filename`` and ``shadow_idx``.
    """
    cli = argparse.ArgumentParser()
    for positional in ('filename', 'shadow_idx'):
        cli.add_argument(positional)
    options = cli.parse_args()
    parse(options.filename, options.shadow_idx)


# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
@ -78,7 +78,8 @@ async fn connection_execute_transactional_batch_success() {
|
||||
sqlite_schema
|
||||
WHERE
|
||||
type ='table' AND
|
||||
name NOT LIKE 'sqlite_%';",
|
||||
name NOT LIKE 'sqlite_%' AND
|
||||
name NOT LIKE 'libsql_%';",
|
||||
(),
|
||||
)
|
||||
.await
|
||||
@ -117,7 +118,8 @@ async fn connection_execute_transactional_batch_fail() {
|
||||
sqlite_schema
|
||||
WHERE
|
||||
type ='table' AND
|
||||
name NOT LIKE 'sqlite_%';",
|
||||
name NOT LIKE 'sqlite_%' AND
|
||||
name NOT LIKE 'libsql_%';",
|
||||
(),
|
||||
)
|
||||
.await
|
||||
@ -152,7 +154,8 @@ async fn connection_execute_transactional_batch_transaction_fail() {
|
||||
sqlite_schema
|
||||
WHERE
|
||||
type ='table' AND
|
||||
name NOT LIKE 'sqlite_%';",
|
||||
name NOT LIKE 'sqlite_%' AND
|
||||
name NOT LIKE 'libsql_%';",
|
||||
(),
|
||||
)
|
||||
.await
|
||||
@ -186,7 +189,8 @@ async fn connection_execute_transactional_batch_transaction_incorrect() {
|
||||
sqlite_schema
|
||||
WHERE
|
||||
type ='table' AND
|
||||
name NOT LIKE 'sqlite_%';",
|
||||
name NOT LIKE 'sqlite_%' AND
|
||||
name NOT LIKE 'libsql_%';",
|
||||
(),
|
||||
)
|
||||
.await
|
||||
@ -219,7 +223,8 @@ async fn connection_execute_batch() {
|
||||
sqlite_schema
|
||||
WHERE
|
||||
type ='table' AND
|
||||
name NOT LIKE 'sqlite_%';",
|
||||
name NOT LIKE 'sqlite_%' AND
|
||||
name NOT LIKE 'libsql_%';",
|
||||
(),
|
||||
)
|
||||
.await
|
||||
|
@ -179,6 +179,7 @@ pub enum Stmt {
|
||||
CreateIndex {
|
||||
unique: bool,
|
||||
if_not_exists: bool,
|
||||
idx_type: Option<Vec<Name>>,
|
||||
idx_name: QualifiedName,
|
||||
tbl_name: Name,
|
||||
columns: Vec<SortedColumn>,
|
||||
@ -328,6 +329,7 @@ impl ToTokens for Stmt {
|
||||
Stmt::CreateIndex {
|
||||
unique,
|
||||
if_not_exists,
|
||||
idx_type,
|
||||
idx_name,
|
||||
tbl_name,
|
||||
columns,
|
||||
@ -343,6 +345,10 @@ impl ToTokens for Stmt {
|
||||
s.append(TK_NOT, None)?;
|
||||
s.append(TK_EXISTS, None)?;
|
||||
}
|
||||
if let Some(idx_type) = idx_type {
|
||||
s.append(TK_USING, None)?;
|
||||
comma(idx_type, s)?;
|
||||
}
|
||||
idx_name.to_tokens(s)?;
|
||||
s.append(TK_ON, None)?;
|
||||
tbl_name.to_tokens(s)?;
|
||||
|
@ -1088,9 +1088,9 @@ paren_exprlist(A) ::= LP exprlist(X) RP. {A = X;}
|
||||
|
||||
///////////////////////////// The CREATE INDEX command ///////////////////////
|
||||
//
|
||||
cmd ::= createkw uniqueflag(U) INDEX ifnotexists(NE) fullname(X)
|
||||
cmd ::= createkw uniqueflag(U) INDEX ifnotexists(NE) fullname(X) indextype(T)
|
||||
ON nm(Y) LP sortlist(Z) RP where_opt(W). {
|
||||
self.ctx.stmt = Some(Stmt::CreateIndex { unique: U, if_not_exists: NE, idx_name: X,
|
||||
self.ctx.stmt = Some(Stmt::CreateIndex { unique: U, if_not_exists: NE, idx_type: T, idx_name: X,
|
||||
tbl_name: Y, columns: Z, where_clause: W });
|
||||
}
|
||||
|
||||
@ -1098,6 +1098,9 @@ cmd ::= createkw uniqueflag(U) INDEX ifnotexists(NE) fullname(X)
|
||||
uniqueflag(A) ::= UNIQUE. {A = true;}
|
||||
uniqueflag(A) ::= . {A = false;}
|
||||
|
||||
%type indextype {Option<Vec<Name>>}
|
||||
indextype(T) ::= USING idlist(L). {T = Some(L);}
|
||||
indextype(T) ::= . {T = None;}
|
||||
|
||||
// The eidlist non-terminal (Expression Id List) generates an ExprList
|
||||
// from a list of identifiers. The identifier names are in ExprList.a[].zName.
|
||||
|
Reference in New Issue
Block a user