2009-11-13 10:36:20 +00:00
/*
* * 2009 Oct 23
* *
* * The author disclaims copyright to this source code . In place of
* * a legal notice , here is a blessing :
* *
* * May you do good and not evil .
* * May you find forgiveness for yourself and forgive others .
* * May you share freely , never taking more than you give .
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*/
2011-06-16 00:54:45 +00:00
# include "fts3Int.h"
2009-11-19 15:25:25 +00:00
# if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
2009-11-13 10:36:20 +00:00
# include <string.h>
# include <assert.h>
2021-04-16 16:55:28 +00:00
# ifndef SQLITE_AMALGAMATION
typedef sqlite3_int64 i64 ;
# endif
2010-11-23 19:16:47 +00:00
/*
* * Characters that may appear in the second argument to matchinfo ( ) .
*/
# define FTS3_MATCHINFO_NPHRASE 'p' /* 1 value */
# define FTS3_MATCHINFO_NCOL 'c' /* 1 value */
# define FTS3_MATCHINFO_NDOC 'n' /* 1 value */
# define FTS3_MATCHINFO_AVGLENGTH 'a' /* nCol values */
# define FTS3_MATCHINFO_LENGTH 'l' /* nCol values */
# define FTS3_MATCHINFO_LCS 's' /* nCol values */
# define FTS3_MATCHINFO_HITS 'x' /* 3*nCol*nPhrase values */
2015-05-02 09:44:15 +00:00
# define FTS3_MATCHINFO_LHITS 'y' /* nCol*nPhrase values */
2015-05-05 20:39:53 +00:00
# define FTS3_MATCHINFO_LHITS_BM 'b' /* nCol*nPhrase values */
2010-11-23 19:16:47 +00:00
/*
* * The default value for the second argument to matchinfo ( ) .
*/
# define FTS3_MATCHINFO_DEFAULT "pcx"
2010-01-02 19:02:02 +00:00
2010-01-12 17:57:30 +00:00
/*
2023-01-25 13:42:55 +00:00
* * Used as an sqlite3Fts3ExprIterate ( ) context when loading phrase doclists to
2010-01-12 17:57:30 +00:00
* * Fts3Expr . aDoclist [ ] / nDoclist .
*/
typedef struct LoadDoclistCtx LoadDoclistCtx ;
struct LoadDoclistCtx {
2010-10-19 14:07:59 +00:00
Fts3Cursor * pCsr ; /* FTS3 Cursor */
2010-01-12 17:57:30 +00:00
int nPhrase ; /* Number of phrases seen so far */
int nToken ; /* Number of tokens seen so far */
} ;
/*
** The following types are used as part of the implementation of the
** fts3BestSnippet() routine.
**
** SnippetIter is the iterator state used to step through candidate snippet
** windows of one column. It is zeroed and then populated by
** fts3BestSnippet(), and advanced by fts3SnippetNextCandidate().
*/
typedef struct SnippetIter SnippetIter ;
typedef struct SnippetPhrase SnippetPhrase ;
typedef struct SnippetFragment SnippetFragment ;
struct SnippetIter {
Fts3Cursor * pCsr ; /* Cursor the snippet is being generated for */
int iCol ; /* Column the snippet is extracted from */
int nSnippet ; /* Requested snippet length (in tokens) */
int nPhrase ; /* Number of entries in aPhrase[] */
SnippetPhrase * aPhrase ; /* Array of nPhrase entries, indexed by phrase number */
int iCurrent ; /* First token of current candidate; negative (-1) before the first one */
} ;
struct SnippetPhrase {
int nToken ; /* Number of tokens in phrase */
char * pList ; /* Pointer to start of phrase position list */
2021-04-16 16:55:28 +00:00
i64 iHead ; /* Next value in position list */
2010-01-12 17:57:30 +00:00
char * pHead ; /* Position list data following iHead */
2021-04-16 16:55:28 +00:00
i64 iTail ; /* Next value in trailing position list */
2010-01-12 17:57:30 +00:00
char * pTail ; /* Position list data following iTail */
} ;
/*
** Description of one selected snippet window. fts3BestSnippet() fills in
** every field of the caller-supplied object when it finds a candidate.
*/
struct SnippetFragment {
int iCol ; /* Column snippet is extracted from */
int iPos ; /* Index of first token in snippet */
u64 covered ; /* Mask of query phrases covered (bit i%64 for phrase i) */
u64 hlmask ; /* Mask of snippet terms to highlight (bit k is token iPos+k) */
} ;
/*
2023-01-25 13:42:55 +00:00
* * This type is used as an sqlite3Fts3ExprIterate ( ) context object while
2010-01-12 17:57:30 +00:00
* * accumulating the data returned by the matchinfo ( ) function .
*/
typedef struct MatchInfo MatchInfo ;
struct MatchInfo {
Fts3Cursor * pCursor ; /* FTS3 Cursor */
int nCol ; /* Number of columns in table */
2010-11-23 19:16:47 +00:00
int nPhrase ; /* Number of matchable phrases in query */
sqlite3_int64 nDoc ; /* Number of docs in database */
2015-05-05 20:39:53 +00:00
char flag ;
2010-01-12 17:57:30 +00:00
u32 * aMatchinfo ; /* Pre-allocated buffer */
} ;
2015-05-05 19:37:07 +00:00
/*
* * An instance of this structure is used to manage a pair of buffers , each
* * ( nElem * sizeof ( u32 ) ) bytes in size . See the MatchinfoBuffer code below
* * for details .
*/
struct MatchinfoBuffer {
u8 aRef [ 3 ] ;
int nElem ;
int bGlobal ; /* Set if global data is loaded */
char * zMatchinfo ;
2015-05-26 17:29:48 +00:00
u32 aMatchinfo [ 1 ] ;
2015-05-05 19:37:07 +00:00
} ;
2010-01-12 17:57:30 +00:00
/*
** The snippet() and offsets() functions both return text values. An instance
** of the following structure is used to accumulate those values while the
** functions are running. See fts3StringAppend() for details.
**
** fts3StringAppend() grows the buffer with sqlite3_realloc64() and keeps
** z[n] set to a nul-terminator after every append.
*/
typedef struct StrBuffer StrBuffer ;
struct StrBuffer {
char * z ; /* Pointer to buffer containing string (0 until first allocation) */
int n ; /* Length of z in bytes (excl. nul-term) */
int nAlloc ; /* Allocated size of buffer z in bytes */
} ;
2015-05-05 19:37:07 +00:00
/*************************************************************************
* * Start of MatchinfoBuffer code .
*/
/*
* * Allocate a two - slot MatchinfoBuffer object .
*/
2019-04-13 14:17:09 +00:00
static MatchinfoBuffer * fts3MIBufferNew ( size_t nElem , const char * zMatchinfo ) {
2015-05-05 19:37:07 +00:00
MatchinfoBuffer * pRet ;
2019-04-13 04:38:32 +00:00
sqlite3_int64 nByte = sizeof ( u32 ) * ( 2 * ( sqlite3_int64 ) nElem + 1 )
+ sizeof ( MatchinfoBuffer ) ;
sqlite3_int64 nStr = strlen ( zMatchinfo ) ;
2015-05-05 19:37:07 +00:00
2021-10-20 11:40:34 +00:00
pRet = sqlite3Fts3MallocZero ( nByte + nStr + 1 ) ;
2015-05-05 19:37:07 +00:00
if ( pRet ) {
pRet - > aMatchinfo [ 0 ] = ( u8 * ) ( & pRet - > aMatchinfo [ 1 ] ) - ( u8 * ) pRet ;
2019-04-13 14:17:09 +00:00
pRet - > aMatchinfo [ 1 + nElem ] = pRet - > aMatchinfo [ 0 ]
+ sizeof ( u32 ) * ( ( int ) nElem + 1 ) ;
pRet - > nElem = ( int ) nElem ;
2015-05-05 19:37:07 +00:00
pRet - > zMatchinfo = ( ( char * ) pRet ) + nByte ;
memcpy ( pRet - > zMatchinfo , zMatchinfo , nStr + 1 ) ;
pRet - > aRef [ 0 ] = 1 ;
}
return pRet ;
}
static void fts3MIBufferFree ( void * p ) {
MatchinfoBuffer * pBuf = ( MatchinfoBuffer * ) ( ( u8 * ) p - ( ( u32 * ) p ) [ - 1 ] ) ;
assert ( ( u32 * ) p = = & pBuf - > aMatchinfo [ 1 ]
| | ( u32 * ) p = = & pBuf - > aMatchinfo [ pBuf - > nElem + 2 ]
) ;
if ( ( u32 * ) p = = & pBuf - > aMatchinfo [ 1 ] ) {
pBuf - > aRef [ 1 ] = 0 ;
} else {
pBuf - > aRef [ 2 ] = 0 ;
}
if ( pBuf - > aRef [ 0 ] = = 0 & & pBuf - > aRef [ 1 ] = = 0 & & pBuf - > aRef [ 2 ] = = 0 ) {
sqlite3_free ( pBuf ) ;
}
}
/*
** Obtain an output buffer of p->nElem u32 values and store a pointer to it
** in *paOut. The return value is the function that must eventually be
** called on that pointer to release it:
**
**   * If slot 1 or slot 2 of p is unused, it is marked in use and returned
**     together with fts3MIBufferFree.
**
**   * Otherwise a separate buffer is obtained from sqlite3_malloc64() and
**     returned together with sqlite3_free. If p->bGlobal is set, the
**     contents of slot 1 are copied into the new buffer.
**
** If the heap allocation fails, *paOut is set to 0 and 0 is returned.
*/
static void (*fts3MIBufferAlloc(MatchinfoBuffer *p, u32 **paOut))(void*){
  u32 *aHeap;
  int iSlot;

  /* Prefer one of the two embedded slots. */
  for(iSlot=1; iSlot<=2; iSlot++){
    if( p->aRef[iSlot]==0 ){
      p->aRef[iSlot] = 1;
      *paOut = &p->aMatchinfo[ (iSlot==1) ? 1 : p->nElem+2 ];
      return fts3MIBufferFree;
    }
  }

  /* Both slots are busy: fall back to a stand-alone allocation. */
  aHeap = (u32*)sqlite3_malloc64(p->nElem * sizeof(u32));
  *paOut = aHeap;
  if( aHeap==0 ) return 0;
  if( p->bGlobal ){
    memcpy(aHeap, &p->aMatchinfo[1], p->nElem * sizeof(u32));
  }
  return sqlite3_free;
}
/*
** Mark the buffer as holding global data and duplicate the current
** contents of slot 1 into slot 2, so that both slots start out with the
** same values.
*/
static void fts3MIBufferSetGlobal(MatchinfoBuffer *p){
  u32 *aSlot1 = &p->aMatchinfo[1];
  u32 *aSlot2 = &p->aMatchinfo[2 + p->nElem];

  memcpy(aSlot2, aSlot1, p->nElem * sizeof(u32));
  p->bGlobal = 1;
}
/*
** Drop the creator's reference to a MatchinfoBuffer object allocated using
** fts3MIBufferNew(). Passing 0 is a harmless no-op.
**
** The memory is released immediately unless one of the two slots is still
** in use, in which case the final fts3MIBufferFree() call on a slot frees
** the object instead.
*/
void sqlite3Fts3MIBufferFree(MatchinfoBuffer *p){
  if( p==0 ) return;

  assert( p->aRef[0]==1 );
  p->aRef[0] = 0;
  if( p->aRef[1]==0 && p->aRef[2]==0 ){
    sqlite3_free(p);
  }
}
/*
* * End of MatchinfoBuffer code .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2010-01-12 17:57:30 +00:00
/*
* * This function is used to help iterate through a position - list . A position
* * list is a list of unique integers , sorted from smallest to largest . Each
* * element of the list is represented by an FTS3 varint that takes the value
* * of the difference between the current element and the previous one plus
* * two . For example , to store the position - list :
* *
* * 4 9 113
* *
* * the three varints :
* *
* * 6 7 106
* *
* * are encoded .
* *
* * When this function is called , * pp points to the start of an element of
* * the list . * piPos contains the value of the previous entry in the list .
* * After it returns , * piPos contains the value of the next element of the
* * list and * pp is advanced to the following varint .
*/
2021-04-16 16:55:28 +00:00
static void fts3GetDeltaPosition(char **pp, i64 *piPos){
  int iDelta;                     /* Raw varint value (difference plus two) */
  int nByte;                      /* Bytes consumed by the varint */

  nByte = fts3GetVarint32(*pp, &iDelta);
  *pp += nByte;
  *piPos += (iDelta-2);
}
2010-01-12 17:57:30 +00:00
/*
2023-01-25 13:42:55 +00:00
* * Helper function for sqlite3Fts3ExprIterate ( ) ( see below ) .
2010-01-12 17:57:30 +00:00
*/
2010-01-11 12:00:47 +00:00
static int fts3ExprIterate2 (
Fts3Expr * pExpr , /* Expression to iterate phrases of */
int * piPhrase , /* Pointer to phrase counter */
int ( * x ) ( Fts3Expr * , int , void * ) , /* Callback function to invoke for phrases */
void * pCtx /* Second argument to pass to callback */
) {
2010-01-12 17:57:30 +00:00
int rc ; /* Return code */
2015-05-06 17:51:59 +00:00
int eType = pExpr - > eType ; /* Type of expression node pExpr */
2010-01-12 17:57:30 +00:00
2015-05-06 17:51:59 +00:00
if ( eType ! = FTSQUERY_PHRASE ) {
assert ( pExpr - > pLeft & & pExpr - > pRight ) ;
rc = fts3ExprIterate2 ( pExpr - > pLeft , piPhrase , x , pCtx ) ;
if ( rc = = SQLITE_OK & & eType ! = FTSQUERY_NOT ) {
rc = fts3ExprIterate2 ( pExpr - > pRight , piPhrase , x , pCtx ) ;
2015-05-05 19:37:07 +00:00
}
2015-05-06 17:51:59 +00:00
} else {
rc = x ( pExpr , * piPhrase , pCtx ) ;
( * piPhrase ) + + ;
2010-01-11 12:00:47 +00:00
}
return rc ;
}
2010-01-02 19:02:02 +00:00
/*
* * Iterate through all phrase nodes in an FTS3 query , except those that
* * are part of a sub - tree that is the right - hand - side of a NOT operator .
* * For each phrase node found , the supplied callback function is invoked .
* *
* * If the callback function returns anything other than SQLITE_OK ,
* * the iteration is abandoned and the error code returned immediately .
* * Otherwise , SQLITE_OK is returned after a callback has been made for
* * all eligible phrase nodes .
*/
2023-01-25 13:42:55 +00:00
int sqlite3Fts3ExprIterate (
2010-01-02 19:02:02 +00:00
Fts3Expr * pExpr , /* Expression to iterate phrases of */
2010-01-11 12:00:47 +00:00
int ( * x ) ( Fts3Expr * , int , void * ) , /* Callback function to invoke for phrases */
2010-01-02 19:02:02 +00:00
void * pCtx /* Second argument to pass to callback */
) {
2010-01-12 17:57:30 +00:00
int iPhrase = 0 ; /* Variable used as the phrase counter */
2015-05-06 08:43:26 +00:00
return fts3ExprIterate2 ( pExpr , & iPhrase , x , pCtx ) ;
2010-01-02 19:02:02 +00:00
}
2010-01-12 17:57:30 +00:00
/*
2023-01-25 13:42:55 +00:00
* * This is an sqlite3Fts3ExprIterate ( ) callback used while loading the
* * doclists for each phrase into Fts3Expr . aDoclist [ ] / nDoclist . See also
2010-01-12 17:57:30 +00:00
* * fts3ExprLoadDoclists ( ) .
*/
2010-12-10 17:06:48 +00:00
static int fts3ExprLoadDoclistsCb ( Fts3Expr * pExpr , int iPhrase , void * ctx ) {
2010-01-02 19:02:02 +00:00
int rc = SQLITE_OK ;
2011-05-28 15:57:40 +00:00
Fts3Phrase * pPhrase = pExpr - > pPhrase ;
2010-01-02 19:02:02 +00:00
LoadDoclistCtx * p = ( LoadDoclistCtx * ) ctx ;
2010-01-06 17:19:21 +00:00
2010-02-26 01:46:54 +00:00
UNUSED_PARAMETER ( iPhrase ) ;
2010-01-02 19:02:02 +00:00
p - > nPhrase + + ;
2011-05-28 15:57:40 +00:00
p - > nToken + = pPhrase - > nToken ;
2010-01-06 17:19:21 +00:00
2010-01-02 19:02:02 +00:00
return rc ;
}
2010-01-12 17:57:30 +00:00
/*
* * Load the doclists for each phrase in the query associated with FTS3 cursor
* * pCsr .
* *
* * If pnPhrase is not NULL , then * pnPhrase is set to the number of matchable
* * phrases in the expression ( all phrases except those directly or
* * indirectly descended from the right - hand - side of a NOT operator ) . If
* * pnToken is not NULL , then it is set to the number of tokens in all
* * matchable phrases of the expression .
*/
2010-01-06 17:19:21 +00:00
static int fts3ExprLoadDoclists (
2010-01-12 17:57:30 +00:00
Fts3Cursor * pCsr , /* Fts3 cursor for current query */
2010-01-06 17:19:21 +00:00
int * pnPhrase , /* OUT: Number of phrases in query */
int * pnToken /* OUT: Number of tokens in query */
) {
2010-01-12 17:57:30 +00:00
int rc ; /* Return Code */
2023-01-25 13:42:55 +00:00
LoadDoclistCtx sCtx = { 0 , 0 , 0 } ; /* Context for sqlite3Fts3ExprIterate() */
2010-10-19 14:07:59 +00:00
sCtx . pCsr = pCsr ;
2023-01-25 13:42:55 +00:00
rc = sqlite3Fts3ExprIterate ( pCsr - > pExpr , fts3ExprLoadDoclistsCb , ( void * ) & sCtx ) ;
2010-01-06 17:19:21 +00:00
if ( pnPhrase ) * pnPhrase = sCtx . nPhrase ;
if ( pnToken ) * pnToken = sCtx . nToken ;
2010-01-02 19:02:02 +00:00
return rc ;
}
2010-11-25 17:49:28 +00:00
/*
** sqlite3Fts3ExprIterate() callback used by fts3ExprPhraseCount(). Argument
** ctx points to an int counter, which is incremented. The phrase number is
** also recorded in the visited node's iPhrase field. Always returns
** SQLITE_OK.
*/
static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
  int *pnCount = (int*)ctx;

  pExpr->iPhrase = iPhrase;
  *pnCount += 1;
  return SQLITE_OK;
}
/*
** Return the number of phrase nodes that sqlite3Fts3ExprIterate() visits in
** expression pExpr. As a side effect, the iPhrase field of each visited
** node is set to its phrase number.
*/
static int fts3ExprPhraseCount(Fts3Expr *pExpr){
  int nCount = 0;

  /* The callback always returns SQLITE_OK, so the result code is ignored. */
  (void)sqlite3Fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void*)&nCount);
  return nCount;
}
2010-01-11 12:00:47 +00:00
/*
* * Advance the position list iterator specified by the first two
* * arguments so that it points to the first element with a value greater
* * than or equal to parameter iNext .
*/
2021-04-16 16:55:28 +00:00
static void fts3SnippetAdvance(char **ppIter, i64 *piIter, int iNext){
  char *pCur = *ppIter;           /* Working copy of the list pointer */
  i64 iPos;                       /* Working copy of the position value */

  /* A null pointer means the iterator is already exhausted (or was never
  ** set); leave both outputs untouched. */
  if( pCur==0 ) return;

  iPos = *piIter;
  while( iPos<iNext ){
    if( (*pCur & 0xFE)==0 ){
      /* Next byte is 0x00 or 0x01: no further positions in this list.
      ** Mark the iterator as finished. */
      *piIter = -1;
      *ppIter = 0;
      return;
    }
    fts3GetDeltaPosition(&pCur, &iPos);
  }

  *piIter = iPos;
  *ppIter = pCur;
}
2010-01-12 17:57:30 +00:00
/*
* * Advance the snippet iterator to the next candidate snippet .
*/
static int fts3SnippetNextCandidate(SnippetIter *pIter){
  SnippetPhrase *aPhrase = pIter->aPhrase;
  int nPhrase = pIter->nPhrase;
  int iLast = 0x7FFFFFFF;         /* Smallest pending head position */
  int iFirst;                     /* First token of the new candidate */
  int ii;                         /* Loop counter */

  if( pIter->iCurrent<0 ){
    /* First call on a freshly initialized iterator. The first candidate
    ** always starts at token 0, even if its score turns out to be zero.
    ** Move each head cursor to the first position at or beyond the end
    ** of that window. */
    pIter->iCurrent = 0;
    for(ii=0; ii<nPhrase; ii++){
      fts3SnippetAdvance(
          &aPhrase[ii].pHead, &aPhrase[ii].iHead, pIter->nSnippet
      );
    }
    return 0;
  }

  /* Find the nearest position any head cursor is waiting on. */
  for(ii=0; ii<nPhrase; ii++){
    SnippetPhrase *pPhrase = &aPhrase[ii];
    if( pPhrase->pHead && pPhrase->iHead<iLast ){
      iLast = pPhrase->iHead;
    }
  }
  if( iLast==0x7FFFFFFF ){
    /* Every head cursor is exhausted: there are no more candidates. */
    return 1;
  }

  /* The next candidate is the window whose final token is iLast. Heads
  ** move past iLast; tails catch up to the start of the window. */
  iFirst = iLast - pIter->nSnippet + 1;
  pIter->iCurrent = iFirst;
  for(ii=0; ii<nPhrase; ii++){
    SnippetPhrase *pPhrase = &aPhrase[ii];
    fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iLast+1);
    fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iFirst);
  }
  return 0;
}
2010-01-12 17:57:30 +00:00
/*
* * Retrieve information about the current candidate snippet of snippet
* * iterator pIter .
*/
2010-01-11 12:00:47 +00:00
static void fts3SnippetDetails (
2010-01-12 17:57:30 +00:00
SnippetIter * pIter , /* Snippet iterator */
2010-01-11 12:00:47 +00:00
u64 mCovered , /* Bitmask of phrases already covered */
int * piToken , /* OUT: First token of proposed snippet */
int * piScore , /* OUT: "Score" for this snippet */
u64 * pmCover , /* OUT: Bitmask of phrases covered */
u64 * pmHighlight /* OUT: Bitmask of terms to highlight */
2010-01-02 19:02:02 +00:00
) {
2010-01-11 12:00:47 +00:00
int iStart = pIter - > iCurrent ; /* First token of snippet */
2010-01-12 17:57:30 +00:00
int iScore = 0 ; /* Score of this snippet */
int i ; /* Loop counter */
u64 mCover = 0 ; /* Mask of phrases covered by this snippet */
u64 mHighlight = 0 ; /* Mask of tokens to highlight in snippet */
2010-01-11 12:00:47 +00:00
for ( i = 0 ; i < pIter - > nPhrase ; i + + ) {
SnippetPhrase * pPhrase = & pIter - > aPhrase [ i ] ;
if ( pPhrase - > pTail ) {
char * pCsr = pPhrase - > pTail ;
2021-04-16 16:55:28 +00:00
i64 iCsr = pPhrase - > iTail ;
2010-01-11 12:00:47 +00:00
2019-02-28 13:41:35 +00:00
while ( iCsr < ( iStart + pIter - > nSnippet ) & & iCsr > = iStart ) {
2010-01-11 12:00:47 +00:00
int j ;
2019-08-21 11:31:48 +00:00
u64 mPhrase = ( u64 ) 1 < < ( i % 64 ) ;
2010-01-11 12:00:47 +00:00
u64 mPos = ( u64 ) 1 < < ( iCsr - iStart ) ;
2019-01-18 19:26:48 +00:00
assert ( iCsr > = iStart & & ( iCsr - iStart ) < = 64 ) ;
2019-08-21 11:31:48 +00:00
assert ( i > = 0 ) ;
2010-01-11 12:00:47 +00:00
if ( ( mCover | mCovered ) & mPhrase ) {
iScore + + ;
} else {
iScore + = 1000 ;
}
mCover | = mPhrase ;
2010-01-02 19:02:02 +00:00
2010-01-11 12:00:47 +00:00
for ( j = 0 ; j < pPhrase - > nToken ; j + + ) {
mHighlight | = ( mPos > > j ) ;
}
2010-01-02 19:02:02 +00:00
2010-01-11 12:00:47 +00:00
if ( 0 = = ( * pCsr & 0x0FE ) ) break ;
fts3GetDeltaPosition ( & pCsr , & iCsr ) ;
}
2010-01-02 19:02:02 +00:00
}
}
2010-01-12 17:57:30 +00:00
/* Set the output variables before returning. */
2010-01-11 12:00:47 +00:00
* piToken = iStart ;
* piScore = iScore ;
* pmCover = mCover ;
* pmHighlight = mHighlight ;
2010-01-02 19:02:02 +00:00
}
2010-01-11 12:00:47 +00:00
/*
2023-01-25 13:42:55 +00:00
* * This function is an sqlite3Fts3ExprIterate ( ) callback used by
* * fts3BestSnippet ( ) . Each invocation populates an element of the
* * SnippetIter . aPhrase [ ] array .
2010-01-11 12:00:47 +00:00
*/
static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){
  SnippetIter *pIter = (SnippetIter*)ctx;
  SnippetPhrase *pSlot = &pIter->aPhrase[iPhrase];
  char *pPos;                     /* Position list for this phrase, or 0 */
  i64 iFirst = 0;                 /* First position in the list */
  int rc;                         /* Return code */

  pSlot->nToken = pExpr->pPhrase->nToken;
  rc = sqlite3Fts3EvalPhrasePoslist(pIter->pCsr, pExpr, pIter->iCol, &pPos);
  assert( rc==SQLITE_OK || pPos==0 );

  if( pPos==0 ){
    /* No position list was returned. The aPhrase[] entry keeps the
    ** all-zero state it was allocated with. */
    assert( rc!=SQLITE_OK || (
        pSlot->pList==0 && pSlot->pHead==0 && pSlot->pTail==0
    ));
    return rc;
  }

  pSlot->pList = pPos;
  fts3GetDeltaPosition(&pPos, &iFirst);
  if( iFirst<0 ){
    /* A negative first position can only come from a malformed list. */
    return FTS_CORRUPT_VTAB;
  }

  /* Both cursors start on the first entry of the list. */
  pSlot->pHead = pPos;
  pSlot->pTail = pPos;
  pSlot->iHead = iFirst;
  pSlot->iTail = iFirst;
  return rc;
}
2010-01-12 17:57:30 +00:00
/*
* * Select the fragment of text consisting of nFragment contiguous tokens
* * from column iCol that represent the " best " snippet . The best snippet
* * is the snippet with the highest score , where scores are calculated
* * by adding :
* *
2013-03-21 21:20:32 +00:00
* * ( a ) + 1 point for each occurrence of a matchable phrase in the snippet .
2010-01-12 17:57:30 +00:00
* *
2013-03-21 21:20:32 +00:00
* * ( b ) + 1000 points for the first occurrence of each matchable phrase in
2010-01-12 17:57:30 +00:00
* * the snippet for which the corresponding mCovered bit is not set .
* *
* * The selected snippet parameters are stored in structure * pFragment before
* * returning . The score of the selected snippet is stored in * piScore
* * before returning .
*/
2010-01-02 19:02:02 +00:00
static int fts3BestSnippet(
  int nSnippet,                   /* Desired snippet length */
  Fts3Cursor *pCsr,               /* Cursor to create snippet for */
  int iCol,                       /* Index of column to create snippet from */
  u64 mCovered,                   /* Mask of phrases already covered */
  u64 *pmSeen,                    /* IN/OUT: Mask of phrases seen */
  SnippetFragment *pFragment,     /* OUT: Best snippet found */
  int *piScore                    /* OUT: Score of snippet pFragment */
){
  SnippetIter sIter;              /* Iterates through snippet candidates */
  sqlite3_int64 nByte;            /* Number of bytes of space to allocate */
  int nList;                      /* Number of phrases in expression */
  int iBest = -1;                 /* Best snippet score found so far */
  int ii;                         /* Loop counter */
  int rc;                         /* Return code */

  memset(&sIter, 0, sizeof(sIter));

  /* Count the phrases in the expression. */
  rc = fts3ExprLoadDoclists(pCsr, &nList, 0);
  if( rc!=SQLITE_OK ) return rc;

  /* One zeroed SnippetPhrase per phrase. */
  nByte = sizeof(SnippetPhrase) * nList;
  sIter.aPhrase = (SnippetPhrase*)sqlite3Fts3MallocZero(nByte);
  if( sIter.aPhrase==0 ) return SQLITE_NOMEM;

  /* Fill in the iterator, then let fts3SnippetFindPositions() populate
  ** the aPhrase[] array. */
  sIter.pCsr = pCsr;
  sIter.iCol = iCol;
  sIter.nSnippet = nSnippet;
  sIter.nPhrase = nList;
  sIter.iCurrent = -1;
  rc = sqlite3Fts3ExprIterate(
      pCsr->pExpr, fts3SnippetFindPositions, (void*)&sIter
  );

  if( rc==SQLITE_OK ){
    /* Add every phrase that has a position list in this column to the
    ** *pmSeen mask. */
    for(ii=0; ii<nList; ii++){
      if( sIter.aPhrase[ii].pHead ){
        *pmSeen |= (u64)1 << (ii%64);
      }
    }

    /* Score each candidate in turn, remembering the first one to reach
    ** the highest score. */
    pFragment->iCol = iCol;
    while( fts3SnippetNextCandidate(&sIter)==0 ){
      int iCandPos;
      int iCandScore;
      u64 mCandCover;
      u64 mCandHl;

      fts3SnippetDetails(
          &sIter, mCovered, &iCandPos, &iCandScore, &mCandCover, &mCandHl
      );
      assert( iCandScore>=0 );
      if( iCandScore>iBest ){
        pFragment->iPos = iCandPos;
        pFragment->hlmask = mCandHl;
        pFragment->covered = mCandCover;
        iBest = iCandScore;
      }
    }
    *piScore = iBest;
  }

  sqlite3_free(sIter.aPhrase);
  return rc;
}
2010-01-11 12:00:47 +00:00
2010-01-12 17:57:30 +00:00
/*
* * Append a string to the string - buffer passed as the first argument .
* *
* * If nAppend is negative , then the length of the string zAppend is
* * determined using strlen ( ) .
*/
2010-01-02 19:02:02 +00:00
static int fts3StringAppend (
2010-01-12 17:57:30 +00:00
StrBuffer * pStr , /* Buffer to append to */
const char * zAppend , /* Pointer to data to append to buffer */
int nAppend /* Size of zAppend in bytes (or -1) */
2010-01-02 19:02:02 +00:00
) {
if ( nAppend < 0 ) {
2010-02-26 01:46:54 +00:00
nAppend = ( int ) strlen ( zAppend ) ;
2010-01-02 19:02:02 +00:00
}
2010-01-12 17:57:30 +00:00
/* If there is insufficient space allocated at StrBuffer.z, use realloc()
* * to grow the buffer until so that it is big enough to accomadate the
* * appended data .
*/
2010-01-02 19:02:02 +00:00
if ( pStr - > n + nAppend + 1 > = pStr - > nAlloc ) {
2019-01-08 20:02:48 +00:00
sqlite3_int64 nAlloc = pStr - > nAlloc + ( sqlite3_int64 ) nAppend + 100 ;
char * zNew = sqlite3_realloc64 ( pStr - > z , nAlloc ) ;
2010-01-02 19:02:02 +00:00
if ( ! zNew ) {
return SQLITE_NOMEM ;
}
pStr - > z = zNew ;
pStr - > nAlloc = nAlloc ;
}
2013-08-19 18:17:03 +00:00
assert ( pStr - > z ! = 0 & & ( pStr - > nAlloc > = pStr - > n + nAppend + 1 ) ) ;
2010-01-02 19:02:02 +00:00
2010-01-12 17:57:30 +00:00
/* Append the data to the string buffer. */
2010-01-02 19:02:02 +00:00
memcpy ( & pStr - > z [ pStr - > n ] , zAppend , nAppend ) ;
pStr - > n + = nAppend ;
pStr - > z [ pStr - > n ] = ' \0 ' ;
return SQLITE_OK ;
}
2010-01-12 17:57:30 +00:00
/*
* * The fts3BestSnippet ( ) function often selects snippets that end with a
* * query term . That is , the final term of the snippet is always a term
* * that requires highlighting . For example , if ' X ' is a highlighted term
* * and ' . ' is a non - highlighted term , BestSnippet ( ) may select :
* *
* * . . . . . . . . X . . . . . X
* *
* * This function " shifts " the beginning of the snippet forward in the
* * document so that there are approximately the same number of
* * non - highlighted terms to the right of the final highlighted term as there
* * are to the left of the first highlighted term . For example , to this :
* *
* * . . . . X . . . . . X . . . .
* *
* * This is done as part of extracting the snippet text , not when selecting
* * the snippet . Snippet selection is done based on doclists only , so there
* * is no way for fts3BestSnippet ( ) to know whether or not the document
* * actually contains terms that follow the final highlighted term .
*/
2010-03-18 16:34:44 +00:00
static int fts3SnippetShift (
2010-01-12 17:57:30 +00:00
Fts3Table * pTab , /* FTS3 table snippet comes from */
2012-03-03 18:46:41 +00:00
int iLangid , /* Language id to use in tokenizing */
2010-01-12 17:57:30 +00:00
int nSnippet , /* Number of tokens desired for snippet */
const char * zDoc , /* Document text to extract snippet from */
int nDoc , /* Size of buffer zDoc in bytes */
int * piPos , /* IN/OUT: First token of snippet */
u64 * pHlmask /* IN/OUT: Mask of tokens to highlight */
2010-01-06 17:19:21 +00:00
) {
2010-01-12 17:57:30 +00:00
u64 hlmask = * pHlmask ; /* Local copy of initial highlight-mask */
2010-01-06 17:19:21 +00:00
if ( hlmask ) {
2010-01-12 17:57:30 +00:00
int nLeft ; /* Tokens to the left of first highlight */
int nRight ; /* Tokens to the right of last highlight */
int nDesired ; /* Ideal number of tokens to shift forward */
2010-01-06 17:19:21 +00:00
for ( nLeft = 0 ; ! ( hlmask & ( ( u64 ) 1 < < nLeft ) ) ; nLeft + + ) ;
for ( nRight = 0 ; ! ( hlmask & ( ( u64 ) 1 < < ( nSnippet - 1 - nRight ) ) ) ; nRight + + ) ;
2019-01-18 19:26:48 +00:00
assert ( ( nSnippet - 1 - nRight ) < = 63 & & ( nSnippet - 1 - nRight ) > = 0 ) ;
2010-01-06 17:19:21 +00:00
nDesired = ( nLeft - nRight ) / 2 ;
2010-01-12 17:57:30 +00:00
/* Ideally, the start of the snippet should be pushed forward in the
* * document nDesired tokens . This block checks if there are actually
* * nDesired tokens to the right of the snippet . If so , * piPos and
* * * pHlMask are updated to shift the snippet nDesired tokens to the
* * right . Otherwise , the snippet is shifted by the number of tokens
* * available .
*/
2010-01-06 17:19:21 +00:00
if ( nDesired > 0 ) {
2010-01-12 17:57:30 +00:00
int nShift ; /* Number of tokens to shift snippet by */
int iCurrent = 0 ; /* Token counter */
int rc ; /* Return Code */
2010-01-06 17:19:21 +00:00
sqlite3_tokenizer_module * pMod ;
sqlite3_tokenizer_cursor * pC ;
pMod = ( sqlite3_tokenizer_module * ) pTab - > pTokenizer - > pModule ;
2010-01-12 17:57:30 +00:00
/* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
* * or more tokens in zDoc / nDoc .
*/
2012-03-03 18:46:41 +00:00
rc = sqlite3Fts3OpenTokenizer ( pTab - > pTokenizer , iLangid , zDoc , nDoc , & pC ) ;
2010-01-06 18:36:27 +00:00
if ( rc ! = SQLITE_OK ) {
return rc ;
}
2010-01-06 17:19:21 +00:00
while ( rc = = SQLITE_OK & & iCurrent < ( nSnippet + nDesired ) ) {
2012-10-17 20:15:10 +00:00
const char * ZDUMMY ; int DUMMY1 = 0 , DUMMY2 = 0 , DUMMY3 = 0 ;
2010-01-06 17:19:21 +00:00
rc = pMod - > xNext ( pC , & ZDUMMY , & DUMMY1 , & DUMMY2 , & DUMMY3 , & iCurrent ) ;
}
pMod - > xClose ( pC ) ;
2010-01-07 10:54:28 +00:00
if ( rc ! = SQLITE_OK & & rc ! = SQLITE_DONE ) { return rc ; }
nShift = ( rc = = SQLITE_DONE ) + iCurrent - nSnippet ;
assert ( nShift < = nDesired ) ;
2010-01-06 17:19:21 +00:00
if ( nShift > 0 ) {
* piPos + = nShift ;
* pHlmask = hlmask > > nShift ;
}
}
}
return SQLITE_OK ;
}
2010-01-12 17:57:30 +00:00
/*
* * Extract the snippet text for fragment pFragment from cursor pCsr and
* * append it to string buffer pOut .
*/
2010-01-02 19:02:02 +00:00
static int fts3SnippetText (
Fts3Cursor * pCsr , /* FTS3 Cursor */
2010-01-06 17:19:21 +00:00
SnippetFragment * pFragment , /* Snippet to extract */
2010-01-07 10:54:28 +00:00
int iFragment , /* Fragment number */
int isLast , /* True for final fragment in snippet */
2010-01-02 19:02:02 +00:00
int nSnippet , /* Number of tokens in extracted snippet */
const char * zOpen , /* String inserted before highlighted term */
const char * zClose , /* String inserted after highlighted term */
2010-01-12 17:57:30 +00:00
const char * zEllipsis , /* String inserted between snippets */
StrBuffer * pOut /* Write output here */
2010-01-02 19:02:02 +00:00
) {
Fts3Table * pTab = ( Fts3Table * ) pCsr - > base . pVtab ;
int rc ; /* Return code */
2010-01-06 17:19:21 +00:00
const char * zDoc ; /* Document text to extract snippet from */
int nDoc ; /* Size of zDoc in bytes */
int iCurrent = 0 ; /* Current token number of document */
int iEnd = 0 ; /* Byte offset of end of current token */
2010-01-12 17:57:30 +00:00
int isShiftDone = 0 ; /* True after snippet is shifted */
int iPos = pFragment - > iPos ; /* First token of snippet */
u64 hlmask = pFragment - > hlmask ; /* Highlight-mask for snippet */
int iCol = pFragment - > iCol + 1 ; /* Query column to extract text from */
2010-01-02 19:02:02 +00:00
sqlite3_tokenizer_module * pMod ; /* Tokenizer module methods object */
sqlite3_tokenizer_cursor * pC ; /* Tokenizer cursor open on zDoc/nDoc */
2010-01-06 17:19:21 +00:00
2010-01-12 17:57:30 +00:00
zDoc = ( const char * ) sqlite3_column_text ( pCsr - > pStmt , iCol ) ;
2010-01-06 17:19:21 +00:00
if ( zDoc = = 0 ) {
2010-01-12 17:57:30 +00:00
if ( sqlite3_column_type ( pCsr - > pStmt , iCol ) ! = SQLITE_NULL ) {
2010-01-06 17:19:21 +00:00
return SQLITE_NOMEM ;
}
return SQLITE_OK ;
}
2010-01-12 17:57:30 +00:00
nDoc = sqlite3_column_bytes ( pCsr - > pStmt , iCol ) ;
2010-01-02 19:02:02 +00:00
2010-01-07 10:54:28 +00:00
/* Open a token cursor on the document. */
2010-01-02 19:02:02 +00:00
pMod = ( sqlite3_tokenizer_module * ) pTab - > pTokenizer - > pModule ;
2012-03-03 18:46:41 +00:00
rc = sqlite3Fts3OpenTokenizer ( pTab - > pTokenizer , pCsr - > iLangid , zDoc , nDoc , & pC ) ;
2010-01-06 17:19:21 +00:00
if ( rc ! = SQLITE_OK ) {
return rc ;
2010-01-02 19:02:02 +00:00
}
while ( rc = = SQLITE_OK ) {
2012-10-17 20:15:10 +00:00
const char * ZDUMMY ; /* Dummy argument used with tokenizer */
int DUMMY1 = - 1 ; /* Dummy argument used with tokenizer */
int iBegin = 0 ; /* Offset in zDoc of start of token */
int iFin = 0 ; /* Offset in zDoc of end of token */
int isHighlight = 0 ; /* True for highlighted terms */
2010-01-06 17:19:21 +00:00
2012-10-17 20:28:52 +00:00
/* Variable DUMMY1 is initialized to a negative value above. Elsewhere
* * in the FTS code the variable that the third argument to xNext points to
* * is initialized to zero before the first ( * but not necessarily
* * subsequent * ) call to xNext ( ) . This is done for a particular application
* * that needs to know whether or not the tokenizer is being used for
* * snippet generation or for some other purpose .
* *
* * Extreme care is required when writing code to depend on this
* * initialization . It is not a documented part of the tokenizer interface .
* * If a tokenizer is used directly by any code outside of FTS , this
* * convention might not be respected . */
2010-01-07 10:54:28 +00:00
rc = pMod - > xNext ( pC , & ZDUMMY , & DUMMY1 , & iBegin , & iFin , & iCurrent ) ;
if ( rc ! = SQLITE_OK ) {
if ( rc = = SQLITE_DONE ) {
/* Special case - the last token of the snippet is also the last token
* * of the column . Append any punctuation that occurred between the end
* * of the previous token and the end of the document to the output .
* * Then break out of the loop . */
rc = fts3StringAppend ( pOut , & zDoc [ iEnd ] , - 1 ) ;
2010-01-06 17:19:21 +00:00
}
2010-01-07 10:54:28 +00:00
break ;
}
if ( iCurrent < iPos ) { continue ; }
2010-01-06 17:19:21 +00:00
2010-01-07 10:54:28 +00:00
if ( ! isShiftDone ) {
int n = nDoc - iBegin ;
2012-03-03 18:46:41 +00:00
rc = fts3SnippetShift (
pTab , pCsr - > iLangid , nSnippet , & zDoc [ iBegin ] , n , & iPos , & hlmask
) ;
2010-01-07 10:54:28 +00:00
isShiftDone = 1 ;
/* Now that the shift has been done, check if the initial "..." are
* * required . They are required if ( a ) this is not the first fragment ,
* * or ( b ) this fragment does not begin at position 0 of its column .
*/
2015-01-27 19:01:26 +00:00
if ( rc = = SQLITE_OK ) {
if ( iPos > 0 | | iFragment > 0 ) {
rc = fts3StringAppend ( pOut , zEllipsis , - 1 ) ;
} else if ( iBegin ) {
rc = fts3StringAppend ( pOut , zDoc , iBegin ) ;
}
2010-01-02 19:02:02 +00:00
}
2010-01-07 10:54:28 +00:00
if ( rc ! = SQLITE_OK | | iCurrent < iPos ) continue ;
2010-01-02 19:02:02 +00:00
}
2010-01-07 10:54:28 +00:00
if ( iCurrent > = ( iPos + nSnippet ) ) {
if ( isLast ) {
2010-01-06 17:19:21 +00:00
rc = fts3StringAppend ( pOut , zEllipsis , - 1 ) ;
2010-01-02 19:02:02 +00:00
}
2010-01-07 10:54:28 +00:00
break ;
2010-01-02 19:02:02 +00:00
}
2010-01-07 10:54:28 +00:00
/* Set isHighlight to true if this term should be highlighted. */
isHighlight = ( hlmask & ( ( u64 ) 1 < < ( iCurrent - iPos ) ) ) ! = 0 ;
if ( iCurrent > iPos ) rc = fts3StringAppend ( pOut , & zDoc [ iEnd ] , iBegin - iEnd ) ;
if ( rc = = SQLITE_OK & & isHighlight ) rc = fts3StringAppend ( pOut , zOpen , - 1 ) ;
if ( rc = = SQLITE_OK ) rc = fts3StringAppend ( pOut , & zDoc [ iBegin ] , iFin - iBegin ) ;
if ( rc = = SQLITE_OK & & isHighlight ) rc = fts3StringAppend ( pOut , zClose , - 1 ) ;
iEnd = iFin ;
2010-01-02 19:02:02 +00:00
}
pMod - > xClose ( pC ) ;
return rc ;
}
/*
** Count the entries in a column-list. A column-list is a delta-encoded
** list of term offsets within a single column of a single row, stored as
** a sequence of varints.
**
** On entry, *ppCollist must point to the first byte of the first varint
** in the column-list. On return, *ppCollist points to the byte that
** terminated the scan: either the 0x00 that ends the whole position-list
** or the 0x01 that introduces the column number of the next column.
**
** The return value is the number of varints (entries) that were skipped.
*/
static int fts3ColumnlistCount(char **ppCollist){
  char *pCur = *ppCollist;        /* Byte currently being examined */
  char cCont = 0;                 /* 0x80 if previous byte had its high bit set */
  int nVarint = 0;                /* Number of complete varints seen so far */

  /* A 0x00 or 0x01 byte terminates the list, but only if it is not the
  ** final byte of a multi-byte varint (i.e. only if the previous byte did
  ** not have its continuation bit set). */
  for(;;){
    char cByte = *pCur;
    if( (0xFE & (cByte | cCont))==0 ) break;
    pCur++;
    cCont = cByte & 0x80;
    if( cCont==0 ) nVarint++;
  }

  *ppCollist = pCur;
  return nVarint;
}
2015-05-06 17:41:19 +00:00
/*
* * This function gathers ' y ' or ' b ' data for a single phrase .
*/
2019-01-12 14:58:35 +00:00
static int fts3ExprLHits (
2015-05-06 17:41:19 +00:00
Fts3Expr * pExpr , /* Phrase expression node */
MatchInfo * p /* Matchinfo context */
) {
Fts3Table * pTab = ( Fts3Table * ) p - > pCursor - > base . pVtab ;
int iStart ;
Fts3Phrase * pPhrase = pExpr - > pPhrase ;
char * pIter = pPhrase - > doclist . pList ;
int iCol = 0 ;
assert ( p - > flag = = FTS3_MATCHINFO_LHITS_BM | | p - > flag = = FTS3_MATCHINFO_LHITS ) ;
if ( p - > flag = = FTS3_MATCHINFO_LHITS ) {
iStart = pExpr - > iPhrase * p - > nCol ;
} else {
iStart = pExpr - > iPhrase * ( ( p - > nCol + 31 ) / 32 ) ;
}
2020-05-14 23:59:24 +00:00
if ( pIter ) while ( 1 ) {
2015-05-06 17:41:19 +00:00
int nHit = fts3ColumnlistCount ( & pIter ) ;
if ( ( pPhrase - > iColumn > = pTab - > nColumn | | pPhrase - > iColumn = = iCol ) ) {
if ( p - > flag = = FTS3_MATCHINFO_LHITS ) {
p - > aMatchinfo [ iStart + iCol ] = ( u32 ) nHit ;
} else if ( nHit ) {
p - > aMatchinfo [ iStart + ( iCol + 1 ) / 32 ] | = ( 1 < < ( iCol & 0x1F ) ) ;
}
}
assert ( * pIter = = 0x00 | | * pIter = = 0x01 ) ;
if ( * pIter ! = 0x01 ) break ;
pIter + + ;
pIter + = fts3GetVarint32 ( pIter , & iCol ) ;
2019-01-12 14:58:35 +00:00
if ( iCol > = p - > nCol ) return FTS_CORRUPT_VTAB ;
2015-05-06 17:41:19 +00:00
}
2019-01-12 14:58:35 +00:00
return SQLITE_OK ;
2015-05-06 17:41:19 +00:00
}
/*
* * Gather the results for matchinfo directives ' y ' and ' b ' .
*/
2019-01-12 14:58:35 +00:00
static int fts3ExprLHitGather (
2015-05-06 17:41:19 +00:00
Fts3Expr * pExpr ,
MatchInfo * p
) {
2019-01-12 14:58:35 +00:00
int rc = SQLITE_OK ;
2015-05-06 17:41:19 +00:00
assert ( ( pExpr - > pLeft = = 0 ) = = ( pExpr - > pRight = = 0 ) ) ;
if ( pExpr - > bEof = = 0 & & pExpr - > iDocid = = p - > pCursor - > iPrevId ) {
if ( pExpr - > pLeft ) {
2019-01-12 14:58:35 +00:00
rc = fts3ExprLHitGather ( pExpr - > pLeft , p ) ;
if ( rc = = SQLITE_OK ) rc = fts3ExprLHitGather ( pExpr - > pRight , p ) ;
2015-05-06 17:41:19 +00:00
} else {
2019-01-12 14:58:35 +00:00
rc = fts3ExprLHits ( pExpr , p ) ;
2015-05-06 17:41:19 +00:00
}
}
2019-01-12 14:58:35 +00:00
return rc ;
2015-05-06 17:41:19 +00:00
}
2010-01-02 19:02:02 +00:00
/*
2023-01-25 13:42:55 +00:00
* * sqlite3Fts3ExprIterate ( ) callback used to collect the " global " matchinfo
* * stats for a single query .
2010-11-23 19:16:47 +00:00
* *
2023-01-25 13:42:55 +00:00
* * sqlite3Fts3ExprIterate ( ) callback to load the ' global ' elements of a
2010-11-23 19:16:47 +00:00
* * FTS3_MATCHINFO_HITS matchinfo array . The global stats are those elements
* * of the matchinfo array that are constant for all rows returned by the
* * current query .
* *
* * Argument pCtx is actually a pointer to a struct of type MatchInfo . This
* * function populates Matchinfo . aMatchinfo [ ] as follows :
* *
* * for ( iCol = 0 ; iCol < nCol ; iCol + + ) {
* * aMatchinfo [ 3 * iPhrase * nCol + 3 * iCol + 1 ] = X ;
* * aMatchinfo [ 3 * iPhrase * nCol + 3 * iCol + 2 ] = Y ;
* * }
* *
* * where X is the number of matches for phrase iPhrase is column iCol of all
* * rows of the table . Y is the number of rows for which column iCol contains
* * at least one instance of phrase iPhrase .
2010-11-24 11:51:56 +00:00
* *
* * If the phrase pExpr consists entirely of deferred tokens , then all X and
* * Y values are set to nDoc , where nDoc is the number of documents in the
* * file system . This is done because the full - text index doclist is required
* * to calculate these values properly , and the full - text index doclist is
* * not available for deferred tokens .
2010-01-02 19:02:02 +00:00
*/
2010-11-23 19:16:47 +00:00
static int fts3ExprGlobalHitsCb (
2010-01-02 19:02:02 +00:00
Fts3Expr * pExpr , /* Phrase expression node */
2010-01-12 17:57:30 +00:00
int iPhrase , /* Phrase number (numbered from zero) */
2010-01-02 19:02:02 +00:00
void * pCtx /* Pointer to MatchInfo structure */
) {
MatchInfo * p = ( MatchInfo * ) pCtx ;
2011-06-08 18:39:07 +00:00
return sqlite3Fts3EvalPhraseStats (
p - > pCursor , pExpr , & p - > aMatchinfo [ 3 * iPhrase * p - > nCol ]
) ;
2010-01-02 19:02:02 +00:00
}
2010-01-12 17:57:30 +00:00
/*
2023-01-25 13:42:55 +00:00
* * sqlite3Fts3ExprIterate ( ) callback used to collect the " local " part of the
2010-11-23 19:16:47 +00:00
* * FTS3_MATCHINFO_HITS array . The local stats are those elements of the
2010-01-12 17:57:30 +00:00
* * array that are different for each row returned by the query .
*/
2010-11-23 19:16:47 +00:00
static int fts3ExprLocalHitsCb (
2010-01-02 19:02:02 +00:00
Fts3Expr * pExpr , /* Phrase expression node */
2010-01-12 17:57:30 +00:00
int iPhrase , /* Phrase number */
2010-01-02 19:02:02 +00:00
void * pCtx /* Pointer to MatchInfo structure */
) {
2012-05-10 17:43:14 +00:00
int rc = SQLITE_OK ;
2010-01-02 19:02:02 +00:00
MatchInfo * p = ( MatchInfo * ) pCtx ;
2011-03-23 17:10:43 +00:00
int iStart = iPhrase * p - > nCol * 3 ;
int i ;
2012-05-10 17:43:14 +00:00
for ( i = 0 ; i < p - > nCol & & rc = = SQLITE_OK ; i + + ) {
2010-01-02 19:02:02 +00:00
char * pCsr ;
2012-05-10 17:43:14 +00:00
rc = sqlite3Fts3EvalPhrasePoslist ( p - > pCursor , pExpr , i , & pCsr ) ;
2010-01-12 17:57:30 +00:00
if ( pCsr ) {
2011-06-02 19:57:24 +00:00
p - > aMatchinfo [ iStart + i * 3 ] = fts3ColumnlistCount ( & pCsr ) ;
} else {
p - > aMatchinfo [ iStart + i * 3 ] = 0 ;
2010-01-12 17:57:30 +00:00
}
2010-01-02 19:02:02 +00:00
}
2012-05-10 17:43:14 +00:00
return rc ;
2010-01-02 19:02:02 +00:00
}
2010-11-23 19:16:47 +00:00
/*
** Check that cArg is a matchinfo() directive that table pTab supports.
**
** 'p', 'c', 's', 'x', 'y' and 'b' are always accepted. 'n' and 'a' are
** accepted only if pTab->bFts4 is set, and 'l' only if pTab->bHasDocsize
** is set.
**
** Returns SQLITE_OK if the directive is acceptable. Otherwise an error
** message is written via sqlite3Fts3ErrMsg() to *pzErr and SQLITE_ERROR
** is returned.
*/
static int fts3MatchinfoCheck(
  Fts3Table *pTab,                /* Table the request is made against */
  char cArg,                      /* Directive character to validate */
  char **pzErr                    /* OUT: error message */
){
  int bSupported;

  switch( cArg ){
    case FTS3_MATCHINFO_NPHRASE:
    case FTS3_MATCHINFO_NCOL:
    case FTS3_MATCHINFO_LCS:
    case FTS3_MATCHINFO_HITS:
    case FTS3_MATCHINFO_LHITS:
    case FTS3_MATCHINFO_LHITS_BM:
      bSupported = 1;
      break;

    case FTS3_MATCHINFO_NDOC:
    case FTS3_MATCHINFO_AVGLENGTH:
      bSupported = (pTab->bFts4!=0);
      break;

    case FTS3_MATCHINFO_LENGTH:
      bSupported = (pTab->bHasDocsize!=0);
      break;

    default:
      bSupported = 0;
      break;
  }

  if( bSupported==0 ){
    sqlite3Fts3ErrMsg(pzErr, " unrecognized matchinfo request: %c ", cArg);
    return SQLITE_ERROR;
  }
  return SQLITE_OK;
}
2019-04-13 14:07:57 +00:00
/*
** Return the number of u32 values that matchinfo directive cArg
** contributes to the output array, given the column and phrase counts in
** pInfo:
**
**   'n', 'p', 'c'    1
**   'a', 'l', 's'    nCol
**   'y'              nCol * nPhrase
**   'b'              nPhrase * ((nCol+31)/32)
**   'x' (default)    nCol * nPhrase * 3
**
** cArg must already have been validated; any value other than those
** listed is treated as 'x' (and asserted to be 'x' in debug builds).
**
** The products are evaluated in size_t rather than int so that large
** column/phrase counts cannot overflow a signed int before the result is
** widened to the return type.
*/
static size_t fts3MatchinfoSize(MatchInfo *pInfo, char cArg){
  size_t nVal;                    /* Number of integers output by cArg */

  switch( cArg ){
    case FTS3_MATCHINFO_NDOC:
    case FTS3_MATCHINFO_NPHRASE:
    case FTS3_MATCHINFO_NCOL:
      nVal = 1;
      break;

    case FTS3_MATCHINFO_AVGLENGTH:
    case FTS3_MATCHINFO_LENGTH:
    case FTS3_MATCHINFO_LCS:
      nVal = pInfo->nCol;
      break;

    case FTS3_MATCHINFO_LHITS:
      nVal = (size_t)pInfo->nCol * (size_t)pInfo->nPhrase;
      break;

    case FTS3_MATCHINFO_LHITS_BM:
      nVal = (size_t)pInfo->nPhrase * (size_t)((pInfo->nCol + 31) / 32);
      break;

    default:
      assert( cArg==FTS3_MATCHINFO_HITS );
      nVal = (size_t)pInfo->nCol * (size_t)pInfo->nPhrase * 3;
      break;
  }

  return nVal;
}
/*
** Read the leading document-count varint from the blob in column 0 of the
** "doctotal" statement.
**
** If *ppStmt is NULL, the statement is first obtained with
** sqlite3Fts3SelectDoctotal() and stored in *ppStmt; the caller remains
** responsible for resetting it.
**
** On success SQLITE_OK is returned and:
**
**   *pnDoc  is set to the decoded document count (always > 0),
**   *paLen  (if paLen is not NULL) points at the first byte following
**           that varint,
**   *ppEnd  (if ppEnd is not NULL) points one byte past the end of the
**           blob.
**
** FTS_CORRUPT_VTAB is returned if the blob is NULL, if the varint runs
** past the end of the blob, or if the decoded count is not positive.
*/
static int fts3MatchinfoSelectDoctotal(
  Fts3Table *pTab,                /* FTS table handle */
  sqlite3_stmt **ppStmt,          /* IN/OUT: doctotal statement */
  sqlite3_int64 *pnDoc,           /* OUT: number of documents */
  const char **paLen,             /* OUT: remainder of blob (may be NULL) */
  const char **ppEnd              /* OUT: end of blob (may be NULL) */
){
  sqlite3_stmt *pTotal;           /* The doctotal statement */
  const char *zBlob;              /* Read cursor within the blob */
  const char *zBlobEnd;           /* One past the last byte of the blob */
  sqlite3_int64 nRow;             /* Decoded document count */
  int nByte;                      /* Size of the blob in bytes */

  if( *ppStmt==0 ){
    int rcSelect = sqlite3Fts3SelectDoctotal(pTab, ppStmt);
    if( rcSelect!=SQLITE_OK ) return rcSelect;
  }
  pTotal = *ppStmt;
  assert( sqlite3_data_count(pTotal)==1 );

  nByte = sqlite3_column_bytes(pTotal, 0);
  zBlob = sqlite3_column_blob(pTotal, 0);
  if( zBlob==0 ) return FTS_CORRUPT_VTAB;

  zBlobEnd = &zBlob[nByte];
  zBlob += sqlite3Fts3GetVarintBounded(zBlob, zBlobEnd, &nRow);
  if( zBlob>zBlobEnd || nRow<=0 ) return FTS_CORRUPT_VTAB;

  *pnDoc = nRow;
  if( paLen!=0 ) *paLen = zBlob;
  if( ppEnd!=0 ) *ppEnd = zBlobEnd;
  return SQLITE_OK;
}
2010-11-24 19:26:18 +00:00
2010-11-26 15:13:31 +00:00
/*
** State kept while iterating through the position-list of one phrase on
** one row, one column at a time, in order to compute the values for a
** matchinfo() FTS3_MATCHINFO_LCS request. fts3MatchinfoLcs() allocates one
** of these per phrase in the query.
*/
typedef struct LcsIterator {
  Fts3Expr *pExpr;                /* Pointer to phrase expression */
  int iPosOffset;                 /* Tokens count up to end of this phrase */
  char *pRead;                    /* Read cursor; NULL once column is exhausted */
  int iPos;                       /* Current position */
} LcsIterator;
2010-11-26 15:13:31 +00:00
/*
** Sentinel value meaning that an LcsIterator has finished iterating
** through all offsets for all columns.
**
** NOTE(review): struct LcsIterator as declared in this file has no iCol
** field, and the iterators signal end-of-column by setting pRead to NULL,
** so this macro appears to be a leftover - confirm before relying on it.
**
** The definition deliberately has no trailing semicolon so that the macro
** can be used inside expressions.
*/
#define LCS_ITERATOR_FINISHED 0x7FFFFFFF
/*
** sqlite3Fts3ExprIterate() callback used by fts3MatchinfoLcs(). pCtx is
** an array of LcsIterator objects; the entry for phrase iPhrase is
** pointed at the phrase expression pExpr. Always returns SQLITE_OK.
*/
static int fts3MatchinfoLcsCb(
  Fts3Expr *pExpr,                /* Phrase expression node */
  int iPhrase,                    /* Phrase number (numbered from zero) */
  void *pCtx                      /* Pointer to array of LcsIterator */
){
  ((LcsIterator *)pCtx)[iPhrase].pExpr = pExpr;
  return SQLITE_OK;
}
2010-11-26 15:13:31 +00:00
/*
* * Advance the iterator passed as an argument to the next position . Return
* * 1 if the iterator is at EOF or if it now points to the start of the
* * position list for the next column .
*/
2010-11-24 19:26:18 +00:00
static int fts3LcsIteratorAdvance ( LcsIterator * pIter ) {
2021-10-04 15:08:49 +00:00
char * pRead ;
2010-11-24 19:26:18 +00:00
sqlite3_int64 iRead ;
int rc = 0 ;
2021-10-04 15:08:49 +00:00
if ( NEVER ( pIter = = 0 ) ) return 1 ;
pRead = pIter - > pRead ;
2010-11-24 19:26:18 +00:00
pRead + = sqlite3Fts3GetVarint ( pRead , & iRead ) ;
2011-06-02 19:57:24 +00:00
if ( iRead = = 0 | | iRead = = 1 ) {
pRead = 0 ;
2010-11-24 19:26:18 +00:00
rc = 1 ;
} else {
2010-12-01 15:36:00 +00:00
pIter - > iPos + = ( int ) ( iRead - 2 ) ;
2010-11-24 19:26:18 +00:00
}
pIter - > pRead = pRead ;
return rc ;
}
2010-11-23 19:16:47 +00:00
2010-11-26 15:13:31 +00:00
/*
* * This function implements the FTS3_MATCHINFO_LCS matchinfo ( ) flag .
* *
* * If the call is successful , the longest - common - substring lengths for each
* * column are written into the first nCol elements of the pInfo - > aMatchinfo [ ]
* * array before returning . SQLITE_OK is returned in this case .
* *
* * Otherwise , if an error occurs , an SQLite error code is returned and the
* * data written to the first nCol elements of pInfo - > aMatchinfo [ ] is
* * undefined .
*/
2010-11-24 19:26:18 +00:00
static int fts3MatchinfoLcs ( Fts3Cursor * pCsr , MatchInfo * pInfo ) {
LcsIterator * aIter ;
int i ;
int iCol ;
int nToken = 0 ;
2019-01-16 19:44:09 +00:00
int rc = SQLITE_OK ;
2010-11-24 19:26:18 +00:00
/* Allocate and populate the array of LcsIterator objects. The array
* * contains one element for each matchable phrase in the query .
* */
2021-10-20 11:40:34 +00:00
aIter = sqlite3Fts3MallocZero ( sizeof ( LcsIterator ) * pCsr - > nPhrase ) ;
2010-11-24 19:26:18 +00:00
if ( ! aIter ) return SQLITE_NOMEM ;
2023-01-25 13:42:55 +00:00
( void ) sqlite3Fts3ExprIterate ( pCsr - > pExpr , fts3MatchinfoLcsCb , ( void * ) aIter ) ;
2011-06-02 19:57:24 +00:00
2010-11-24 19:26:18 +00:00
for ( i = 0 ; i < pInfo - > nPhrase ; i + + ) {
LcsIterator * pIter = & aIter [ i ] ;
nToken - = pIter - > pExpr - > pPhrase - > nToken ;
pIter - > iPosOffset = nToken ;
}
for ( iCol = 0 ; iCol < pInfo - > nCol ; iCol + + ) {
2010-11-26 15:13:31 +00:00
int nLcs = 0 ; /* LCS value for this column */
int nLive = 0 ; /* Number of iterators in aIter not at EOF */
2010-11-24 19:26:18 +00:00
for ( i = 0 ; i < pInfo - > nPhrase ; i + + ) {
2011-06-02 19:57:24 +00:00
LcsIterator * pIt = & aIter [ i ] ;
2012-05-10 17:43:14 +00:00
rc = sqlite3Fts3EvalPhrasePoslist ( pCsr , pIt - > pExpr , iCol , & pIt - > pRead ) ;
2019-01-16 19:44:09 +00:00
if ( rc ! = SQLITE_OK ) goto matchinfo_lcs_out ;
2011-06-02 19:57:24 +00:00
if ( pIt - > pRead ) {
pIt - > iPos = pIt - > iPosOffset ;
2019-01-16 19:44:09 +00:00
fts3LcsIteratorAdvance ( pIt ) ;
if ( pIt - > pRead = = 0 ) {
rc = FTS_CORRUPT_VTAB ;
goto matchinfo_lcs_out ;
}
2011-06-02 19:57:24 +00:00
nLive + + ;
}
2010-11-24 19:26:18 +00:00
}
while ( nLive > 0 ) {
2010-11-26 15:13:31 +00:00
LcsIterator * pAdv = 0 ; /* The iterator to advance by one position */
int nThisLcs = 0 ; /* LCS for the current iterator positions */
2010-11-24 19:26:18 +00:00
for ( i = 0 ; i < pInfo - > nPhrase ; i + + ) {
LcsIterator * pIter = & aIter [ i ] ;
2011-06-02 19:57:24 +00:00
if ( pIter - > pRead = = 0 ) {
2010-11-26 15:13:31 +00:00
/* This iterator is already at EOF for this column. */
2010-11-24 19:26:18 +00:00
nThisLcs = 0 ;
} else {
2010-11-26 15:13:31 +00:00
if ( pAdv = = 0 | | pIter - > iPos < pAdv - > iPos ) {
pAdv = pIter ;
}
if ( nThisLcs = = 0 | | pIter - > iPos = = pIter [ - 1 ] . iPos ) {
nThisLcs + + ;
} else {
nThisLcs = 1 ;
}
if ( nThisLcs > nLcs ) nLcs = nThisLcs ;
2010-11-24 19:26:18 +00:00
}
}
if ( fts3LcsIteratorAdvance ( pAdv ) ) nLive - - ;
}
pInfo - > aMatchinfo [ iCol ] = nLcs ;
}
2019-01-16 19:44:09 +00:00
matchinfo_lcs_out :
2010-11-24 19:26:18 +00:00
sqlite3_free ( aIter ) ;
2019-01-16 19:44:09 +00:00
return rc ;
2010-11-24 19:26:18 +00:00
}
2010-11-23 19:16:47 +00:00
2010-11-26 15:13:31 +00:00
/*
* * Populate the buffer pInfo - > aMatchinfo [ ] with an array of integers to
* * be returned by the matchinfo ( ) function . Argument zArg contains the
* * format string passed as the second argument to matchinfo ( or the
* * default value " pcx " if no second argument was specified ) . The format
* * string has already been validated and the pInfo - > aMatchinfo [ ] array
* * is guaranteed to be large enough for the output .
* *
* * If bGlobal is true , then populate all fields of the matchinfo ( ) output .
* * If it is false , then assume that those fields that do not change between
* * rows ( i . e . FTS3_MATCHINFO_NPHRASE , NCOL , NDOC , AVGLENGTH and part of HITS )
* * have already been populated .
* *
* * Return SQLITE_OK if successful , or an SQLite error code if an error
* * occurs . If a value other than SQLITE_OK is returned , the state the
* * pInfo - > aMatchinfo [ ] buffer is left in is undefined .
*/
2010-11-23 19:16:47 +00:00
static int fts3MatchinfoValues (
Fts3Cursor * pCsr , /* FTS3 cursor object */
int bGlobal , /* True to grab the global stats */
MatchInfo * pInfo , /* Matchinfo context object */
const char * zArg /* Matchinfo format string */
) {
int rc = SQLITE_OK ;
int i ;
Fts3Table * pTab = ( Fts3Table * ) pCsr - > base . pVtab ;
sqlite3_stmt * pSelect = 0 ;
2010-11-25 17:49:28 +00:00
for ( i = 0 ; rc = = SQLITE_OK & & zArg [ i ] ; i + + ) {
2015-05-05 20:39:53 +00:00
pInfo - > flag = zArg [ i ] ;
2010-11-23 19:16:47 +00:00
switch ( zArg [ i ] ) {
2010-11-26 15:13:31 +00:00
case FTS3_MATCHINFO_NPHRASE :
2010-11-23 19:16:47 +00:00
if ( bGlobal ) pInfo - > aMatchinfo [ 0 ] = pInfo - > nPhrase ;
break ;
2010-11-26 15:13:31 +00:00
case FTS3_MATCHINFO_NCOL :
2010-11-23 19:16:47 +00:00
if ( bGlobal ) pInfo - > aMatchinfo [ 0 ] = pInfo - > nCol ;
break ;
case FTS3_MATCHINFO_NDOC :
if ( bGlobal ) {
2011-06-20 17:24:29 +00:00
sqlite3_int64 nDoc = 0 ;
2019-11-17 02:41:06 +00:00
rc = fts3MatchinfoSelectDoctotal ( pTab , & pSelect , & nDoc , 0 , 0 ) ;
2010-11-23 19:16:47 +00:00
pInfo - > aMatchinfo [ 0 ] = ( u32 ) nDoc ;
}
break ;
case FTS3_MATCHINFO_AVGLENGTH :
if ( bGlobal ) {
sqlite3_int64 nDoc ; /* Number of rows in table */
const char * a ; /* Aggregate column length array */
2019-11-17 02:41:06 +00:00
const char * pEnd ; /* First byte past end of length array */
2010-11-23 19:16:47 +00:00
2019-11-17 02:41:06 +00:00
rc = fts3MatchinfoSelectDoctotal ( pTab , & pSelect , & nDoc , & a , & pEnd ) ;
2010-11-23 19:16:47 +00:00
if ( rc = = SQLITE_OK ) {
int iCol ;
for ( iCol = 0 ; iCol < pInfo - > nCol ; iCol + + ) {
2011-02-01 17:55:48 +00:00
u32 iVal ;
2010-11-23 19:16:47 +00:00
sqlite3_int64 nToken ;
a + = sqlite3Fts3GetVarint ( a , & nToken ) ;
2019-11-17 02:41:06 +00:00
if ( a > pEnd ) {
rc = SQLITE_CORRUPT_VTAB ;
break ;
}
2011-02-01 17:55:48 +00:00
iVal = ( u32 ) ( ( ( u32 ) ( nToken & 0xffffffff ) + nDoc / 2 ) / nDoc ) ;
pInfo - > aMatchinfo [ iCol ] = iVal ;
2010-11-23 19:16:47 +00:00
}
}
}
break ;
case FTS3_MATCHINFO_LENGTH : {
sqlite3_stmt * pSelectDocsize = 0 ;
rc = sqlite3Fts3SelectDocsize ( pTab , pCsr - > iPrevId , & pSelectDocsize ) ;
if ( rc = = SQLITE_OK ) {
int iCol ;
const char * a = sqlite3_column_blob ( pSelectDocsize , 0 ) ;
2019-11-17 02:41:06 +00:00
const char * pEnd = a + sqlite3_column_bytes ( pSelectDocsize , 0 ) ;
2010-11-23 19:16:47 +00:00
for ( iCol = 0 ; iCol < pInfo - > nCol ; iCol + + ) {
sqlite3_int64 nToken ;
2019-11-17 02:41:06 +00:00
a + = sqlite3Fts3GetVarintBounded ( a , pEnd , & nToken ) ;
if ( a > pEnd ) {
rc = SQLITE_CORRUPT_VTAB ;
break ;
}
2010-11-23 19:16:47 +00:00
pInfo - > aMatchinfo [ iCol ] = ( u32 ) nToken ;
}
}
sqlite3_reset ( pSelectDocsize ) ;
break ;
}
2010-11-25 17:49:28 +00:00
case FTS3_MATCHINFO_LCS :
2010-11-25 10:33:54 +00:00
rc = fts3ExprLoadDoclists ( pCsr , 0 , 0 ) ;
if ( rc = = SQLITE_OK ) {
2010-11-25 17:49:28 +00:00
rc = fts3MatchinfoLcs ( pCsr , pInfo ) ;
2010-11-23 19:16:47 +00:00
}
break ;
2015-05-05 20:39:53 +00:00
case FTS3_MATCHINFO_LHITS_BM :
2015-05-05 19:37:07 +00:00
case FTS3_MATCHINFO_LHITS : {
2019-04-13 14:07:57 +00:00
size_t nZero = fts3MatchinfoSize ( pInfo , zArg [ i ] ) * sizeof ( u32 ) ;
2015-05-05 19:37:07 +00:00
memset ( pInfo - > aMatchinfo , 0 , nZero ) ;
2019-01-12 14:58:35 +00:00
rc = fts3ExprLHitGather ( pCsr - > pExpr , pInfo ) ;
2015-05-02 09:44:15 +00:00
break ;
2015-05-05 19:37:07 +00:00
}
2015-05-02 09:44:15 +00:00
2010-11-25 17:49:28 +00:00
default : {
2010-11-26 16:49:59 +00:00
Fts3Expr * pExpr ;
2010-11-25 17:49:28 +00:00
assert ( zArg [ i ] = = FTS3_MATCHINFO_HITS ) ;
2010-11-26 16:49:59 +00:00
pExpr = pCsr - > pExpr ;
2010-11-25 10:33:54 +00:00
rc = fts3ExprLoadDoclists ( pCsr , 0 , 0 ) ;
2010-11-25 17:49:28 +00:00
if ( rc ! = SQLITE_OK ) break ;
if ( bGlobal ) {
if ( pCsr - > pDeferred ) {
2019-11-17 02:41:06 +00:00
rc = fts3MatchinfoSelectDoctotal ( pTab , & pSelect , & pInfo - > nDoc , 0 , 0 ) ;
2010-11-25 17:49:28 +00:00
if ( rc ! = SQLITE_OK ) break ;
}
2023-01-25 13:42:55 +00:00
rc = sqlite3Fts3ExprIterate ( pExpr , fts3ExprGlobalHitsCb , ( void * ) pInfo ) ;
2015-05-25 10:57:13 +00:00
sqlite3Fts3EvalTestDeferred ( pCsr , & rc ) ;
2010-11-25 17:49:28 +00:00
if ( rc ! = SQLITE_OK ) break ;
2010-11-25 10:33:54 +00:00
}
2023-01-25 13:42:55 +00:00
( void ) sqlite3Fts3ExprIterate ( pExpr , fts3ExprLocalHitsCb , ( void * ) pInfo ) ;
2010-11-24 19:26:18 +00:00
break ;
2010-11-25 17:49:28 +00:00
}
2010-11-23 19:16:47 +00:00
}
pInfo - > aMatchinfo + = fts3MatchinfoSize ( pInfo , zArg [ i ] ) ;
}
sqlite3_reset ( pSelect ) ;
return rc ;
}
2010-01-02 19:02:02 +00:00
/*
2010-01-12 17:57:30 +00:00
* * Populate pCsr - > aMatchinfo [ ] with data for the current row . The
* * ' matchinfo ' data is an array of 32 - bit unsigned integers ( C type u32 ) .
2010-01-02 19:02:02 +00:00
*/
2015-05-06 17:41:19 +00:00
static void fts3GetMatchinfo (
2015-05-05 19:37:07 +00:00
sqlite3_context * pCtx , /* Return results here */
2010-11-23 19:16:47 +00:00
Fts3Cursor * pCsr , /* FTS3 Cursor object */
const char * zArg /* Second argument to matchinfo() function */
) {
2010-01-12 17:57:30 +00:00
MatchInfo sInfo ;
2010-01-02 19:02:02 +00:00
Fts3Table * pTab = ( Fts3Table * ) pCsr - > base . pVtab ;
2010-02-03 19:55:13 +00:00
int rc = SQLITE_OK ;
2010-11-23 19:16:47 +00:00
int bGlobal = 0 ; /* Collect 'global' stats as well as local */
2010-01-12 17:57:30 +00:00
2015-05-05 19:37:07 +00:00
u32 * aOut = 0 ;
void ( * xDestroyOut ) ( void * ) = 0 ;
2010-11-23 19:16:47 +00:00
memset ( & sInfo , 0 , sizeof ( MatchInfo ) ) ;
2010-01-12 17:57:30 +00:00
sInfo . pCursor = pCsr ;
sInfo . nCol = pTab - > nColumn ;
2010-11-23 19:16:47 +00:00
/* If there is cached matchinfo() data, but the format string for the
* * cache does not match the format string for this request , discard
* * the cached data . */
2015-05-05 19:37:07 +00:00
if ( pCsr - > pMIBuffer & & strcmp ( pCsr - > pMIBuffer - > zMatchinfo , zArg ) ) {
sqlite3Fts3MIBufferFree ( pCsr - > pMIBuffer ) ;
pCsr - > pMIBuffer = 0 ;
2010-11-23 19:16:47 +00:00
}
2015-05-05 19:37:07 +00:00
/* If Fts3Cursor.pMIBuffer is NULL, then this is the first time the
2010-11-23 19:16:47 +00:00
* * matchinfo function has been called for this query . In this case
* * allocate the array used to accumulate the matchinfo data and
* * initialize those elements that are constant for every row .
*/
2015-05-05 19:37:07 +00:00
if ( pCsr - > pMIBuffer = = 0 ) {
2019-04-13 14:17:09 +00:00
size_t nMatchinfo = 0 ; /* Number of u32 elements in match-info */
2010-11-23 19:16:47 +00:00
int i ; /* Used to iterate through zArg */
2010-01-02 19:02:02 +00:00
2010-11-26 15:13:31 +00:00
/* Determine the number of phrases in the query */
2010-11-25 17:49:28 +00:00
pCsr - > nPhrase = fts3ExprPhraseCount ( pCsr - > pExpr ) ;
2010-11-23 19:16:47 +00:00
sInfo . nPhrase = pCsr - > nPhrase ;
2010-01-02 19:02:02 +00:00
2010-11-26 15:13:31 +00:00
/* Determine the number of integers in the buffer returned by this call. */
2010-11-23 19:16:47 +00:00
for ( i = 0 ; zArg [ i ] ; i + + ) {
2015-05-06 08:43:26 +00:00
char * zErr = 0 ;
if ( fts3MatchinfoCheck ( pTab , zArg [ i ] , & zErr ) ) {
sqlite3_result_error ( pCtx , zErr , - 1 ) ;
sqlite3_free ( zErr ) ;
return ;
}
2010-11-23 19:16:47 +00:00
nMatchinfo + = fts3MatchinfoSize ( & sInfo , zArg [ i ] ) ;
2010-01-02 19:02:02 +00:00
}
2010-11-23 19:16:47 +00:00
/* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */
2015-05-05 19:37:07 +00:00
pCsr - > pMIBuffer = fts3MIBufferNew ( nMatchinfo , zArg ) ;
if ( ! pCsr - > pMIBuffer ) rc = SQLITE_NOMEM ;
2010-02-03 19:55:13 +00:00
pCsr - > isMatchinfoNeeded = 1 ;
2010-11-23 19:16:47 +00:00
bGlobal = 1 ;
2010-01-12 17:57:30 +00:00
}
2010-01-02 19:02:02 +00:00
2015-05-05 19:37:07 +00:00
if ( rc = = SQLITE_OK ) {
xDestroyOut = fts3MIBufferAlloc ( pCsr - > pMIBuffer , & aOut ) ;
if ( xDestroyOut = = 0 ) {
rc = SQLITE_NOMEM ;
}
}
if ( rc = = SQLITE_OK ) {
sInfo . aMatchinfo = aOut ;
sInfo . nPhrase = pCsr - > nPhrase ;
2010-11-23 19:16:47 +00:00
rc = fts3MatchinfoValues ( pCsr , bGlobal , & sInfo , zArg ) ;
2015-05-05 19:37:07 +00:00
if ( bGlobal ) {
fts3MIBufferSetGlobal ( pCsr - > pMIBuffer ) ;
}
}
if ( rc ! = SQLITE_OK ) {
sqlite3_result_error_code ( pCtx , rc ) ;
if ( xDestroyOut ) xDestroyOut ( aOut ) ;
} else {
int n = pCsr - > pMIBuffer - > nElem * sizeof ( u32 ) ;
sqlite3_result_blob ( pCtx , aOut , n , xDestroyOut ) ;
2010-01-02 19:02:02 +00:00
}
}
2010-01-12 17:57:30 +00:00
/*
* * Implementation of snippet ( ) function .
*/
2010-01-06 17:19:21 +00:00
void sqlite3Fts3Snippet (
2010-01-02 19:02:02 +00:00
sqlite3_context * pCtx , /* SQLite function call context */
Fts3Cursor * pCsr , /* Cursor object */
const char * zStart , /* Snippet start text - "<b>" */
const char * zEnd , /* Snippet end text - "</b>" */
const char * zEllipsis , /* Snippet ellipsis text - "<b>...</b>" */
int iCol , /* Extract snippet from this column */
int nToken /* Approximate number of tokens in snippet */
) {
2010-01-06 17:19:21 +00:00
Fts3Table * pTab = ( Fts3Table * ) pCsr - > base . pVtab ;
int rc = SQLITE_OK ;
int i ;
StrBuffer res = { 0 , 0 , 0 } ;
/* The returned text includes up to four fragments of text extracted from
* * the data in the current row . The first iteration of the for ( . . . ) loop
* * below attempts to locate a single fragment of text nToken tokens in
* * size that contains at least one instance of all phrases in the query
* * expression that appear in the current row . If such a fragment of text
* * cannot be found , the second iteration of the loop attempts to locate
* * a pair of fragments , and so on .
*/
int nSnippet = 0 ; /* Number of fragments in this snippet */
SnippetFragment aSnippet [ 4 ] ; /* Maximum of 4 fragments per snippet */
int nFToken = - 1 ; /* Number of tokens in each fragment */
2010-01-02 19:02:02 +00:00
2010-02-10 05:33:17 +00:00
if ( ! pCsr - > pExpr ) {
sqlite3_result_text ( pCtx , " " , 0 , SQLITE_STATIC ) ;
return ;
}
2019-01-18 19:26:48 +00:00
/* Limit the snippet length to 64 tokens. */
if ( nToken < - 64 ) nToken = - 64 ;
if ( nToken > + 64 ) nToken = + 64 ;
2010-01-15 17:25:52 +00:00
for ( nSnippet = 1 ; 1 ; nSnippet + + ) {
2010-01-06 17:19:21 +00:00
int iSnip ; /* Loop counter 0..nSnippet-1 */
u64 mCovered = 0 ; /* Bitmask of phrases covered by snippet */
u64 mSeen = 0 ; /* Bitmask of phrases seen by BestSnippet() */
2010-01-02 19:02:02 +00:00
2010-01-15 17:25:52 +00:00
if ( nToken > = 0 ) {
nFToken = ( nToken + nSnippet - 1 ) / nSnippet ;
} else {
nFToken = - 1 * nToken ;
}
2010-01-02 19:02:02 +00:00
2010-01-06 17:19:21 +00:00
for ( iSnip = 0 ; iSnip < nSnippet ; iSnip + + ) {
int iBestScore = - 1 ; /* Best score of columns checked so far */
int iRead ; /* Used to iterate through columns */
SnippetFragment * pFragment = & aSnippet [ iSnip ] ;
memset ( pFragment , 0 , sizeof ( * pFragment ) ) ;
/* Loop through all columns of the table being considered for snippets.
* * If the iCol argument to this function was negative , this means all
* * columns of the FTS3 table . Otherwise , only column iCol is considered .
*/
for ( iRead = 0 ; iRead < pTab - > nColumn ; iRead + + ) {
2010-11-24 11:51:56 +00:00
SnippetFragment sF = { 0 , 0 , 0 , 0 } ;
2015-03-04 20:18:55 +00:00
int iS = 0 ;
2010-01-06 17:19:21 +00:00
if ( iCol > = 0 & & iRead ! = iCol ) continue ;
/* Find the best snippet of nFToken tokens in column iRead. */
rc = fts3BestSnippet ( nFToken , pCsr , iRead , mCovered , & mSeen , & sF , & iS ) ;
if ( rc ! = SQLITE_OK ) {
goto snippet_out ;
}
if ( iS > iBestScore ) {
* pFragment = sF ;
iBestScore = iS ;
}
}
mCovered | = pFragment - > covered ;
}
/* If all query phrases seen by fts3BestSnippet() are present in at least
* * one of the nSnippet snippet fragments , break out of the loop .
*/
assert ( ( mCovered & mSeen ) = = mCovered ) ;
2010-01-15 17:25:52 +00:00
if ( mSeen = = mCovered | | nSnippet = = SizeofArray ( aSnippet ) ) break ;
}
2010-01-06 17:19:21 +00:00
assert ( nFToken > 0 ) ;
for ( i = 0 ; i < nSnippet & & rc = = SQLITE_OK ; i + + ) {
2010-01-07 10:54:28 +00:00
rc = fts3SnippetText ( pCsr , & aSnippet [ i ] ,
i , ( i = = nSnippet - 1 ) , nFToken , zStart , zEnd , zEllipsis , & res
) ;
2010-01-02 19:02:02 +00:00
}
2010-01-06 17:19:21 +00:00
snippet_out :
2010-10-23 19:07:30 +00:00
sqlite3Fts3SegmentsClose ( pTab ) ;
2010-01-02 19:02:02 +00:00
if ( rc ! = SQLITE_OK ) {
sqlite3_result_error_code ( pCtx , rc ) ;
2010-01-06 17:19:21 +00:00
sqlite3_free ( res . z ) ;
} else {
sqlite3_result_text ( pCtx , res . z , - 1 , sqlite3_free ) ;
}
}
typedef struct TermOffset TermOffset ;
2010-01-12 17:57:30 +00:00
typedef struct TermOffsetCtx TermOffsetCtx ;
2010-01-06 17:19:21 +00:00
struct TermOffset {
char * pList ; /* Position-list */
2021-04-16 16:55:28 +00:00
i64 iPos ; /* Position just read from pList */
i64 iOff ; /* Offset of this term from read positions */
2010-01-06 17:19:21 +00:00
} ;
struct TermOffsetCtx {
2011-05-04 12:52:59 +00:00
Fts3Cursor * pCsr ;
2010-01-06 17:19:21 +00:00
int iCol ; /* Column of table to populate aTerm for */
int iTerm ;
sqlite3_int64 iDocid ;
TermOffset * aTerm ;
} ;
/*
2023-01-25 13:42:55 +00:00
* * This function is an sqlite3Fts3ExprIterate ( ) callback used by sqlite3Fts3Offsets ( ) .
2010-01-06 17:19:21 +00:00
*/
2010-01-11 12:00:47 +00:00
static int fts3ExprTermOffsetInit ( Fts3Expr * pExpr , int iPhrase , void * ctx ) {
2010-01-06 17:19:21 +00:00
TermOffsetCtx * p = ( TermOffsetCtx * ) ctx ;
int nTerm ; /* Number of tokens in phrase */
int iTerm ; /* For looping through nTerm phrase terms */
char * pList ; /* Pointer to position list for phrase */
2021-04-16 16:55:28 +00:00
i64 iPos = 0 ; /* First position in position-list */
2012-05-10 17:43:14 +00:00
int rc ;
2010-01-06 17:19:21 +00:00
2010-02-26 01:46:54 +00:00
UNUSED_PARAMETER ( iPhrase ) ;
2012-05-10 17:43:14 +00:00
rc = sqlite3Fts3EvalPhrasePoslist ( p - > pCsr , pExpr , p - > iCol , & pList ) ;
2010-01-06 17:19:21 +00:00
nTerm = pExpr - > pPhrase - > nToken ;
if ( pList ) {
fts3GetDeltaPosition ( & pList , & iPos ) ;
2019-02-23 20:48:41 +00:00
assert_fts3_nc ( iPos > = 0 ) ;
2010-01-06 17:19:21 +00:00
}
for ( iTerm = 0 ; iTerm < nTerm ; iTerm + + ) {
TermOffset * pT = & p - > aTerm [ p - > iTerm + + ] ;
pT - > iOff = nTerm - iTerm - 1 ;
pT - > pList = pList ;
pT - > iPos = iPos ;
}
2012-05-10 17:43:14 +00:00
return rc ;
2010-01-06 17:19:21 +00:00
}
/*
** Implementation of offsets() function.
**
** For each column of the FTS3 table, the text stored in the current row is
** run through the table's tokenizer in step with the position lists of the
** query terms. Every term instance located this way appends four integers
** to the result: the column number, the index of the term within
** sCtx.aTerm[], the start offset reported by the tokenizer for the token
** and the token size (end offset minus start offset).
**
** On success the accumulated text is set as the result of pCtx. If an
** error occurs, the error code is set on pCtx instead and the partially
** built string is freed. If the cursor has no expression, a zero-length
** text value is returned.
*/
void sqlite3Fts3Offsets(
  sqlite3_context *pCtx,          /* SQLite function call context */
  Fts3Cursor *pCsr                /* Cursor object */
){
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule;
  int rc;                         /* Return Code */
  int nToken = 0;                 /* Number of tokens in query */
  int iCol;                       /* Column currently being processed */
  StrBuffer res = {0, 0, 0};      /* Result string */
  TermOffsetCtx sCtx;             /* Context for fts3ExprTermOffsetInit() */

  if( !pCsr->pExpr ){
    /* The length argument is 0, so the result is a zero-length string. */
    sqlite3_result_text(pCtx, " ", 0, SQLITE_STATIC);
    return;
  }

  memset(&sCtx, 0, sizeof(sCtx));
  assert( pCsr->isRequireSeek==0 );

  /* Count the number of terms in the query */
  rc = fts3ExprLoadDoclists(pCsr, 0, &nToken);
  if( rc!=SQLITE_OK ) goto offsets_out;

  /* Allocate the array of TermOffset iterators. */
  sCtx.aTerm = (TermOffset *)sqlite3Fts3MallocZero(sizeof(TermOffset)*nToken);
  if( 0==sCtx.aTerm ){
    rc = SQLITE_NOMEM;
    goto offsets_out;
  }
  sCtx.iDocid = pCsr->iPrevId;
  sCtx.pCsr = pCsr;

  /* Loop through the table columns, appending offset information to
  ** string-buffer res for each column.
  */
  for(iCol=0; iCol<pTab->nColumn; iCol++){
    sqlite3_tokenizer_cursor *pC = 0; /* Tokenizer cursor */
    const char *ZDUMMY = 0;       /* Dummy argument used with xNext() */
    int NDUMMY = 0;               /* Dummy argument used with xNext() */
    int iStart = 0;               /* Start offset of the current token */
    int iEnd = 0;                 /* End offset of the current token */
    int iCurrent = 0;             /* Position of the current token */
    const char *zDoc;             /* Text stored in column iCol */
    int nDoc;                     /* Size of zDoc in bytes */

    /* Initialize the contents of sCtx.aTerm[] for column iCol. This
    ** operation may fail if the database contains corrupt records.
    */
    sCtx.iCol = iCol;
    sCtx.iTerm = 0;
    rc = sqlite3Fts3ExprIterate(
        pCsr->pExpr, fts3ExprTermOffsetInit, (void*)&sCtx
    );
    if( rc!=SQLITE_OK ) goto offsets_out;

    /* Retrieve the text stored in column iCol. If an SQL NULL is stored
    ** in column iCol, jump immediately to the next iteration of the loop.
    ** If an OOM occurs while retrieving the data (this can happen if SQLite
    ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM
    ** to the caller.
    */
    zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1);
    nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1);
    if( zDoc==0 ){
      if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){
        continue;
      }
      rc = SQLITE_NOMEM;
      goto offsets_out;
    }

    /* Initialize a tokenizer iterator to iterate through column iCol. */
    rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid,
        zDoc, nDoc, &pC
    );
    if( rc!=SQLITE_OK ) goto offsets_out;

    rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
    while( rc==SQLITE_OK ){
      int i;                      /* Used to loop through terms */
      int iMinPos = 0x7FFFFFFF;   /* Position of next token */
      TermOffset *pTerm = 0;      /* TermOffset associated with next token */

      /* Find the term whose next instance has the smallest token position.
      ** Terms with an exhausted position list (pList==0) are ignored. */
      for(i=0; i<nToken; i++){
        TermOffset *pT = &sCtx.aTerm[i];
        if( pT->pList ){
          i64 iTokPos = pT->iPos - pT->iOff;
          if( iTokPos<iMinPos ){
            iMinPos = (int)iTokPos;
            pTerm = pT;
          }
        }
      }

      if( !pTerm ){
        /* All offsets for this column have been gathered. */
        rc = SQLITE_DONE;
      }else{
        assert_fts3_nc( iCurrent<=iMinPos );

        /* Advance the selected term to its next position, or mark its
        ** position list as exhausted if the next byte ends the list. */
        if( 0==(0xFE&*pTerm->pList) ){
          pTerm->pList = 0;
        }else{
          fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos);
        }

        /* Step the tokenizer forward until it reaches position iMinPos. */
        while( rc==SQLITE_OK && iCurrent<iMinPos ){
          rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
        }
        if( rc==SQLITE_OK ){
          char aBuffer[64];
          /* Each entry is four integers, each followed by a single space.
          ** The pointer difference has type ptrdiff_t, so it is cast to
          ** int to match the %d conversion it is formatted with. */
          sqlite3_snprintf(sizeof(aBuffer), aBuffer,
              "%d %d %d %d ",
              iCol, (int)(pTerm - sCtx.aTerm), iStart, iEnd-iStart
          );
          rc = fts3StringAppend(&res, aBuffer, -1);
        }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){
          /* The tokenizer ran out of tokens before reaching a position
          ** named in the index, and the table has no zContentTbl set. */
          rc = FTS_CORRUPT_VTAB;
        }
      }
    }
    if( rc==SQLITE_DONE ){
      rc = SQLITE_OK;
    }

    pMod->xClose(pC);
    if( rc!=SQLITE_OK ) goto offsets_out;
  }

 offsets_out:
  sqlite3_free(sCtx.aTerm);
  assert( rc!=SQLITE_DONE );
  sqlite3Fts3SegmentsClose(pTab);
  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx, rc);
    sqlite3_free(res.z);
  }else{
    /* Passing res.n-1 drops the separator that follows the final entry. */
    sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free);
  }
  return;
}
2010-01-12 17:57:30 +00:00
/*
* * Implementation of matchinfo ( ) function .
*/
2010-11-23 19:16:47 +00:00
void sqlite3Fts3Matchinfo (
sqlite3_context * pContext , /* Function call context */
Fts3Cursor * pCsr , /* FTS3 table cursor */
const char * zArg /* Second arg to matchinfo() function */
) {
Fts3Table * pTab = ( Fts3Table * ) pCsr - > base . pVtab ;
const char * zFormat ;
if ( zArg ) {
zFormat = zArg ;
} else {
zFormat = FTS3_MATCHINFO_DEFAULT ;
}
2010-02-10 05:33:17 +00:00
if ( ! pCsr - > pExpr ) {
sqlite3_result_blob ( pContext , " " , 0 , SQLITE_STATIC ) ;
return ;
2015-05-06 08:43:26 +00:00
} else {
/* Retrieve matchinfo() data. */
2015-05-06 17:41:19 +00:00
fts3GetMatchinfo ( pContext , pCsr , zFormat ) ;
2015-05-06 08:43:26 +00:00
sqlite3Fts3SegmentsClose ( pTab ) ;
2010-02-10 05:33:17 +00:00
}
2010-01-02 19:02:02 +00:00
}
2009-11-19 15:25:25 +00:00
# endif