mirror of https://github.com/tursodatabase/libsql.git, synced 2025-05-13 05:53:15 +00:00
/*
** 2006 Oct 10
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** This is an SQLite module implementing full-text search.
*/

/*
** The code in this file is only compiled if:
**
**     * The FTS3 module is being built as an extension
**       (in which case SQLITE_CORE is not defined), or
**
**     * The FTS3 module is being built into the core of
**       SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
*/

/* The full-text index is stored in a series of b+tree (-like)
** structures called segments which map terms to doclists.  The
** structures are like b+trees in layout, but are constructed from the
** bottom up in optimal fashion and are not updatable.  Since trees
** are built from the bottom up, things will be described from the
** bottom up.
**
**
**** Varints ****
** The basic unit of encoding is a variable-length integer called a
** varint.  We encode variable-length integers in little-endian order
** using seven bits per byte as follows:
**
** KEY:
**   A = 0xxxxxxx    7 bits of data and one flag bit
**   B = 1xxxxxxx    7 bits of data and one flag bit
**
**    7 bits - A
**   14 bits - BA
**   21 bits - BBA
**   and so on.
**
** This is similar in concept to how sqlite encodes "varints" but
** the encoding is not the same.  SQLite varints are big-endian
** and are limited to 9 bytes in length, whereas FTS3 varints are
** little-endian and can be up to 10 bytes in length (in theory).
**
** Example encodings:
**
**     1:    0x01
**   127:    0x7f
**   128:    0x80 0x01
**
**
**** Document lists ****
** A doclist (document list) holds a docid-sorted list of hits for a
** given term.  Doclists hold docids and associated token positions.
** A docid is the unique integer identifier for a single document.
** A position is the index of a word within the document.  The first
** word of the document has a position of 0.
**
** FTS3 used to optionally store character offsets using a compile-time
** option.  But that functionality is no longer supported.
**
** A doclist is stored like this:
**
** array {
**   varint docid;          (delta from previous doclist)
**   array {                (position list for column 0)
**     varint position;     (2 more than the delta from previous position)
**   }
**   array {
**     varint POS_COLUMN;   (marks start of position list for new column)
**     varint column;       (index of new column)
**     array {
**       varint position;   (2 more than the delta from previous position)
**     }
**   }
**   varint POS_END;        (marks end of positions for this document)
** }
**
** Here, array { X } means zero or more occurrences of X, adjacent in
** memory.  A "position" is an index of a token in the token stream
** generated by the tokenizer.  Note that POS_END and POS_COLUMN occur
** in the same logical place as the position element, and act as sentinels
** ending a position list array.  POS_END is 0.  POS_COLUMN is 1.
** The position numbers are not stored literally but rather as two more
** than the difference from the prior position, or just the position plus
** 2 for the first position.  Example:
**
**   label:    A    B    C    D    E    F    G    H    I    J    K
**   value:  123    5    9    1    1   14   35    0  234   72    0
**
** The 123 value is the first docid.  For column zero in this document
** there are two matches at positions 3 and 10 (5-2 and 9-2+3).  The 1
** at D signals the start of a new column; the 1 at E indicates that the
** new column is column number 1.  There are two positions at 12 and 45
** (14-2 and 35-2+12).  The 0 at H indicates the end-of-document.  The
** 234 at I is the delta to next docid (357).  It has one position 70
** (72-2) and then terminates with the 0 at K.
**
** A "position-list" is the list of positions for multiple columns for
** a single docid.  A "column-list" is the set of positions for a single
** column.  Hence, a position-list consists of one or more column-lists,
** a document record consists of a docid followed by a position-list and
** a doclist consists of one or more document records.
**
** A bare doclist omits the position information, becoming an
** array of varint-encoded docids.
**
**** Segment leaf nodes ****
** Segment leaf nodes store terms and doclists, ordered by term.  Leaf
** nodes are written using LeafWriter, and read using LeafReader (to
** iterate through a single leaf node's data) and LeavesReader (to
** iterate through a segment's entire leaf layer).  Leaf nodes have
** the format:
**
** varint iHeight;             (height from leaf level, always 0)
** varint nTerm;               (length of first term)
** char pTerm[nTerm];          (content of first term)
** varint nDoclist;            (length of term's associated doclist)
** char pDoclist[nDoclist];    (content of doclist)
** array {
**                             (further terms are delta-encoded)
**   varint nPrefix;           (length of prefix shared with previous term)
**   varint nSuffix;           (length of unshared suffix)
**   char pTermSuffix[nSuffix];(unshared suffix of next term)
**   varint nDoclist;          (length of term's associated doclist)
**   char pDoclist[nDoclist];  (content of doclist)
** }
**
** Here, array { X } means zero or more occurrences of X, adjacent in
** memory.
**
** Leaf nodes are broken into blocks which are stored contiguously in
** the %_segments table in sorted order.  This means that when the end
** of a node is reached, the next term is in the node with the next
** greater node id.
**
** New data is spilled to a new leaf node when the current node
** exceeds LEAF_MAX bytes (default 2048).  New data which itself is
** larger than STANDALONE_MIN (default 1024) is placed in a standalone
** node (a leaf node with a single term and doclist).  The goal of
** these settings is to pack together groups of small doclists while
** making it efficient to directly access large doclists.  The
** assumption is that large doclists represent terms which are more
** likely to be query targets.
**
** TODO(shess) It may be useful for blocking decisions to be more
** dynamic.  For instance, it may make more sense to have a 2.5k leaf
** node rather than splitting into 2k and .5k nodes.  My intuition is
** that this might extend through 2x or 4x the pagesize.
**
**
**** Segment interior nodes ****
** Segment interior nodes store blockids for subtree nodes and terms
** to describe what data is stored by each subtree.  Interior
** nodes are written using InteriorWriter, and read using
** InteriorReader.  InteriorWriters are created as needed when
** SegmentWriter creates new leaf nodes, or when an interior node
** itself grows too big and must be split.  The format of interior
** nodes:
**
** varint iHeight;           (height from leaf level, always >0)
** varint iBlockid;          (block id of node's leftmost subtree)
** optional {
**   varint nTerm;           (length of first term)
**   char pTerm[nTerm];      (content of first term)
**   array {
**                           (further terms are delta-encoded)
**     varint nPrefix;       (length of shared prefix with previous term)
**     varint nSuffix;       (length of unshared suffix)
**     char pTermSuffix[nSuffix]; (unshared suffix of next term)
**   }
** }
**
** Here, optional { X } means an optional element, while array { X }
** means zero or more occurrences of X, adjacent in memory.
**
** An interior node encodes n terms separating n+1 subtrees.  The
** subtree blocks are contiguous, so only the first subtree's blockid
** is encoded.  The subtree at iBlockid will contain all terms less
** than the first term encoded (or all terms if no term is encoded).
** Otherwise, for terms greater than or equal to pTerm[i] but less
** than pTerm[i+1], the subtree for that term will be rooted at
** iBlockid+i.  Interior nodes only store enough term data to
** distinguish adjacent children (if the rightmost term of the left
** child is "something", and the leftmost term of the right child is
** "wicked", only "w" is stored).
**
** New data is spilled to a new interior node at the same height when
** the current node exceeds INTERIOR_MAX bytes (default 2048).
** INTERIOR_MIN_TERMS (default 7) keeps large terms from monopolizing
** interior nodes and making the tree too skinny.  The interior nodes
** at a given height are naturally tracked by interior nodes at
** height+1, and so on.
**
**
**** Segment directory ****
** The segment directory in table %_segdir stores meta-information for
** merging and deleting segments, and also the root node of the
** segment's tree.
**
** The root node is the top node of the segment's tree after encoding
** the entire segment, restricted to ROOT_MAX bytes (default 1024).
** This could be either a leaf node or an interior node.  If the top
** node requires more than ROOT_MAX bytes, it is flushed to %_segments
** and a new root interior node is generated (which should always fit
** within ROOT_MAX because it only needs space for 2 varints, the
** height and the blockid of the previous root).
**
** The meta-information in the segment directory is:
**   level             - segment level (see below)
**   idx               - index within level
**                     - (level,idx uniquely identify a segment)
**   start_block       - first leaf node
**   leaves_end_block  - last leaf node
**   end_block         - last block (including interior nodes)
**   root              - contents of root node
**
** If the root node is a leaf node, then start_block,
** leaves_end_block, and end_block are all 0.
**
**
**** Segment merging ****
** To amortize update costs, segments are grouped into levels and
** merged in batches.  Each increase in level represents exponentially
** more documents.
**
** New documents (actually, document updates) are tokenized and
** written individually (using LeafWriter) to a level 0 segment, with
** incrementing idx.  When idx reaches MERGE_COUNT (default 16), all
** level 0 segments are merged into a single level 1 segment.  Level 1
** is populated like level 0, and eventually MERGE_COUNT level 1
** segments are merged to a single level 2 segment (representing
** MERGE_COUNT^2 updates), and so on.
**
** A segment merge traverses all segments at a given level in
** parallel, performing a straightforward sorted merge.  Since segment
** leaf nodes are written into the %_segments table in order, this
** merge traverses the underlying sqlite disk structures efficiently.
** After the merge, all segment blocks from the merged level are
** deleted.
**
** MERGE_COUNT controls how often we merge segments.  16 seems to be
** somewhat of a sweet spot for insertion performance.  32 and 64 show
** very similar performance numbers to 16 on insertion, though they're
** a tiny bit slower (perhaps due to more overhead in merge-time
** sorting).  8 is about 20% slower than 16, 4 about 50% slower than
** 16, 2 about 66% slower than 16.
**
** At query time, high MERGE_COUNT increases the number of segments
** which need to be scanned and merged.  For instance, with 100k docs
** inserted:
**
**    MERGE_COUNT   segments
**       16           25
**        8           12
**        4           10
**        2            6
**
** This appears to have only a moderate impact on queries for very
** frequent terms (which are somewhat dominated by segment merge
** costs), and infrequent and non-existent terms still seem to be fast
** even with many segments.
**
** TODO(shess) That said, it would be nice to have a better query-side
** argument for MERGE_COUNT of 16.  Also, it is possible/likely that
** optimizations to things like doclist merging will swing the sweet
** spot around.
**
**
**
**** Handling of deletions and updates ****
** Since we're using a segmented structure, with no docid-oriented
** index into the term index, we clearly cannot simply update the term
** index when a document is deleted or updated.  For deletions, we
** write an empty doclist (varint(docid) varint(POS_END)), for updates
** we simply write the new doclist.  Segment merges overwrite older
** data for a particular docid with newer data, so deletes or updates
** will eventually overtake the earlier data and knock it out.  The
** query logic likewise merges doclists so that newer data knocks out
** older data.
*/

#include "fts3Int.h"
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)

#if defined(SQLITE_ENABLE_FTS3) && !defined(SQLITE_CORE)
# define SQLITE_CORE 1
#endif

#include <assert.h>
#include <stdlib.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <stdarg.h>

#include "fts3.h"
#ifndef SQLITE_CORE
# include "sqlite3ext.h"
SQLITE_EXTENSION_INIT1
#endif

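/*
** A wrapper that adds a reference count to an Fts3Hash table, so that
** the hash table can be shared and is only released when the last
** reference (nRef) is dropped.
*/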
typedef struct Fts3HashWrapper Fts3HashWrapper;
struct Fts3HashWrapper {
  Fts3Hash hash;                  /* Hash table */
  int nRef;                       /* Number of pointers to this object */
};

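/* Forward declarations of static functions defined later in this file */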
static int fts3EvalNext(Fts3Cursor *pCsr);
|
|
static int fts3EvalStart(Fts3Cursor *pCsr);
|
|
static int fts3TermSegReaderCursor(
|
|
Fts3Cursor *, const char *, int, int, Fts3MultiSegReader **);
|
|
|
|
/*
|
|
** This variable is set to false when running tests for which the on disk
|
|
** structures should not be corrupt. Otherwise, true. If it is false, extra
|
|
** assert() conditions in the fts3 code are activated - conditions that are
|
|
** only true if it is guaranteed that the fts3 database is not corrupt.
|
|
*/
|
|
#ifdef SQLITE_DEBUG
|
|
int sqlite3_fts3_may_be_corrupt = 1;
|
|
#endif
|
|
|
|
/*
|
|
** Write a 64-bit variable-length integer to memory starting at p[0].
|
|
** The length of data written will be between 1 and FTS3_VARINT_MAX bytes.
|
|
** The number of bytes written is returned.
|
|
*/
|
|
int sqlite3Fts3PutVarint(char *p, sqlite_int64 v){
|
|
unsigned char *q = (unsigned char *) p;
|
|
sqlite_uint64 vu = v;
|
|
do{
|
|
*q++ = (unsigned char) ((vu & 0x7f) | 0x80);
|
|
vu >>= 7;
|
|
}while( vu!=0 );
|
|
q[-1] &= 0x7f; /* turn off high bit in final byte */
|
|
assert( q - (unsigned char *)p <= FTS3_VARINT_MAX );
|
|
return (int) (q - (unsigned char *)p);
|
|
}
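
/*
** Worked example (illustrative values, not part of the original code):
** passing v==300 to sqlite3Fts3PutVarint() writes the two bytes
** 0xAC 0x02. The low seven bits (0x2C) go out first with the
** continuation bit set (0x2C|0x80 == 0xAC); the remaining bits (0x02)
** follow with the continuation bit clear, and the function returns 2.
*/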
|
|
|
|
#define GETVARINT_STEP(v, ptr, shift, mask1, mask2, var, ret) \
|
|
v = (v & mask1) | ( (*(const unsigned char*)(ptr++)) << shift ); \
|
|
if( (v & mask2)==0 ){ var = v; return ret; }
|
|
#define GETVARINT_INIT(v, ptr, shift, mask1, mask2, var, ret) \
|
|
v = (*ptr++); \
|
|
if( (v & mask2)==0 ){ var = v; return ret; }
|
|
|
|
int sqlite3Fts3GetVarintU(const char *pBuf, sqlite_uint64 *v){
|
|
const unsigned char *p = (const unsigned char*)pBuf;
|
|
const unsigned char *pStart = p;
|
|
u32 a;
|
|
u64 b;
|
|
int shift;
|
|
|
|
GETVARINT_INIT(a, p, 0, 0x00, 0x80, *v, 1);
|
|
GETVARINT_STEP(a, p, 7, 0x7F, 0x4000, *v, 2);
|
|
GETVARINT_STEP(a, p, 14, 0x3FFF, 0x200000, *v, 3);
|
|
GETVARINT_STEP(a, p, 21, 0x1FFFFF, 0x10000000, *v, 4);
|
|
b = (a & 0x0FFFFFFF );
|
|
|
|
for(shift=28; shift<=63; shift+=7){
|
|
u64 c = *p++;
|
|
b += (c&0x7F) << shift;
|
|
if( (c & 0x80)==0 ) break;
|
|
}
|
|
*v = b;
|
|
return (int)(p - pStart);
|
|
}
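
/*
** Worked example (illustrative): decoding the bytes 0xAC 0x02 with
** sqlite3Fts3GetVarintU() sets *v to 300 and returns 2. Varints of up
** to four bytes are decoded with 32-bit arithmetic by the
** GETVARINT_INIT/GETVARINT_STEP macros above; only longer encodings
** fall through to the 64-bit loop.
*/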
|
|
|
|
/*
|
|
** Read a 64-bit variable-length integer from memory starting at p[0].
|
|
** Return the number of bytes read, or 0 on error.
|
|
** The value is stored in *v.
|
|
*/
|
|
int sqlite3Fts3GetVarint(const char *pBuf, sqlite_int64 *v){
|
|
return sqlite3Fts3GetVarintU(pBuf, (sqlite3_uint64*)v);
|
|
}
|
|
|
|
/*
|
|
** Read a 64-bit variable-length integer from memory starting at p[0] and
|
|
** not extending past pEnd[-1].
|
|
** Return the number of bytes read, or 0 on error.
|
|
** The value is stored in *v.
|
|
*/
|
|
int sqlite3Fts3GetVarintBounded(
|
|
const char *pBuf,
|
|
const char *pEnd,
|
|
sqlite_int64 *v
|
|
){
|
|
const unsigned char *p = (const unsigned char*)pBuf;
|
|
const unsigned char *pStart = p;
|
|
const unsigned char *pX = (const unsigned char*)pEnd;
|
|
u64 b = 0;
|
|
int shift;
|
|
for(shift=0; shift<=63; shift+=7){
|
|
u64 c = p<pX ? *p : 0;
|
|
p++;
|
|
b += (c&0x7F) << shift;
|
|
if( (c & 0x80)==0 ) break;
|
|
}
|
|
*v = b;
|
|
return (int)(p - pStart);
|
|
}
|
|
|
|
/*
|
|
** Similar to sqlite3Fts3GetVarint(), except that the output is truncated to
|
|
** a non-negative 32-bit integer before it is returned.
|
|
*/
|
|
int sqlite3Fts3GetVarint32(const char *p, int *pi){
|
|
const unsigned char *ptr = (const unsigned char*)p;
|
|
u32 a;
|
|
|
|
#ifndef fts3GetVarint32
|
|
GETVARINT_INIT(a, ptr, 0, 0x00, 0x80, *pi, 1);
|
|
#else
|
|
a = (*ptr++);
|
|
assert( a & 0x80 );
|
|
#endif
|
|
|
|
GETVARINT_STEP(a, ptr, 7, 0x7F, 0x4000, *pi, 2);
|
|
GETVARINT_STEP(a, ptr, 14, 0x3FFF, 0x200000, *pi, 3);
|
|
GETVARINT_STEP(a, ptr, 21, 0x1FFFFF, 0x10000000, *pi, 4);
|
|
a = (a & 0x0FFFFFFF );
|
|
*pi = (int)(a | ((u32)(*ptr & 0x07) << 28));
|
|
assert( 0==(a & 0x80000000) );
|
|
assert( *pi>=0 );
|
|
return 5;
|
|
}
|
|
|
|
/*
|
|
** Return the number of bytes required to encode v as a varint
|
|
*/
|
|
int sqlite3Fts3VarintLen(sqlite3_uint64 v){
|
|
int i = 0;
|
|
do{
|
|
i++;
|
|
v >>= 7;
|
|
}while( v!=0 );
|
|
return i;
|
|
}
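
/*
** For reference (derived from the loop above): values 0..127 encode in
** a single byte, values 128..16383 in two bytes, and each additional
** 7 bits of magnitude adds one byte, up to FTS3_VARINT_MAX bytes for
** the largest 64-bit values.
*/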
|
|
|
|
/*
|
|
** Convert an SQL-style quoted string into a normal string by removing
|
|
** the quote characters. The conversion is done in-place. If the
|
|
** input does not begin with a quote character, then this routine
|
|
** is a no-op.
|
|
**
|
|
** Examples:
|
|
**
|
|
** "abc" becomes abc
|
|
** 'xyz' becomes xyz
|
|
** [pqr] becomes pqr
|
|
** `mno` becomes mno
|
|
**
|
|
*/
|
|
void sqlite3Fts3Dequote(char *z){
|
|
char quote; /* Quote character (if any ) */
|
|
|
|
quote = z[0];
|
|
if( quote=='[' || quote=='\'' || quote=='"' || quote=='`' ){
|
|
int iIn = 1; /* Index of next byte to read from input */
|
|
int iOut = 0; /* Index of next byte to write to output */
|
|
|
|
/* If the first byte was a '[', then the close-quote character is a ']' */
|
|
if( quote=='[' ) quote = ']';
|
|
|
|
while( z[iIn] ){
|
|
if( z[iIn]==quote ){
|
|
if( z[iIn+1]!=quote ) break;
|
|
z[iOut++] = quote;
|
|
iIn += 2;
|
|
}else{
|
|
z[iOut++] = z[iIn++];
|
|
}
|
|
}
|
|
z[iOut] = '\0';
|
|
}
|
|
}
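
/*
** Additional example (illustrative): embedded quote characters are
** escaped by doubling, so the input "ab""cd" dequotes to ab"cd and
** 'it''s' dequotes to it's. The loop above copies one quote character
** and skips the other whenever it sees two in a row.
*/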
|
|
|
|
/*
|
|
** Read a single varint from the doclist at *pp and advance *pp to point
|
|
** to the first byte past the end of the varint. Add the value of the varint
|
|
** to *pVal.
|
|
*/
|
|
static void fts3GetDeltaVarint(char **pp, sqlite3_int64 *pVal){
|
|
sqlite3_int64 iVal;
|
|
*pp += sqlite3Fts3GetVarint(*pp, &iVal);
|
|
*pVal += iVal;
|
|
}
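
/*
** Example of use (illustrative values): docids in a doclist are stored
** as deltas. With *pVal initialized to 0, reading the varints 100 and
** then 5 from a doclist yields the docids 100 and 105.
*/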
|
|
|
|
/*
|
|
** When this function is called, *pp points to the first byte following a
|
|
** varint that is part of a doclist (or position-list, or any other list
|
|
** of varints). This function moves *pp to point to the start of that varint,
|
|
** and sets *pVal to the value of that varint.
|
|
**
|
|
** Argument pStart points to the first byte of the doclist that the
|
|
** varint is part of.
|
|
*/
|
|
static void fts3GetReverseVarint(
|
|
char **pp,
|
|
char *pStart,
|
|
sqlite3_int64 *pVal
|
|
){
|
|
sqlite3_int64 iVal;
|
|
char *p;
|
|
|
|
/* Pointer p now points at the first byte past the varint we are
|
|
** interested in. So, unless the doclist is corrupt, the 0x80 bit is
|
|
** clear on character p[-1]. */
|
|
for(p = (*pp)-2; p>=pStart && *p&0x80; p--);
|
|
p++;
|
|
*pp = p;
|
|
|
|
sqlite3Fts3GetVarint(p, &iVal);
|
|
*pVal = iVal;
|
|
}
|
|
|
|
/*
|
|
** The xDisconnect() virtual table method.
|
|
*/
|
|
static int fts3DisconnectMethod(sqlite3_vtab *pVtab){
|
|
Fts3Table *p = (Fts3Table *)pVtab;
|
|
int i;
|
|
|
|
assert( p->nPendingData==0 );
|
|
assert( p->pSegments==0 );
|
|
|
|
/* Free any prepared statements held */
|
|
sqlite3_finalize(p->pSeekStmt);
|
|
for(i=0; i<SizeofArray(p->aStmt); i++){
|
|
sqlite3_finalize(p->aStmt[i]);
|
|
}
|
|
sqlite3_free(p->zSegmentsTbl);
|
|
sqlite3_free(p->zReadExprlist);
|
|
sqlite3_free(p->zWriteExprlist);
|
|
sqlite3_free(p->zContentTbl);
|
|
sqlite3_free(p->zLanguageid);
|
|
|
|
/* Invoke the tokenizer destructor to free the tokenizer. */
|
|
p->pTokenizer->pModule->xDestroy(p->pTokenizer);
|
|
|
|
sqlite3_free(p);
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** Write an error message into *pzErr
|
|
*/
|
|
void sqlite3Fts3ErrMsg(char **pzErr, const char *zFormat, ...){
|
|
va_list ap;
|
|
sqlite3_free(*pzErr);
|
|
va_start(ap, zFormat);
|
|
*pzErr = sqlite3_vmprintf(zFormat, ap);
|
|
va_end(ap);
|
|
}
|
|
|
|
/*
|
|
** Construct one or more SQL statements from the format string given
|
|
** and then evaluate those statements. The success code is written
|
|
** into *pRc.
|
|
**
|
|
** If *pRc is initially non-zero then this routine is a no-op.
|
|
*/
|
|
static void fts3DbExec(
|
|
int *pRc, /* Success code */
|
|
sqlite3 *db, /* Database in which to run SQL */
|
|
const char *zFormat, /* Format string for SQL */
|
|
... /* Arguments to the format string */
|
|
){
|
|
va_list ap;
|
|
char *zSql;
|
|
if( *pRc ) return;
|
|
va_start(ap, zFormat);
|
|
zSql = sqlite3_vmprintf(zFormat, ap);
|
|
va_end(ap);
|
|
if( zSql==0 ){
|
|
*pRc = SQLITE_NOMEM;
|
|
}else{
|
|
*pRc = sqlite3_exec(db, zSql, 0, 0, 0);
|
|
sqlite3_free(zSql);
|
|
}
|
|
}
|
|
|
|
/*
|
|
** The xDestroy() virtual table method.
|
|
*/
|
|
static int fts3DestroyMethod(sqlite3_vtab *pVtab){
|
|
Fts3Table *p = (Fts3Table *)pVtab;
|
|
int rc = SQLITE_OK; /* Return code */
|
|
const char *zDb = p->zDb; /* Name of database (e.g. "main", "temp") */
|
|
sqlite3 *db = p->db; /* Database handle */
|
|
|
|
/* Drop the shadow tables */
|
|
fts3DbExec(&rc, db,
|
|
"DROP TABLE IF EXISTS %Q.'%q_segments';"
|
|
"DROP TABLE IF EXISTS %Q.'%q_segdir';"
|
|
"DROP TABLE IF EXISTS %Q.'%q_docsize';"
|
|
"DROP TABLE IF EXISTS %Q.'%q_stat';"
|
|
"%s DROP TABLE IF EXISTS %Q.'%q_content';",
|
|
zDb, p->zName,
|
|
zDb, p->zName,
|
|
zDb, p->zName,
|
|
zDb, p->zName,
|
|
(p->zContentTbl ? "--" : ""), zDb,p->zName
|
|
);
|
|
|
|
/* If everything has worked, invoke fts3DisconnectMethod() to free the
|
|
** memory associated with the Fts3Table structure and return SQLITE_OK.
|
|
** Otherwise, return an SQLite error code.
|
|
*/
|
|
return (rc==SQLITE_OK ? fts3DisconnectMethod(pVtab) : rc);
|
|
}
|
|
|
|
|
|
/*
|
|
** Invoke sqlite3_declare_vtab() to declare the schema for the FTS3 table
|
|
** passed as the first argument. This is done as part of the xConnect()
|
|
** and xCreate() methods.
|
|
**
|
|
** If *pRc is non-zero when this function is called, it is a no-op.
|
|
** Otherwise, if an error occurs, an SQLite error code is stored in *pRc
|
|
** before returning.
|
|
*/
|
|
static void fts3DeclareVtab(int *pRc, Fts3Table *p){
|
|
if( *pRc==SQLITE_OK ){
|
|
int i; /* Iterator variable */
|
|
int rc; /* Return code */
|
|
char *zSql; /* SQL statement passed to declare_vtab() */
|
|
char *zCols; /* List of user defined columns */
|
|
const char *zLanguageid;
|
|
|
|
zLanguageid = (p->zLanguageid ? p->zLanguageid : "__langid");
|
|
sqlite3_vtab_config(p->db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1);
|
|
sqlite3_vtab_config(p->db, SQLITE_VTAB_INNOCUOUS);
|
|
|
|
/* Create a list of user columns for the virtual table */
|
|
zCols = sqlite3_mprintf("%Q, ", p->azColumn[0]);
|
|
for(i=1; zCols && i<p->nColumn; i++){
|
|
zCols = sqlite3_mprintf("%z%Q, ", zCols, p->azColumn[i]);
|
|
}
|
|
|
|
/* Create the whole "CREATE TABLE" statement to pass to SQLite */
|
|
zSql = sqlite3_mprintf(
|
|
"CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN, %Q HIDDEN)",
|
|
zCols, p->zName, zLanguageid
|
|
);
|
|
if( !zCols || !zSql ){
|
|
rc = SQLITE_NOMEM;
|
|
}else{
|
|
rc = sqlite3_declare_vtab(p->db, zSql);
|
|
}
|
|
|
|
sqlite3_free(zSql);
|
|
sqlite3_free(zCols);
|
|
*pRc = rc;
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Create the %_stat table if it does not already exist.
|
|
*/
|
|
void sqlite3Fts3CreateStatTable(int *pRc, Fts3Table *p){
|
|
fts3DbExec(pRc, p->db,
|
|
"CREATE TABLE IF NOT EXISTS %Q.'%q_stat'"
|
|
"(id INTEGER PRIMARY KEY, value BLOB);",
|
|
p->zDb, p->zName
|
|
);
|
|
if( (*pRc)==SQLITE_OK ) p->bHasStat = 1;
|
|
}
|
|
|
|
/*
|
|
** Create the backing store tables (%_content, %_segments and %_segdir)
|
|
** required by the FTS3 table passed as the only argument. This is done
|
|
** as part of the vtab xCreate() method.
|
|
**
|
|
** If the p->bHasDocsize boolean is true (indicating that this is an
|
|
** FTS4 table, not an FTS3 table) then also create the %_docsize and
|
|
** %_stat tables required by FTS4.
|
|
*/
|
|
static int fts3CreateTables(Fts3Table *p){
|
|
int rc = SQLITE_OK; /* Return code */
|
|
int i; /* Iterator variable */
|
|
sqlite3 *db = p->db; /* The database connection */
|
|
|
|
if( p->zContentTbl==0 ){
|
|
const char *zLanguageid = p->zLanguageid;
|
|
char *zContentCols; /* Columns of %_content table */
|
|
|
|
/* Create a list of user columns for the content table */
|
|
zContentCols = sqlite3_mprintf("docid INTEGER PRIMARY KEY");
|
|
for(i=0; zContentCols && i<p->nColumn; i++){
|
|
char *z = p->azColumn[i];
|
|
zContentCols = sqlite3_mprintf("%z, 'c%d%q'", zContentCols, i, z);
|
|
}
|
|
if( zLanguageid && zContentCols ){
|
|
zContentCols = sqlite3_mprintf("%z, langid", zContentCols, zLanguageid);
|
|
}
|
|
if( zContentCols==0 ) rc = SQLITE_NOMEM;
|
|
|
|
/* Create the content table */
|
|
fts3DbExec(&rc, db,
|
|
"CREATE TABLE %Q.'%q_content'(%s)",
|
|
p->zDb, p->zName, zContentCols
|
|
);
|
|
sqlite3_free(zContentCols);
|
|
}
|
|
|
|
/* Create other tables */
|
|
fts3DbExec(&rc, db,
|
|
"CREATE TABLE %Q.'%q_segments'(blockid INTEGER PRIMARY KEY, block BLOB);",
|
|
p->zDb, p->zName
|
|
);
|
|
fts3DbExec(&rc, db,
|
|
"CREATE TABLE %Q.'%q_segdir'("
|
|
"level INTEGER,"
|
|
"idx INTEGER,"
|
|
"start_block INTEGER,"
|
|
"leaves_end_block INTEGER,"
|
|
"end_block INTEGER,"
|
|
"root BLOB,"
|
|
"PRIMARY KEY(level, idx)"
|
|
");",
|
|
p->zDb, p->zName
|
|
);
|
|
if( p->bHasDocsize ){
|
|
fts3DbExec(&rc, db,
|
|
"CREATE TABLE %Q.'%q_docsize'(docid INTEGER PRIMARY KEY, size BLOB);",
|
|
p->zDb, p->zName
|
|
);
|
|
}
|
|
assert( p->bHasStat==p->bFts4 );
|
|
if( p->bHasStat ){
|
|
sqlite3Fts3CreateStatTable(&rc, p);
|
|
}
|
|
return rc;
|
|
}
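
/*
** Example (illustrative, assuming an FTS4 table named "t1" with columns
** "a" and "b" in database "main"): this function creates
** main.'t1_content' with columns (docid, 'c0a', 'c1b'),
** main.'t1_segments', main.'t1_segdir' and, because bHasDocsize and
** bHasStat are set for FTS4 tables, main.'t1_docsize' and 't1_stat'.
*/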
|
|
|
|
/*
|
|
** Store the current database page-size in bytes in p->nPgsz.
|
|
**
|
|
** If *pRc is non-zero when this function is called, it is a no-op.
|
|
** Otherwise, if an error occurs, an SQLite error code is stored in *pRc
|
|
** before returning.
|
|
*/
|
|
static void fts3DatabasePageSize(int *pRc, Fts3Table *p){
|
|
if( *pRc==SQLITE_OK ){
|
|
int rc; /* Return code */
|
|
char *zSql; /* SQL text "PRAGMA %Q.page_size" */
|
|
sqlite3_stmt *pStmt; /* Compiled "PRAGMA %Q.page_size" statement */
|
|
|
|
zSql = sqlite3_mprintf("PRAGMA %Q.page_size", p->zDb);
|
|
if( !zSql ){
|
|
rc = SQLITE_NOMEM;
|
|
}else{
|
|
rc = sqlite3_prepare(p->db, zSql, -1, &pStmt, 0);
|
|
if( rc==SQLITE_OK ){
|
|
sqlite3_step(pStmt);
|
|
p->nPgsz = sqlite3_column_int(pStmt, 0);
|
|
rc = sqlite3_finalize(pStmt);
|
|
}else if( rc==SQLITE_AUTH ){
|
|
p->nPgsz = 1024;
|
|
rc = SQLITE_OK;
|
|
}
|
|
}
|
|
assert( p->nPgsz>0 || rc!=SQLITE_OK );
|
|
sqlite3_free(zSql);
|
|
*pRc = rc;
|
|
}
|
|
}
|
|
|
|
/*
|
|
** "Special" FTS4 arguments are column specifications of the following form:
|
|
**
|
|
** <key> = <value>
|
|
**
|
|
** There may not be whitespace surrounding the "=" character. The <value>
|
|
** term may be quoted, but the <key> may not.
|
|
*/
|
|
static int fts3IsSpecialColumn(
|
|
const char *z,
|
|
int *pnKey,
|
|
char **pzValue
|
|
){
|
|
char *zValue;
|
|
const char *zCsr = z;
|
|
|
|
while( *zCsr!='=' ){
|
|
if( *zCsr=='\0' ) return 0;
|
|
zCsr++;
|
|
}
|
|
|
|
*pnKey = (int)(zCsr-z);
|
|
zValue = sqlite3_mprintf("%s", &zCsr[1]);
|
|
if( zValue ){
|
|
sqlite3Fts3Dequote(zValue);
|
|
}
|
|
*pzValue = zValue;
|
|
return 1;
|
|
}
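
/*
** Example (illustrative): for z=="matchinfo=fts3" this function sets
** *pnKey to 9 (the length of "matchinfo"), sets *pzValue to a dequoted
** copy of "fts3" and returns 1. For an ordinary column name such as
** "body", which contains no '=', it returns 0.
*/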
|
|
|
|
/*
|
|
** Append the output of a printf() style formatting to an existing string.
|
|
*/
|
|
static void fts3Appendf(
|
|
int *pRc, /* IN/OUT: Error code */
|
|
char **pz, /* IN/OUT: Pointer to string buffer */
|
|
const char *zFormat, /* Printf format string to append */
|
|
... /* Arguments for printf format string */
|
|
){
|
|
if( *pRc==SQLITE_OK ){
|
|
va_list ap;
|
|
char *z;
|
|
va_start(ap, zFormat);
|
|
z = sqlite3_vmprintf(zFormat, ap);
|
|
va_end(ap);
|
|
if( z && *pz ){
|
|
char *z2 = sqlite3_mprintf("%s%s", *pz, z);
|
|
sqlite3_free(z);
|
|
z = z2;
|
|
}
|
|
if( z==0 ) *pRc = SQLITE_NOMEM;
|
|
sqlite3_free(*pz);
|
|
*pz = z;
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Return a copy of input string zInput enclosed in double-quotes (") and
|
|
** with all double quote characters escaped. For example:
|
|
**
|
|
** fts3QuoteId("un \"zip\"") -> "un \"\"zip\"\""
|
|
**
|
|
** The pointer returned points to memory obtained from sqlite3_malloc(). It
|
|
** is the callers responsibility to call sqlite3_free() to release this
|
|
** memory.
|
|
*/
|
|
static char *fts3QuoteId(char const *zInput){
|
|
sqlite3_int64 nRet;
|
|
char *zRet;
|
|
nRet = 2 + (int)strlen(zInput)*2 + 1;
|
|
zRet = sqlite3_malloc64(nRet);
|
|
if( zRet ){
|
|
int i;
|
|
char *z = zRet;
|
|
*(z++) = '"';
|
|
for(i=0; zInput[i]; i++){
|
|
if( zInput[i]=='"' ) *(z++) = '"';
|
|
*(z++) = zInput[i];
|
|
}
|
|
*(z++) = '"';
|
|
*(z++) = '\0';
|
|
}
|
|
return zRet;
|
|
}
|
|
|
|
/*
|
|
** Return a list of comma separated SQL expressions and a FROM clause that
|
|
** could be used in a SELECT statement such as the following:
|
|
**
|
|
** SELECT <list of expressions> FROM %_content AS x ...
|
|
**
|
|
** to return the docid, followed by each column of text data in order
|
|
** from left to right. If parameter zFunc is not NULL, then instead of
|
|
** being returned directly each column of text data is passed to an SQL
|
|
** function named zFunc first. For example, if zFunc is "unzip" and the
|
|
** table has the three user-defined columns "a", "b", and "c", the following
|
|
** string is returned:
|
|
**
|
|
** "docid, unzip(x.'a'), unzip(x.'b'), unzip(x.'c') FROM %_content AS x"
|
|
**
|
|
** The pointer returned points to a buffer allocated by sqlite3_malloc(). It
|
|
** is the responsibility of the caller to eventually free it.
|
|
**
|
|
** If *pRc is not SQLITE_OK when this function is called, it is a no-op (and
|
|
** a NULL pointer is returned). Otherwise, if an OOM error is encountered
|
|
** by this function, NULL is returned and *pRc is set to SQLITE_NOMEM. If
|
|
** no error occurs, *pRc is left unmodified.
|
|
*/
|
|
static char *fts3ReadExprList(Fts3Table *p, const char *zFunc, int *pRc){
|
|
char *zRet = 0;
|
|
char *zFree = 0;
|
|
char *zFunction;
|
|
int i;
|
|
|
|
if( p->zContentTbl==0 ){
|
|
if( !zFunc ){
|
|
zFunction = "";
|
|
}else{
|
|
zFree = zFunction = fts3QuoteId(zFunc);
|
|
}
|
|
fts3Appendf(pRc, &zRet, "docid");
|
|
for(i=0; i<p->nColumn; i++){
|
|
fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]);
|
|
}
|
|
if( p->zLanguageid ){
|
|
fts3Appendf(pRc, &zRet, ", x.%Q", "langid");
|
|
}
|
|
sqlite3_free(zFree);
|
|
}else{
|
|
fts3Appendf(pRc, &zRet, "rowid");
|
|
for(i=0; i<p->nColumn; i++){
|
|
fts3Appendf(pRc, &zRet, ", x.'%q'", p->azColumn[i]);
|
|
}
|
|
if( p->zLanguageid ){
|
|
fts3Appendf(pRc, &zRet, ", x.%Q", p->zLanguageid);
|
|
}
|
|
}
|
|
fts3Appendf(pRc, &zRet, " FROM '%q'.'%q%s' AS x",
|
|
p->zDb,
|
|
(p->zContentTbl ? p->zContentTbl : p->zName),
|
|
(p->zContentTbl ? "" : "_content")
|
|
);
|
|
return zRet;
|
|
}
|
|
|
|
/*
|
|
** Return a list of N comma separated question marks, where N is the number
|
|
** of columns in the %_content table (one for the docid plus one for each
|
|
** user-defined text column).
|
|
**
|
|
** If argument zFunc is not NULL, then all but the first question mark
|
|
** are preceded by zFunc and an open bracket, and followed by a closing
|
|
** bracket. For example, if zFunc is "zip" and the FTS3 table has three
|
|
** user-defined text columns, the following string is returned:
|
|
**
|
|
** "?, zip(?), zip(?), zip(?)"
|
|
**
|
|
** The pointer returned points to a buffer allocated by sqlite3_malloc(). It
|
|
** is the responsibility of the caller to eventually free it.
|
|
**
|
|
** If *pRc is not SQLITE_OK when this function is called, it is a no-op (and
|
|
** a NULL pointer is returned). Otherwise, if an OOM error is encountered
|
|
** by this function, NULL is returned and *pRc is set to SQLITE_NOMEM. If
|
|
** no error occurs, *pRc is left unmodified.
|
|
*/
|
|
static char *fts3WriteExprList(Fts3Table *p, const char *zFunc, int *pRc){
|
|
char *zRet = 0;
|
|
char *zFree = 0;
|
|
char *zFunction;
|
|
int i;
|
|
|
|
if( !zFunc ){
|
|
zFunction = "";
|
|
}else{
|
|
zFree = zFunction = fts3QuoteId(zFunc);
|
|
}
|
|
fts3Appendf(pRc, &zRet, "?");
|
|
for(i=0; i<p->nColumn; i++){
|
|
fts3Appendf(pRc, &zRet, ",%s(?)", zFunction);
|
|
}
|
|
if( p->zLanguageid ){
|
|
fts3Appendf(pRc, &zRet, ", ?");
|
|
}
|
|
sqlite3_free(zFree);
|
|
return zRet;
|
|
}
|
|
|
|
/*
|
|
** Buffer z contains a positive integer value encoded as utf-8 text.
|
|
** Decode this value and store it in *pnOut, returning the number of bytes
|
|
** consumed. If an overflow error occurs return a negative value.
|
|
*/
|
|
int sqlite3Fts3ReadInt(const char *z, int *pnOut){
|
|
u64 iVal = 0;
|
|
int i;
|
|
for(i=0; z[i]>='0' && z[i]<='9'; i++){
|
|
iVal = iVal*10 + (z[i] - '0');
|
|
if( iVal>0x7FFFFFFF ) return -1;
|
|
}
|
|
*pnOut = (int)iVal;
|
|
return i;
|
|
}
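
/*
** Example (illustrative): sqlite3Fts3ReadInt("123,456", &n) sets n to
** 123 and returns 3, the number of digit bytes consumed. A value that
** would exceed 0x7FFFFFFF causes a negative return instead.
*/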
|
|
|
|
/*
|
|
** This function interprets the string at (*pp) as a non-negative integer
|
|
** value. It reads the integer and sets *pnOut to the value read, then
|
|
** sets *pp to point to the byte immediately following the last byte of
|
|
** the integer value.
|
|
**
|
|
** Only decimal digits ('0'..'9') may be part of an integer value.
|
|
**
|
|
** If *pp does not begin with a decimal digit, SQLITE_ERROR is returned and
|
|
** the output value undefined. Otherwise SQLITE_OK is returned.
|
|
**
|
|
** This function is used when parsing the "prefix=" FTS4 parameter.
|
|
*/
|
|
static int fts3GobbleInt(const char **pp, int *pnOut){
|
|
const int MAX_NPREFIX = 10000000;
|
|
int nInt = 0; /* Output value */
|
|
int nByte;
|
|
nByte = sqlite3Fts3ReadInt(*pp, &nInt);
|
|
if( nInt>MAX_NPREFIX ){
|
|
nInt = 0;
|
|
}
|
|
if( nByte==0 ){
|
|
return SQLITE_ERROR;
|
|
}
|
|
*pnOut = nInt;
|
|
*pp += nByte;
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** This function is called to allocate an array of Fts3Index structures
|
|
** representing the indexes maintained by the current FTS table. FTS tables
|
|
** always maintain the main "terms" index, but may also maintain one or
|
|
** more "prefix" indexes, depending on the value of the "prefix=" parameter
|
|
** (if any) specified as part of the CREATE VIRTUAL TABLE statement.
|
|
**
|
|
** Argument zParam is passed the value of the "prefix=" option if one was
|
|
** specified, or NULL otherwise.
|
|
**
|
|
** If no error occurs, SQLITE_OK is returned and *apIndex set to point to
|
|
** the allocated array. *pnIndex is set to the number of elements in the
|
|
** array. If an error does occur, an SQLite error code is returned.
|
|
**
|
|
** Regardless of whether or not an error is returned, it is the responsibility
|
|
** of the caller to call sqlite3_free() on the output array to free it.
|
|
*/
|
|
static int fts3PrefixParameter(
|
|
const char *zParam, /* ABC in prefix=ABC parameter to parse */
|
|
int *pnIndex, /* OUT: size of *apIndex[] array */
|
|
struct Fts3Index **apIndex /* OUT: Array of indexes for this table */
|
|
){
|
|
struct Fts3Index *aIndex; /* Allocated array */
|
|
int nIndex = 1; /* Number of entries in array */
|
|
|
|
if( zParam && zParam[0] ){
|
|
const char *p;
|
|
nIndex++;
|
|
for(p=zParam; *p; p++){
|
|
if( *p==',' ) nIndex++;
|
|
}
|
|
}
|
|
|
|
aIndex = sqlite3_malloc64(sizeof(struct Fts3Index) * nIndex);
|
|
*apIndex = aIndex;
|
|
if( !aIndex ){
|
|
return SQLITE_NOMEM;
|
|
}
|
|
|
|
memset(aIndex, 0, sizeof(struct Fts3Index) * nIndex);
|
|
if( zParam ){
|
|
const char *p = zParam;
|
|
int i;
|
|
for(i=1; i<nIndex; i++){
|
|
int nPrefix = 0;
|
|
if( fts3GobbleInt(&p, &nPrefix) ) return SQLITE_ERROR;
|
|
assert( nPrefix>=0 );
|
|
if( nPrefix==0 ){
|
|
nIndex--;
|
|
i--;
|
|
}else{
|
|
aIndex[i].nPrefix = nPrefix;
|
|
}
|
|
p++;
|
|
}
|
|
}
|
|
|
|
*pnIndex = nIndex;
|
|
return SQLITE_OK;
|
|
}
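
/*
** Example (illustrative): for prefix="2,4" this function allocates a
** three entry array: aIndex[0] with nPrefix==0 (the ordinary terms
** index) plus entries with nPrefix values 2 and 4. With no prefix=
** option at all, only the single terms index entry is allocated.
*/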
|
|
|
|
/*
|
|
** This function is called when initializing an FTS4 table that uses the
|
|
** content=xxx option. It determines the number of and names of the columns
|
|
** of the new FTS4 table.
|
|
**
|
|
** The third argument passed to this function is the value passed to the
|
|
** content=xxx option (i.e. "xxx"). This function queries the database for
|
|
** a table of that name. If found, the output variables are populated
|
|
** as follows:
|
|
**
|
|
** *pnCol: Set to the number of columns table xxx has,
|
|
**
|
|
** *pnStr: Set to the total amount of space required to store a copy
|
|
** of each column's name, including the nul-terminator.
|
|
**
|
|
** *pazCol: Set to point to an array of *pnCol strings. Each string is
|
|
** the name of the corresponding column in table xxx. The array
|
|
** and its contents are allocated using a single allocation. It
|
|
** is the responsibility of the caller to free this allocation
|
|
** by eventually passing the *pazCol value to sqlite3_free().
|
|
**
|
|
** If the table cannot be found, an error code is returned and the output
|
|
** variables are undefined. Or, if an OOM is encountered, SQLITE_NOMEM is
|
|
** returned (and the output variables are undefined).
|
|
*/
|
|
static int fts3ContentColumns(
|
|
sqlite3 *db, /* Database handle */
|
|
const char *zDb, /* Name of db (i.e. "main", "temp" etc.) */
|
|
const char *zTbl, /* Name of content table */
|
|
const char ***pazCol, /* OUT: Malloc'd array of column names */
|
|
int *pnCol, /* OUT: Size of array *pazCol */
|
|
int *pnStr, /* OUT: Bytes of string content */
|
|
char **pzErr /* OUT: error message */
|
|
){
|
|
int rc = SQLITE_OK; /* Return code */
|
|
char *zSql; /* "SELECT *" statement on zTbl */
|
|
sqlite3_stmt *pStmt = 0; /* Compiled version of zSql */
|
|
|
|
zSql = sqlite3_mprintf("SELECT * FROM %Q.%Q", zDb, zTbl);
|
|
if( !zSql ){
|
|
rc = SQLITE_NOMEM;
|
|
}else{
|
|
rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
|
|
if( rc!=SQLITE_OK ){
|
|
sqlite3Fts3ErrMsg(pzErr, "%s", sqlite3_errmsg(db));
|
|
}
|
|
}
|
|
sqlite3_free(zSql);
|
|
|
|
if( rc==SQLITE_OK ){
|
|
const char **azCol; /* Output array */
|
|
sqlite3_int64 nStr = 0; /* Size of all column names (incl. 0x00) */
|
|
int nCol; /* Number of table columns */
|
|
int i; /* Used to iterate through columns */
|
|
|
|
/* Loop through the returned columns. Set nStr to the number of bytes of
|
|
** space required to store a copy of each column name, including the
|
|
** nul-terminator byte. */
|
|
nCol = sqlite3_column_count(pStmt);
|
|
for(i=0; i<nCol; i++){
|
|
const char *zCol = sqlite3_column_name(pStmt, i);
|
|
nStr += strlen(zCol) + 1;
|
|
}
|
|
|
|
/* Allocate and populate the array to return. */
|
|
azCol = (const char **)sqlite3_malloc64(sizeof(char *) * nCol + nStr);
|
|
if( azCol==0 ){
|
|
rc = SQLITE_NOMEM;
|
|
}else{
|
|
char *p = (char *)&azCol[nCol];
|
|
for(i=0; i<nCol; i++){
|
|
const char *zCol = sqlite3_column_name(pStmt, i);
|
|
int n = (int)strlen(zCol)+1;
|
|
memcpy(p, zCol, n);
|
|
azCol[i] = p;
|
|
p += n;
|
|
}
|
|
}
|
|
sqlite3_finalize(pStmt);
|
|
|
|
/* Set the output variables. */
|
|
*pnCol = nCol;
|
|
*pnStr = nStr;
|
|
*pazCol = azCol;
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** This function is the implementation of both the xConnect and xCreate
|
|
** methods of the FTS3 virtual table.
|
|
**
|
|
** The argv[] array contains the following:
|
|
**
|
|
** argv[0] -> module name ("fts3" or "fts4")
|
|
** argv[1] -> database name
|
|
** argv[2] -> table name
|
|
** argv[...] -> "column name" and other module argument fields.
|
|
*/
|
|
static int fts3InitVtab(
|
|
int isCreate, /* True for xCreate, false for xConnect */
|
|
sqlite3 *db, /* The SQLite database connection */
|
|
void *pAux, /* Hash table containing tokenizers */
|
|
int argc, /* Number of elements in argv array */
|
|
const char * const *argv, /* xCreate/xConnect argument array */
|
|
sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */
|
|
char **pzErr /* Write any error message here */
|
|
){
|
|
Fts3Hash *pHash = &((Fts3HashWrapper*)pAux)->hash;
|
|
Fts3Table *p = 0; /* Pointer to allocated vtab */
|
|
int rc = SQLITE_OK; /* Return code */
|
|
int i; /* Iterator variable */
|
|
sqlite3_int64 nByte; /* Size of allocation used for *p */
|
|
int iCol; /* Column index */
|
|
int nString = 0; /* Bytes required to hold all column names */
|
|
int nCol = 0; /* Number of columns in the FTS table */
|
|
char *zCsr; /* Space for holding column names */
|
|
int nDb; /* Bytes required to hold database name */
|
|
int nName; /* Bytes required to hold table name */
|
|
int isFts4 = (argv[0][3]=='4'); /* True for FTS4, false for FTS3 */
|
|
const char **aCol; /* Array of column names */
|
|
sqlite3_tokenizer *pTokenizer = 0; /* Tokenizer for this table */
|
|
|
|
int nIndex = 0; /* Size of aIndex[] array */
|
|
struct Fts3Index *aIndex = 0; /* Array of indexes for this table */
|
|
|
|
/* The results of parsing supported FTS4 key=value options: */
|
|
int bNoDocsize = 0; /* True to omit %_docsize table */
|
|
int bDescIdx = 0; /* True to store descending indexes */
|
|
char *zPrefix = 0; /* Prefix parameter value (or NULL) */
|
|
char *zCompress = 0; /* compress=? parameter (or NULL) */
|
|
char *zUncompress = 0; /* uncompress=? parameter (or NULL) */
|
|
char *zContent = 0; /* content=? parameter (or NULL) */
|
|
char *zLanguageid = 0; /* languageid=? parameter (or NULL) */
|
|
char **azNotindexed = 0; /* The set of notindexed= columns */
|
|
int nNotindexed = 0; /* Size of azNotindexed[] array */
|
|
|
|
assert( strlen(argv[0])==4 );
|
|
assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4)
|
|
|| (sqlite3_strnicmp(argv[0], "fts3", 4)==0 && !isFts4)
|
|
);
|
|
|
|
nDb = (int)strlen(argv[1]) + 1;
|
|
nName = (int)strlen(argv[2]) + 1;
|
|
|
|
nByte = sizeof(const char *) * (argc-2);
|
|
aCol = (const char **)sqlite3_malloc64(nByte);
|
|
if( aCol ){
|
|
memset((void*)aCol, 0, nByte);
|
|
azNotindexed = (char **)sqlite3_malloc64(nByte);
|
|
}
|
|
if( azNotindexed ){
|
|
memset(azNotindexed, 0, nByte);
|
|
}
|
|
if( !aCol || !azNotindexed ){
|
|
rc = SQLITE_NOMEM;
|
|
goto fts3_init_out;
|
|
}
|
|
|
|
/* Loop through all of the arguments passed by the user to the FTS3/4
|
|
** module (i.e. all the column names and special arguments). This loop
|
|
** does the following:
|
|
**
|
|
** + Figures out the number of columns the FTSX table will have, and
|
|
** the number of bytes of space that must be allocated to store copies
|
|
** of the column names.
|
|
**
|
|
** + If there is a tokenizer specification included in the arguments,
|
|
** initializes the tokenizer pTokenizer.
|
|
*/
|
|
for(i=3; rc==SQLITE_OK && i<argc; i++){
|
|
char const *z = argv[i];
|
|
int nKey;
|
|
char *zVal;
|
|
|
|
/* Check if this is a tokenizer specification */
|
|
if( !pTokenizer
|
|
&& strlen(z)>8
|
|
&& 0==sqlite3_strnicmp(z, "tokenize", 8)
|
|
&& 0==sqlite3Fts3IsIdChar(z[8])
|
|
){
|
|
rc = sqlite3Fts3InitTokenizer(pHash, &z[9], &pTokenizer, pzErr);
|
|
}
|
|
|
|
/* Check if it is an FTS4 special argument. */
|
|
else if( isFts4 && fts3IsSpecialColumn(z, &nKey, &zVal) ){
|
|
struct Fts4Option {
|
|
const char *zOpt;
|
|
int nOpt;
|
|
} aFts4Opt[] = {
|
|
{ "matchinfo", 9 }, /* 0 -> MATCHINFO */
|
|
{ "prefix", 6 }, /* 1 -> PREFIX */
|
|
{ "compress", 8 }, /* 2 -> COMPRESS */
|
|
{ "uncompress", 10 }, /* 3 -> UNCOMPRESS */
|
|
{ "order", 5 }, /* 4 -> ORDER */
|
|
{ "content", 7 }, /* 5 -> CONTENT */
|
|
{ "languageid", 10 }, /* 6 -> LANGUAGEID */
|
|
{ "notindexed", 10 } /* 7 -> NOTINDEXED */
|
|
};
|
|
|
|
int iOpt;
|
|
if( !zVal ){
|
|
rc = SQLITE_NOMEM;
|
|
}else{
|
|
for(iOpt=0; iOpt<SizeofArray(aFts4Opt); iOpt++){
|
|
struct Fts4Option *pOp = &aFts4Opt[iOpt];
|
|
if( nKey==pOp->nOpt && !sqlite3_strnicmp(z, pOp->zOpt, pOp->nOpt) ){
|
|
break;
|
|
}
|
|
}
|
|
switch( iOpt ){
|
|
case 0: /* MATCHINFO */
|
|
if( strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "fts3", 4) ){
|
|
sqlite3Fts3ErrMsg(pzErr, "unrecognized matchinfo: %s", zVal);
|
|
rc = SQLITE_ERROR;
|
|
}
|
|
bNoDocsize = 1;
|
|
break;
|
|
|
|
case 1: /* PREFIX */
|
|
sqlite3_free(zPrefix);
|
|
zPrefix = zVal;
|
|
zVal = 0;
|
|
break;
|
|
|
|
case 2: /* COMPRESS */
|
|
sqlite3_free(zCompress);
|
|
zCompress = zVal;
|
|
zVal = 0;
|
|
break;
|
|
|
|
case 3: /* UNCOMPRESS */
|
|
sqlite3_free(zUncompress);
|
|
zUncompress = zVal;
|
|
zVal = 0;
|
|
break;
|
|
|
|
case 4: /* ORDER */
|
|
if( (strlen(zVal)!=3 || sqlite3_strnicmp(zVal, "asc", 3))
|
|
&& (strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "desc", 4))
|
|
){
|
|
sqlite3Fts3ErrMsg(pzErr, "unrecognized order: %s", zVal);
|
|
rc = SQLITE_ERROR;
|
|
}
|
|
bDescIdx = (zVal[0]=='d' || zVal[0]=='D');
|
|
break;
|
|
|
|
case 5: /* CONTENT */
|
|
sqlite3_free(zContent);
|
|
zContent = zVal;
|
|
zVal = 0;
|
|
break;
|
|
|
|
case 6: /* LANGUAGEID */
|
|
assert( iOpt==6 );
|
|
sqlite3_free(zLanguageid);
|
|
zLanguageid = zVal;
|
|
zVal = 0;
|
|
break;
|
|
|
|
case 7: /* NOTINDEXED */
|
|
azNotindexed[nNotindexed++] = zVal;
|
|
zVal = 0;
|
|
break;
|
|
|
|
default:
|
|
assert( iOpt==SizeofArray(aFts4Opt) );
|
|
sqlite3Fts3ErrMsg(pzErr, "unrecognized parameter: %s", z);
|
|
rc = SQLITE_ERROR;
|
|
break;
|
|
}
|
|
sqlite3_free(zVal);
|
|
}
|
|
}
|
|
|
|
/* Otherwise, the argument is a column name. */
|
|
else {
|
|
nString += (int)(strlen(z) + 1);
|
|
aCol[nCol++] = z;
|
|
}
|
|
}
|
|
|
|
/* If a content=xxx option was specified, the following:
|
|
**
|
|
** 1. Ignore any compress= and uncompress= options.
|
|
**
|
|
** 2. If no column names were specified as part of the CREATE VIRTUAL
|
|
** TABLE statement, use all columns from the content table.
|
|
*/
|
|
if( rc==SQLITE_OK && zContent ){
|
|
sqlite3_free(zCompress);
|
|
sqlite3_free(zUncompress);
|
|
zCompress = 0;
|
|
zUncompress = 0;
|
|
if( nCol==0 ){
|
|
sqlite3_free((void*)aCol);
|
|
aCol = 0;
|
|
rc = fts3ContentColumns(db, argv[1], zContent,&aCol,&nCol,&nString,pzErr);
|
|
|
|
/* If a languageid= option was specified, remove the language id
|
|
** column from the aCol[] array. */
|
|
if( rc==SQLITE_OK && zLanguageid ){
|
|
int j;
|
|
for(j=0; j<nCol; j++){
|
|
if( sqlite3_stricmp(zLanguageid, aCol[j])==0 ){
|
|
int k;
|
|
for(k=j; k<nCol; k++) aCol[k] = aCol[k+1];
|
|
nCol--;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if( rc!=SQLITE_OK ) goto fts3_init_out;
|
|
|
|
if( nCol==0 ){
|
|
assert( nString==0 );
|
|
aCol[0] = "content";
|
|
nString = 8;
|
|
nCol = 1;
|
|
}
|
|
|
|
if( pTokenizer==0 ){
|
|
rc = sqlite3Fts3InitTokenizer(pHash, "simple", &pTokenizer, pzErr);
|
|
if( rc!=SQLITE_OK ) goto fts3_init_out;
|
|
}
|
|
assert( pTokenizer );
|
|
|
|
rc = fts3PrefixParameter(zPrefix, &nIndex, &aIndex);
|
|
if( rc==SQLITE_ERROR ){
|
|
assert( zPrefix );
|
|
sqlite3Fts3ErrMsg(pzErr, "error parsing prefix parameter: %s", zPrefix);
|
|
}
|
|
if( rc!=SQLITE_OK ) goto fts3_init_out;
|
|
|
|
/* Allocate and populate the Fts3Table structure. */
|
|
nByte = sizeof(Fts3Table) + /* Fts3Table */
|
|
nCol * sizeof(char *) + /* azColumn */
|
|
nIndex * sizeof(struct Fts3Index) + /* aIndex */
|
|
nCol * sizeof(u8) + /* abNotindexed */
|
|
nName + /* zName */
|
|
nDb + /* zDb */
|
|
nString; /* Space for azColumn strings */
|
|
p = (Fts3Table*)sqlite3_malloc64(nByte);
|
|
if( p==0 ){
|
|
rc = SQLITE_NOMEM;
|
|
goto fts3_init_out;
|
|
}
|
|
memset(p, 0, nByte);
|
|
p->db = db;
|
|
p->nColumn = nCol;
|
|
p->nPendingData = 0;
|
|
p->azColumn = (char **)&p[1];
|
|
p->pTokenizer = pTokenizer;
|
|
p->nMaxPendingData = FTS3_MAX_PENDING_DATA;
|
|
p->bHasDocsize = (isFts4 && bNoDocsize==0);
|
|
p->bHasStat = (u8)isFts4;
|
|
p->bFts4 = (u8)isFts4;
|
|
p->bDescIdx = (u8)bDescIdx;
|
|
p->nAutoincrmerge = 0xff; /* 0xff means setting unknown */
|
|
p->zContentTbl = zContent;
|
|
p->zLanguageid = zLanguageid;
|
|
zContent = 0;
|
|
zLanguageid = 0;
|
|
TESTONLY( p->inTransaction = -1 );
|
|
TESTONLY( p->mxSavepoint = -1 );
|
|
|
|
p->aIndex = (struct Fts3Index *)&p->azColumn[nCol];
|
|
memcpy(p->aIndex, aIndex, sizeof(struct Fts3Index) * nIndex);
|
|
p->nIndex = nIndex;
|
|
for(i=0; i<nIndex; i++){
|
|
fts3HashInit(&p->aIndex[i].hPending, FTS3_HASH_STRING, 1);
|
|
}
|
|
p->abNotindexed = (u8 *)&p->aIndex[nIndex];
|
|
|
|
/* Fill in the zName and zDb fields of the vtab structure. */
|
|
zCsr = (char *)&p->abNotindexed[nCol];
|
|
p->zName = zCsr;
|
|
memcpy(zCsr, argv[2], nName);
|
|
zCsr += nName;
|
|
p->zDb = zCsr;
|
|
memcpy(zCsr, argv[1], nDb);
|
|
zCsr += nDb;
|
|
|
|
/* Fill in the azColumn array */
|
|
for(iCol=0; iCol<nCol; iCol++){
|
|
char *z;
|
|
int n = 0;
|
|
z = (char *)sqlite3Fts3NextToken(aCol[iCol], &n);
|
|
if( n>0 ){
|
|
memcpy(zCsr, z, n);
|
|
}
|
|
zCsr[n] = '\0';
|
|
sqlite3Fts3Dequote(zCsr);
|
|
p->azColumn[iCol] = zCsr;
|
|
zCsr += n+1;
|
|
assert( zCsr <= &((char *)p)[nByte] );
|
|
}
|
|
|
|
/* Fill in the abNotindexed array */
|
|
for(iCol=0; iCol<nCol; iCol++){
|
|
int n = (int)strlen(p->azColumn[iCol]);
|
|
for(i=0; i<nNotindexed; i++){
|
|
char *zNot = azNotindexed[i];
|
|
if( zNot && n==(int)strlen(zNot)
|
|
&& 0==sqlite3_strnicmp(p->azColumn[iCol], zNot, n)
|
|
){
|
|
p->abNotindexed[iCol] = 1;
|
|
sqlite3_free(zNot);
|
|
azNotindexed[i] = 0;
|
|
}
|
|
}
|
|
}
|
|
for(i=0; i<nNotindexed; i++){
|
|
if( azNotindexed[i] ){
|
|
sqlite3Fts3ErrMsg(pzErr, "no such column: %s", azNotindexed[i]);
|
|
rc = SQLITE_ERROR;
|
|
}
|
|
}
|
|
|
|
if( rc==SQLITE_OK && (zCompress==0)!=(zUncompress==0) ){
|
|
char const *zMiss = (zCompress==0 ? "compress" : "uncompress");
|
|
rc = SQLITE_ERROR;
|
|
sqlite3Fts3ErrMsg(pzErr, "missing %s parameter in fts4 constructor", zMiss);
|
|
}
|
|
p->zReadExprlist = fts3ReadExprList(p, zUncompress, &rc);
|
|
p->zWriteExprlist = fts3WriteExprList(p, zCompress, &rc);
|
|
if( rc!=SQLITE_OK ) goto fts3_init_out;
|
|
|
|
/* If this is an xCreate call, create the underlying tables in the
|
|
** database. TODO: For xConnect(), it could verify that said tables exist.
|
|
*/
|
|
if( isCreate ){
|
|
rc = fts3CreateTables(p);
|
|
}
|
|
|
|
/* Check to see if a legacy fts3 table has been "upgraded" by the
|
|
** addition of a %_stat table so that it can use incremental merge.
|
|
*/
|
|
if( !isFts4 && !isCreate ){
|
|
p->bHasStat = 2;
|
|
}
|
|
|
|
/* Figure out the page-size for the database. This is required in order to
|
|
** estimate the cost of loading large doclists from the database. */
|
|
fts3DatabasePageSize(&rc, p);
|
|
p->nNodeSize = p->nPgsz-35;
|
|
|
|
#if defined(SQLITE_DEBUG)||defined(SQLITE_TEST)
|
|
p->nMergeCount = FTS3_MERGE_COUNT;
|
|
#endif
|
|
|
|
/* Declare the table schema to SQLite. */
|
|
fts3DeclareVtab(&rc, p);
|
|
|
|
fts3_init_out:
|
|
sqlite3_free(zPrefix);
|
|
sqlite3_free(aIndex);
|
|
sqlite3_free(zCompress);
|
|
sqlite3_free(zUncompress);
|
|
sqlite3_free(zContent);
|
|
sqlite3_free(zLanguageid);
|
|
for(i=0; i<nNotindexed; i++) sqlite3_free(azNotindexed[i]);
|
|
sqlite3_free((void *)aCol);
|
|
sqlite3_free((void *)azNotindexed);
|
|
if( rc!=SQLITE_OK ){
|
|
if( p ){
|
|
fts3DisconnectMethod((sqlite3_vtab *)p);
|
|
}else if( pTokenizer ){
|
|
pTokenizer->pModule->xDestroy(pTokenizer);
|
|
}
|
|
}else{
|
|
assert( p->pSegments==0 );
|
|
*ppVTab = &p->base;
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** The xConnect() and xCreate() methods for the virtual table. All the
|
|
** work is done in function fts3InitVtab().
|
|
*/
|
|
static int fts3ConnectMethod(
|
|
sqlite3 *db, /* Database connection */
|
|
void *pAux, /* Pointer to tokenizer hash table */
|
|
int argc, /* Number of elements in argv array */
|
|
const char * const *argv, /* xCreate/xConnect argument array */
|
|
sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
|
|
char **pzErr /* OUT: sqlite3_malloc'd error message */
|
|
){
|
|
return fts3InitVtab(0, db, pAux, argc, argv, ppVtab, pzErr);
|
|
}
|
|
static int fts3CreateMethod(
|
|
sqlite3 *db, /* Database connection */
|
|
void *pAux, /* Pointer to tokenizer hash table */
|
|
int argc, /* Number of elements in argv array */
|
|
const char * const *argv, /* xCreate/xConnect argument array */
|
|
sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
|
|
char **pzErr /* OUT: sqlite3_malloc'd error message */
|
|
){
|
|
return fts3InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr);
|
|
}
|
|
|
|
/*
|
|
** Set the pIdxInfo->estimatedRows variable to nRow. Unless this
|
|
** extension is currently being used by a version of SQLite too old to
|
|
** support estimatedRows. In that case this function is a no-op.
|
|
*/
|
|
static void fts3SetEstimatedRows(sqlite3_index_info *pIdxInfo, i64 nRow){
|
|
#if SQLITE_VERSION_NUMBER>=3008002
|
|
if( sqlite3_libversion_number()>=3008002 ){
|
|
pIdxInfo->estimatedRows = nRow;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
** Set the SQLITE_INDEX_SCAN_UNIQUE flag in pIdxInfo->flags. Unless this
|
|
** extension is currently being used by a version of SQLite too old to
|
|
** support index-info flags. In that case this function is a no-op.
|
|
*/
|
|
static void fts3SetUniqueFlag(sqlite3_index_info *pIdxInfo){
|
|
#if SQLITE_VERSION_NUMBER>=3008012
|
|
if( sqlite3_libversion_number()>=3008012 ){
|
|
pIdxInfo->idxFlags |= SQLITE_INDEX_SCAN_UNIQUE;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
** Implementation of the xBestIndex method for FTS3 tables. There
|
|
** are three possible strategies, in order of preference:
|
|
**
|
|
** 1. Direct lookup by rowid or docid.
|
|
** 2. Full-text search using a MATCH operator on a non-docid column.
|
|
** 3. Linear scan of %_content table.
|
|
*/
|
|
static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
|
|
Fts3Table *p = (Fts3Table *)pVTab;
|
|
int i; /* Iterator variable */
|
|
int iCons = -1; /* Index of constraint to use */
|
|
|
|
int iLangidCons = -1; /* Index of langid=x constraint, if present */
|
|
int iDocidGe = -1; /* Index of docid>=x constraint, if present */
|
|
int iDocidLe = -1; /* Index of docid<=x constraint, if present */
|
|
int iIdx;
|
|
|
|
if( p->bLock ){
|
|
return SQLITE_ERROR;
|
|
}
|
|
|
|
/* By default use a full table scan. This is an expensive option,
|
|
** so search through the constraints to see if a more efficient
|
|
** strategy is possible.
|
|
*/
|
|
pInfo->idxNum = FTS3_FULLSCAN_SEARCH;
|
|
pInfo->estimatedCost = 5000000;
|
|
for(i=0; i<pInfo->nConstraint; i++){
|
|
int bDocid; /* True if this constraint is on docid */
|
|
struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[i];
|
|
if( pCons->usable==0 ){
|
|
if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH ){
|
|
/* There exists an unusable MATCH constraint. This means that if
|
|
** the planner does elect to use the results of this call as part
|
|
** of the overall query plan the user will see an "unable to use
|
|
** function MATCH in the requested context" error. To discourage
|
|
** this, return a very high cost here. */
|
|
pInfo->idxNum = FTS3_FULLSCAN_SEARCH;
|
|
pInfo->estimatedCost = 1e50;
|
|
fts3SetEstimatedRows(pInfo, ((sqlite3_int64)1) << 50);
|
|
return SQLITE_OK;
|
|
}
|
|
continue;
|
|
}
|
|
|
|
bDocid = (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1);
|
|
|
|
/* A direct lookup on the rowid or docid column. Assign a cost of 1.0. */
|
|
if( iCons<0 && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ && bDocid ){
|
|
pInfo->idxNum = FTS3_DOCID_SEARCH;
|
|
pInfo->estimatedCost = 1.0;
|
|
iCons = i;
|
|
}
|
|
|
|
/* A MATCH constraint. Use a full-text search.
|
|
**
|
|
** If there is more than one MATCH constraint available, use the first
|
|
** one encountered. If there is both a MATCH constraint and a direct
|
|
** rowid/docid lookup, prefer the MATCH strategy. This is done even
|
|
** though the rowid/docid lookup is faster than a MATCH query, selecting
|
|
** it would lead to an "unable to use function MATCH in the requested
|
|
** context" error.
|
|
*/
|
|
if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH
|
|
&& pCons->iColumn>=0 && pCons->iColumn<=p->nColumn
|
|
){
|
|
pInfo->idxNum = FTS3_FULLTEXT_SEARCH + pCons->iColumn;
|
|
pInfo->estimatedCost = 2.0;
|
|
iCons = i;
|
|
}
|
|
|
|
/* Equality constraint on the langid column */
|
|
if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ
|
|
&& pCons->iColumn==p->nColumn + 2
|
|
){
|
|
iLangidCons = i;
|
|
}
|
|
|
|
if( bDocid ){
|
|
switch( pCons->op ){
|
|
case SQLITE_INDEX_CONSTRAINT_GE:
|
|
case SQLITE_INDEX_CONSTRAINT_GT:
|
|
iDocidGe = i;
|
|
break;
|
|
|
|
case SQLITE_INDEX_CONSTRAINT_LE:
|
|
case SQLITE_INDEX_CONSTRAINT_LT:
|
|
iDocidLe = i;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* If using a docid=? or rowid=? strategy, set the UNIQUE flag. */
|
|
if( pInfo->idxNum==FTS3_DOCID_SEARCH ) fts3SetUniqueFlag(pInfo);
|
|
|
|
iIdx = 1;
|
|
if( iCons>=0 ){
|
|
pInfo->aConstraintUsage[iCons].argvIndex = iIdx++;
|
|
pInfo->aConstraintUsage[iCons].omit = 1;
|
|
}
|
|
if( iLangidCons>=0 ){
|
|
pInfo->idxNum |= FTS3_HAVE_LANGID;
|
|
pInfo->aConstraintUsage[iLangidCons].argvIndex = iIdx++;
|
|
}
|
|
if( iDocidGe>=0 ){
|
|
pInfo->idxNum |= FTS3_HAVE_DOCID_GE;
|
|
pInfo->aConstraintUsage[iDocidGe].argvIndex = iIdx++;
|
|
}
|
|
if( iDocidLe>=0 ){
|
|
pInfo->idxNum |= FTS3_HAVE_DOCID_LE;
|
|
pInfo->aConstraintUsage[iDocidLe].argvIndex = iIdx++;
|
|
}
|
|
|
|
/* Regardless of the strategy selected, FTS can deliver rows in rowid (or
|
|
** docid) order. Both ascending and descending are possible.
|
|
*/
|
|
if( pInfo->nOrderBy==1 ){
|
|
struct sqlite3_index_orderby *pOrder = &pInfo->aOrderBy[0];
|
|
if( pOrder->iColumn<0 || pOrder->iColumn==p->nColumn+1 ){
|
|
if( pOrder->desc ){
|
|
pInfo->idxStr = "DESC";
|
|
}else{
|
|
pInfo->idxStr = "ASC";
|
|
}
|
|
pInfo->orderByConsumed = 1;
|
|
}
|
|
}
|
|
|
|
assert( p->pSegments==0 );
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** Implementation of xOpen method.
|
|
*/
|
|
static int fts3OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
|
|
sqlite3_vtab_cursor *pCsr; /* Allocated cursor */
|
|
|
|
UNUSED_PARAMETER(pVTab);
|
|
|
|
/* Allocate a buffer large enough for an Fts3Cursor structure. If the
|
|
** allocation succeeds, zero it and return SQLITE_OK. Otherwise,
|
|
** if the allocation fails, return SQLITE_NOMEM.
|
|
*/
|
|
*ppCsr = pCsr = (sqlite3_vtab_cursor *)sqlite3_malloc(sizeof(Fts3Cursor));
|
|
if( !pCsr ){
|
|
return SQLITE_NOMEM;
|
|
}
|
|
memset(pCsr, 0, sizeof(Fts3Cursor));
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** Finalize the statement handle at pCsr->pStmt.
|
|
**
|
|
** Or, if that statement handle is one created by fts3CursorSeekStmt(),
|
|
** and the Fts3Table.pSeekStmt slot is currently NULL, save the statement
|
|
** pointer there instead of finalizing it.
|
|
*/
|
|
static void fts3CursorFinalizeStmt(Fts3Cursor *pCsr){
|
|
if( pCsr->bSeekStmt ){
|
|
Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
|
|
if( p->pSeekStmt==0 ){
|
|
p->pSeekStmt = pCsr->pStmt;
|
|
sqlite3_reset(pCsr->pStmt);
|
|
pCsr->pStmt = 0;
|
|
}
|
|
pCsr->bSeekStmt = 0;
|
|
}
|
|
sqlite3_finalize(pCsr->pStmt);
|
|
}
|
|
|
|
/*
|
|
** Free all resources currently held by the cursor passed as the only
|
|
** argument.
|
|
*/
|
|
static void fts3ClearCursor(Fts3Cursor *pCsr){
|
|
fts3CursorFinalizeStmt(pCsr);
|
|
sqlite3Fts3FreeDeferredTokens(pCsr);
|
|
sqlite3_free(pCsr->aDoclist);
|
|
sqlite3Fts3MIBufferFree(pCsr->pMIBuffer);
|
|
sqlite3Fts3ExprFree(pCsr->pExpr);
|
|
memset(&(&pCsr->base)[1], 0, sizeof(Fts3Cursor)-sizeof(sqlite3_vtab_cursor));
|
|
}
|
|
|
|
/*
|
|
** Close the cursor. For additional information see the documentation
|
|
** on the xClose method of the virtual table interface.
|
|
*/
|
|
static int fts3CloseMethod(sqlite3_vtab_cursor *pCursor){
|
|
Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;
|
|
assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
|
|
fts3ClearCursor(pCsr);
|
|
assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
|
|
sqlite3_free(pCsr);
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** If pCsr->pStmt has not been prepared (i.e. if pCsr->pStmt==0), then
|
|
** compose and prepare an SQL statement of the form:
|
|
**
|
|
** "SELECT <columns> FROM %_content WHERE rowid = ?"
|
|
**
|
|
** (or the equivalent for a content=xxx table) and set pCsr->pStmt to
|
|
** it. If an error occurs, return an SQLite error code.
|
|
*/
|
|
static int fts3CursorSeekStmt(Fts3Cursor *pCsr){
|
|
int rc = SQLITE_OK;
|
|
if( pCsr->pStmt==0 ){
|
|
Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
|
|
char *zSql;
|
|
if( p->pSeekStmt ){
|
|
pCsr->pStmt = p->pSeekStmt;
|
|
p->pSeekStmt = 0;
|
|
}else{
|
|
zSql = sqlite3_mprintf("SELECT %s WHERE rowid = ?", p->zReadExprlist);
|
|
if( !zSql ) return SQLITE_NOMEM;
|
|
p->bLock++;
|
|
rc = sqlite3_prepare_v3(
|
|
p->db, zSql,-1,SQLITE_PREPARE_PERSISTENT,&pCsr->pStmt,0
|
|
);
|
|
p->bLock--;
|
|
sqlite3_free(zSql);
|
|
}
|
|
if( rc==SQLITE_OK ) pCsr->bSeekStmt = 1;
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Position the pCsr->pStmt statement so that it is on the row
|
|
** of the %_content table that contains the last match. Return
|
|
** SQLITE_OK on success.
|
|
*/
|
|
static int fts3CursorSeek(sqlite3_context *pContext, Fts3Cursor *pCsr){
|
|
int rc = SQLITE_OK;
|
|
if( pCsr->isRequireSeek ){
|
|
rc = fts3CursorSeekStmt(pCsr);
|
|
if( rc==SQLITE_OK ){
|
|
Fts3Table *pTab = (Fts3Table*)pCsr->base.pVtab;
|
|
pTab->bLock++;
|
|
sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iPrevId);
|
|
pCsr->isRequireSeek = 0;
|
|
if( SQLITE_ROW==sqlite3_step(pCsr->pStmt) ){
|
|
pTab->bLock--;
|
|
return SQLITE_OK;
|
|
}else{
|
|
pTab->bLock--;
|
|
rc = sqlite3_reset(pCsr->pStmt);
|
|
if( rc==SQLITE_OK && ((Fts3Table *)pCsr->base.pVtab)->zContentTbl==0 ){
|
|
/* If no row was found and no error has occurred, then the %_content
|
|
** table is missing a row that is present in the full-text index.
|
|
** The data structures are corrupt. */
|
|
rc = FTS_CORRUPT_VTAB;
|
|
pCsr->isEof = 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if( rc!=SQLITE_OK && pContext ){
|
|
sqlite3_result_error_code(pContext, rc);
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** This function is used to process a single interior node when searching
|
|
** a b-tree for a term or term prefix. The node data is passed to this
|
|
** function via the zNode/nNode parameters. The term to search for is
|
|
** passed in zTerm/nTerm.
|
|
**
|
|
** If piFirst is not NULL, then this function sets *piFirst to the blockid
|
|
** of the child node that heads the sub-tree that may contain the term.
|
|
**
|
|
** If piLast is not NULL, then *piLast is set to the right-most child node
|
|
** that heads a sub-tree that may contain a term for which zTerm/nTerm is
|
|
** a prefix.
|
|
**
|
|
** If an OOM error occurs, SQLITE_NOMEM is returned. Otherwise, SQLITE_OK.
|
|
*/
|
|
static int fts3ScanInteriorNode(
|
|
const char *zTerm, /* Term to select leaves for */
|
|
int nTerm, /* Size of term zTerm in bytes */
|
|
const char *zNode, /* Buffer containing segment interior node */
|
|
int nNode, /* Size of buffer at zNode */
|
|
sqlite3_int64 *piFirst, /* OUT: Selected child node */
|
|
sqlite3_int64 *piLast /* OUT: Selected child node */
|
|
){
|
|
int rc = SQLITE_OK; /* Return code */
|
|
const char *zCsr = zNode; /* Cursor to iterate through node */
|
|
const char *zEnd = &zCsr[nNode];/* End of interior node buffer */
|
|
char *zBuffer = 0; /* Buffer to load terms into */
|
|
i64 nAlloc = 0; /* Size of allocated buffer */
|
|
int isFirstTerm = 1; /* True when processing first term on page */
|
|
u64 iChild; /* Block id of child node to descend to */
|
|
int nBuffer = 0; /* Total term size */
|
|
|
|
/* Skip over the 'height' varint that occurs at the start of every
|
|
** interior node. Then load the blockid of the left-child of the b-tree
|
|
** node into variable iChild.
|
|
**
|
|
** Even if the data structure on disk is corrupted, this (reading two
|
|
** varints from the buffer) does not risk an overread. If zNode is a
|
|
** root node, then the buffer comes from a SELECT statement. SQLite does
|
|
** not make this guarantee explicitly, but in practice there are always
|
|
** either more than 20 bytes of allocated space following the nNode bytes of
|
|
** contents, or two zero bytes. Or, if the node is read from the %_segments
|
|
** table, then there are always 20 bytes of zeroed padding following the
|
|
** nNode bytes of content (see sqlite3Fts3ReadBlock() for details).
|
|
*/
|
|
zCsr += sqlite3Fts3GetVarintU(zCsr, &iChild);
|
|
zCsr += sqlite3Fts3GetVarintU(zCsr, &iChild);
|
|
if( zCsr>zEnd ){
|
|
return FTS_CORRUPT_VTAB;
|
|
}
|
|
|
|
while( zCsr<zEnd && (piFirst || piLast) ){
|
|
int cmp; /* memcmp() result */
|
|
int nSuffix; /* Size of term suffix */
|
|
int nPrefix = 0; /* Size of term prefix */
|
|
|
|
/* Load the next term on the node into zBuffer. Use realloc() to expand
|
|
** the size of zBuffer if required. */
|
|
if( !isFirstTerm ){
|
|
zCsr += fts3GetVarint32(zCsr, &nPrefix);
|
|
if( nPrefix>nBuffer ){
|
|
rc = FTS_CORRUPT_VTAB;
|
|
goto finish_scan;
|
|
}
|
|
}
|
|
isFirstTerm = 0;
|
|
zCsr += fts3GetVarint32(zCsr, &nSuffix);
|
|
|
|
assert( nPrefix>=0 && nSuffix>=0 );
|
|
if( nPrefix>zCsr-zNode || nSuffix>zEnd-zCsr || nSuffix==0 ){
|
|
rc = FTS_CORRUPT_VTAB;
|
|
goto finish_scan;
|
|
}
|
|
if( (i64)nPrefix+nSuffix>nAlloc ){
|
|
char *zNew;
|
|
nAlloc = ((i64)nPrefix+nSuffix) * 2;
|
|
zNew = (char *)sqlite3_realloc64(zBuffer, nAlloc);
|
|
if( !zNew ){
|
|
rc = SQLITE_NOMEM;
|
|
goto finish_scan;
|
|
}
|
|
zBuffer = zNew;
|
|
}
|
|
assert( zBuffer );
|
|
memcpy(&zBuffer[nPrefix], zCsr, nSuffix);
|
|
nBuffer = nPrefix + nSuffix;
|
|
zCsr += nSuffix;
|
|
|
|
/* Compare the term we are searching for with the term just loaded from
|
|
** the interior node. If the specified term is greater than or equal
|
|
** to the term from the interior node, then all terms on the sub-tree
|
|
** headed by node iChild are smaller than zTerm. No need to search
|
|
** iChild.
|
|
**
|
|
** If the interior node term is larger than the specified term, then
|
|
** the tree headed by iChild may contain the specified term.
|
|
*/
|
|
cmp = memcmp(zTerm, zBuffer, (nBuffer>nTerm ? nTerm : nBuffer));
|
|
if( piFirst && (cmp<0 || (cmp==0 && nBuffer>nTerm)) ){
|
|
*piFirst = (i64)iChild;
|
|
piFirst = 0;
|
|
}
|
|
|
|
if( piLast && cmp<0 ){
|
|
*piLast = (i64)iChild;
|
|
piLast = 0;
|
|
}
|
|
|
|
iChild++;
|
|
};
|
|
|
|
if( piFirst ) *piFirst = (i64)iChild;
|
|
if( piLast ) *piLast = (i64)iChild;
|
|
|
|
finish_scan:
|
|
sqlite3_free(zBuffer);
|
|
return rc;
|
|
}
|
|
|
|
|
|
/*
|
|
** The buffer pointed to by argument zNode (size nNode bytes) contains an
|
|
** interior node of a b-tree segment. The zTerm buffer (size nTerm bytes)
|
|
** contains a term. This function searches the sub-tree headed by the zNode
|
|
** node for the range of leaf nodes that may contain the specified term
|
|
** or terms for which the specified term is a prefix.
|
|
**
|
|
** If piLeaf is not NULL, then *piLeaf is set to the blockid of the
|
|
** left-most leaf node in the tree that may contain the specified term.
|
|
** If piLeaf2 is not NULL, then *piLeaf2 is set to the blockid of the
|
|
** right-most leaf node that may contain a term for which the specified
|
|
** term is a prefix.
|
|
**
|
|
** It is possible that the range of returned leaf nodes does not contain
|
|
** the specified term or any terms for which it is a prefix. However, if the
|
|
** segment does contain any such terms, they are stored within the identified
|
|
** range. Because this function only inspects interior segment nodes (and
|
|
** never loads leaf nodes into memory), it is not possible to be sure.
|
|
**
|
|
** If an error occurs, an error code other than SQLITE_OK is returned.
|
|
*/
|
|
static int fts3SelectLeaf(
|
|
Fts3Table *p, /* Virtual table handle */
|
|
const char *zTerm, /* Term to select leaves for */
|
|
int nTerm, /* Size of term zTerm in bytes */
|
|
const char *zNode, /* Buffer containing segment interior node */
|
|
int nNode, /* Size of buffer at zNode */
|
|
sqlite3_int64 *piLeaf, /* Selected leaf node */
|
|
sqlite3_int64 *piLeaf2 /* Selected leaf node */
|
|
){
|
|
int rc = SQLITE_OK; /* Return code */
|
|
int iHeight; /* Height of this node in tree */
|
|
|
|
assert( piLeaf || piLeaf2 );
|
|
|
|
fts3GetVarint32(zNode, &iHeight);
|
|
rc = fts3ScanInteriorNode(zTerm, nTerm, zNode, nNode, piLeaf, piLeaf2);
|
|
assert_fts3_nc( !piLeaf2 || !piLeaf || rc!=SQLITE_OK || (*piLeaf<=*piLeaf2) );
|
|
|
|
if( rc==SQLITE_OK && iHeight>1 ){
|
|
char *zBlob = 0; /* Blob read from %_segments table */
|
|
int nBlob = 0; /* Size of zBlob in bytes */
|
|
|
|
if( piLeaf && piLeaf2 && (*piLeaf!=*piLeaf2) ){
|
|
rc = sqlite3Fts3ReadBlock(p, *piLeaf, &zBlob, &nBlob, 0);
|
|
if( rc==SQLITE_OK ){
|
|
rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, 0);
|
|
}
|
|
sqlite3_free(zBlob);
|
|
piLeaf = 0;
|
|
zBlob = 0;
|
|
}
|
|
|
|
if( rc==SQLITE_OK ){
|
|
rc = sqlite3Fts3ReadBlock(p, piLeaf?*piLeaf:*piLeaf2, &zBlob, &nBlob, 0);
|
|
}
|
|
if( rc==SQLITE_OK ){
|
|
int iNewHeight = 0;
|
|
fts3GetVarint32(zBlob, &iNewHeight);
|
|
if( iNewHeight>=iHeight ){
|
|
rc = FTS_CORRUPT_VTAB;
|
|
}else{
|
|
rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, piLeaf2);
|
|
}
|
|
}
|
|
sqlite3_free(zBlob);
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** This function is used to create delta-encoded serialized lists of FTS3
|
|
** varints. Each call to this function appends a single varint to a list.
|
|
*/
|
|
static void fts3PutDeltaVarint(
|
|
char **pp, /* IN/OUT: Output pointer */
|
|
sqlite3_int64 *piPrev, /* IN/OUT: Previous value written to list */
|
|
sqlite3_int64 iVal /* Write this value to the list */
|
|
){
|
|
assert_fts3_nc( iVal-*piPrev > 0 || (*piPrev==0 && iVal==0) );
|
|
*pp += sqlite3Fts3PutVarint(*pp, iVal-*piPrev);
|
|
*piPrev = iVal;
|
|
}
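
/*
** Example (illustrative values): with *piPrev starting at 0, calling
** this function with iVal==100 and then iVal==105 appends varint(100)
** followed by varint(5) to the output, leaving *piPrev set to 105.
** This is the inverse of fts3GetDeltaVarint() above.
*/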
|
|
|
|
/*
|
|
** When this function is called, *ppPoslist is assumed to point to the
|
|
** start of a position-list. After it returns, *ppPoslist points to the
|
|
** first byte after the position-list.
|
|
**
|
|
** A position list is list of positions (delta encoded) and columns for
|
|
** a single document record of a doclist. So, in other words, this
|
|
** routine advances *ppPoslist so that it points to the next docid in
|
|
** the doclist, or to the first byte past the end of the doclist.
|
|
**
|
|
** If pp is not NULL, then the contents of the position list are copied
|
|
** to *pp. *pp is set to point to the first byte past the last byte copied
|
|
** before this function returns.
|
|
*/
|
|
static void fts3PoslistCopy(char **pp, char **ppPoslist){
|
|
char *pEnd = *ppPoslist;
|
|
char c = 0;
|
|
|
|
/* The end of a position list is marked by a zero encoded as an FTS3
|
|
** varint. A single POS_END (0) byte. Except, if the 0 byte is preceded by
|
|
** a byte with the 0x80 bit set, then it is not a varint 0, but the tail
|
|
** of some other, multi-byte, value.
|
|
**
|
|
** The following while-loop moves pEnd to point to the first byte that is not
|
|
** immediately preceded by a byte with the 0x80 bit set. Then increments
|
|
** pEnd once more so that it points to the byte immediately following the
|
|
** last byte in the position-list.
|
|
*/
|
|
while( *pEnd | c ){
|
|
c = *pEnd++ & 0x80;
|
|
testcase( c!=0 && (*pEnd)==0 );
|
|
}
|
|
pEnd++; /* Advance past the POS_END terminator byte */
|
|
|
|
if( pp ){
|
|
int n = (int)(pEnd - *ppPoslist);
|
|
char *p = *pp;
|
|
memcpy(p, *ppPoslist, n);
|
|
p += n;
|
|
*pp = p;
|
|
}
|
|
*ppPoslist = pEnd;
|
|
}
|
|
|
|
/*
|
|
** When this function is called, *ppPoslist is assumed to point to the
|
|
** start of a column-list. After it returns, *ppPoslist points to the
|
|
** terminator (POS_COLUMN or POS_END) byte of the column-list.
|
|
**
|
|
** A column-list is a list of delta-encoded positions for a single column
|
|
** within a single document within a doclist.
|
|
**
|
|
** The column-list is terminated either by a POS_COLUMN varint (1) or
|
|
** a POS_END varint (0). This routine leaves *ppPoslist pointing to
|
|
** the POS_COLUMN or POS_END that terminates the column-list.
|
|
**
|
|
** If pp is not NULL, then the contents of the column-list are copied
|
|
** to *pp. *pp is set to point to the first byte past the last byte copied
|
|
** before this function returns. The POS_COLUMN or POS_END terminator
|
|
** is not copied into *pp.
|
|
*/
|
|
static void fts3ColumnlistCopy(char **pp, char **ppPoslist){
|
|
char *pEnd = *ppPoslist;
|
|
char c = 0;
|
|
|
|
/* A column-list is terminated by either a 0x01 or 0x00 byte that is
|
|
** not part of a multi-byte varint.
|
|
*/
|
|
while( 0xFE & (*pEnd | c) ){
|
|
c = *pEnd++ & 0x80;
|
|
testcase( c!=0 && ((*pEnd)&0xfe)==0 );
|
|
}
|
|
if( pp ){
|
|
int n = (int)(pEnd - *ppPoslist);
|
|
char *p = *pp;
|
|
memcpy(p, *ppPoslist, n);
|
|
p += n;
|
|
*pp = p;
|
|
}
|
|
*ppPoslist = pEnd;
|
|
}
|
|
|
|
/*
|
|
** Value used to signify the end of a position-list. This must be
|
|
** as large or larger than any value that might appear on the
|
|
** position-list, even a position list that has been corrupted.
|
|
*/
|
|
#define POSITION_LIST_END LARGEST_INT64
|
|
|
|
/*
|
|
** This function is used to help parse position-lists. When this function is
|
|
** called, *pp may point to the start of the next varint in the position-list
|
|
** being parsed, or it may point to 1 byte past the end of the position-list
|
|
** (in which case **pp will be a terminator byte: either POS_END (0) or
** POS_COLUMN (1)).
|
|
**
|
|
** If *pp points past the end of the current position-list, set *pi to
|
|
** POSITION_LIST_END and return. Otherwise, read the next varint from *pp,
|
|
** increment the current value of *pi by the value read, and set *pp to
|
|
** point to the next value before returning.
|
|
**
|
|
** Before calling this routine *pi must be initialized to the value of
|
|
** the previous position, or zero if we are reading the first position
|
|
** in the position-list. Because positions are delta-encoded, the value
|
|
** of the previous position is needed in order to compute the value of
|
|
** the next position.
|
|
*/
|
|
static void fts3ReadNextPos(
|
|
char **pp, /* IN/OUT: Pointer into position-list buffer */
|
|
sqlite3_int64 *pi /* IN/OUT: Value read from position-list */
|
|
){
|
|
if( (**pp)&0xFE ){
|
|
int iVal;
|
|
*pp += fts3GetVarint32((*pp), &iVal);
|
|
*pi += iVal;
|
|
*pi -= 2;
|
|
}else{
|
|
*pi = POSITION_LIST_END;
|
|
}
|
|
}
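
/* Illustrative sketch (added commentary, not part of the original fts3.c;
** the helper name is hypothetical): reading a column-list back with
** fts3ReadNextPos(). Each stored varint is the position delta plus 2, so
** the bytes 0x03 0x05 0x00 decode to positions 1 and 4 (3-2=1, then
** 1+5-2=4), after which POSITION_LIST_END is reported.
*/
#if 0
static void fts3ReadNextPosExample(void){
  char aList[] = { 0x03, 0x05, 0x00 };  /* Two positions then POS_END */
  char *p = aList;
  sqlite3_int64 iPos = 0;       /* Must start at 0 (no previous position) */

  fts3ReadNextPos(&p, &iPos);   /* iPos==1 */
  fts3ReadNextPos(&p, &iPos);   /* iPos==4 */
  fts3ReadNextPos(&p, &iPos);   /* iPos==POSITION_LIST_END */
}
#endif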
|
|
|
|
/*
|
|
** If parameter iCol is not 0, write a POS_COLUMN (1) byte followed by
|
|
** the value of iCol encoded as a varint to *pp. This will start a new
|
|
** column list.
|
|
**
|
|
** Set *pp to point to the byte just after the last byte written before
|
|
** returning (do not modify it if iCol==0). Return the total number of bytes
|
|
** written (0 if iCol==0).
|
|
*/
|
|
static int fts3PutColNumber(char **pp, int iCol){
|
|
int n = 0; /* Number of bytes written */
|
|
if( iCol ){
|
|
char *p = *pp; /* Output pointer */
|
|
n = 1 + sqlite3Fts3PutVarint(&p[1], iCol);
|
|
*p = 0x01;
|
|
*pp = &p[n];
|
|
}
|
|
return n;
|
|
}
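
/* Illustrative sketch (added commentary, not part of the original fts3.c;
** the helper name is hypothetical): fts3PutColNumber() writes a POS_COLUMN
** marker followed by the column number, and writes nothing at all for
** column 0.
*/
#if 0
static void fts3PutColNumberExample(void){
  char aBuf[4];
  char *p = aBuf;
  int n;

  n = fts3PutColNumber(&p, 3);   /* writes 0x01 0x03, n==2 */
  n = fts3PutColNumber(&p, 0);   /* writes nothing, n==0 */
}
#endif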
|
|
|
|
/*
|
|
** Compute the union of two position lists. The output written
|
|
** into *pp contains all positions of both *pp1 and *pp2 in sorted
|
|
** order and with any duplicates removed. All pointers are
|
|
** updated appropriately. The caller is responsible for ensuring
|
|
** that there is enough space in *pp to hold the complete output.
|
|
*/
|
|
static int fts3PoslistMerge(
|
|
char **pp, /* Output buffer */
|
|
char **pp1, /* Left input list */
|
|
char **pp2 /* Right input list */
|
|
){
|
|
char *p = *pp;
|
|
char *p1 = *pp1;
|
|
char *p2 = *pp2;
|
|
|
|
while( *p1 || *p2 ){
|
|
int iCol1; /* The current column index in pp1 */
|
|
int iCol2; /* The current column index in pp2 */
|
|
|
|
if( *p1==POS_COLUMN ){
|
|
fts3GetVarint32(&p1[1], &iCol1);
|
|
if( iCol1==0 ) return FTS_CORRUPT_VTAB;
|
|
}
|
|
else if( *p1==POS_END ) iCol1 = 0x7fffffff;
|
|
else iCol1 = 0;
|
|
|
|
if( *p2==POS_COLUMN ){
|
|
fts3GetVarint32(&p2[1], &iCol2);
|
|
if( iCol2==0 ) return FTS_CORRUPT_VTAB;
|
|
}
|
|
else if( *p2==POS_END ) iCol2 = 0x7fffffff;
|
|
else iCol2 = 0;
|
|
|
|
if( iCol1==iCol2 ){
|
|
sqlite3_int64 i1 = 0; /* Last position from pp1 */
|
|
sqlite3_int64 i2 = 0; /* Last position from pp2 */
|
|
sqlite3_int64 iPrev = 0;
|
|
int n = fts3PutColNumber(&p, iCol1);
|
|
p1 += n;
|
|
p2 += n;
|
|
|
|
/* At this point, both p1 and p2 point to the start of column-lists
|
|
** for the same column (the column with index iCol1 and iCol2).
|
|
** A column-list is a list of non-negative delta-encoded varints, each
|
|
** incremented by 2 before being stored. Each list is terminated by a
|
|
** POS_END (0) or POS_COLUMN (1). The following block merges the two lists
|
|
** and writes the results to buffer p. p is left pointing to the byte
|
|
** after the list written. No terminator (POS_END or POS_COLUMN) is
|
|
** written to the output.
|
|
*/
|
|
fts3GetDeltaVarint(&p1, &i1);
|
|
fts3GetDeltaVarint(&p2, &i2);
|
|
if( i1<2 || i2<2 ){
|
|
break;
|
|
}
|
|
do {
|
|
fts3PutDeltaVarint(&p, &iPrev, (i1<i2) ? i1 : i2);
|
|
iPrev -= 2;
|
|
if( i1==i2 ){
|
|
fts3ReadNextPos(&p1, &i1);
|
|
fts3ReadNextPos(&p2, &i2);
|
|
}else if( i1<i2 ){
|
|
fts3ReadNextPos(&p1, &i1);
|
|
}else{
|
|
fts3ReadNextPos(&p2, &i2);
|
|
}
|
|
}while( i1!=POSITION_LIST_END || i2!=POSITION_LIST_END );
|
|
}else if( iCol1<iCol2 ){
|
|
p1 += fts3PutColNumber(&p, iCol1);
|
|
fts3ColumnlistCopy(&p, &p1);
|
|
}else{
|
|
p2 += fts3PutColNumber(&p, iCol2);
|
|
fts3ColumnlistCopy(&p, &p2);
|
|
}
|
|
}
|
|
|
|
*p++ = POS_END;
|
|
*pp = p;
|
|
*pp1 = p1 + 1;
|
|
*pp2 = p2 + 1;
|
|
return SQLITE_OK;
|
|
}
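
/* Illustrative sketch (added commentary, not part of the original fts3.c;
** the helper name and byte values are constructed for this example only):
** merging two column-0 position lists with fts3PoslistMerge(). The inputs
** encode positions {1,4} and {2,4} (each varint is the position delta
** plus 2, terminated by POS_END).
*/
#if 0
static void fts3PoslistMergeExample(void){
  char a1[] = { 0x03, 0x05, 0x00 };   /* positions 1 and 4 */
  char a2[] = { 0x04, 0x04, 0x00 };   /* positions 2 and 4 */
  char aOut[8];
  char *p = aOut, *p1 = a1, *p2 = a2;
  int rc;

  rc = fts3PoslistMerge(&p, &p1, &p2);

  /* rc==SQLITE_OK and aOut holds 0x03 0x03 0x04 0x00 - the encoding of
  ** the union {1,2,4} with the duplicate position 4 written only once. */
}
#endif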
|
|
|
|
/*
|
|
** This function is used to merge two position lists into one. When it is
|
|
** called, *pp1 and *pp2 must both point to position lists. A position-list is
|
|
** the part of a doclist that follows each document id. For example, if a row
|
|
** contains:
|
|
**
|
|
** 'a b c'|'x y z'|'a b b a'
|
|
**
|
|
** Then the position list for this row for token 'b' would consist of:
|
|
**
|
|
** 0x02 0x01 0x02 0x03 0x03 0x00
|
|
**
|
|
** When this function returns, both *pp1 and *pp2 are left pointing to the
|
|
** byte following the 0x00 terminator of their respective position lists.
|
|
**
|
|
** If isSaveLeft is 0, an entry is added to the output position list for
|
|
** each position in *pp2 for which there exists one or more positions in
|
|
** *pp1 so that (pos(*pp2)>pos(*pp1) && pos(*pp2)-pos(*pp1)<=nToken). i.e.
|
|
** when the *pp1 token appears before the *pp2 token, but not more than nToken
|
|
** slots before it.
|
|
**
|
|
** e.g. nToken==1 searches for adjacent positions.
|
|
*/
|
|
static int fts3PoslistPhraseMerge(
|
|
char **pp, /* IN/OUT: Preallocated output buffer */
|
|
int nToken, /* Maximum difference in token positions */
|
|
int isSaveLeft, /* Save the left position */
|
|
int isExact, /* If *pp1 is exactly nTokens before *pp2 */
|
|
char **pp1, /* IN/OUT: Left input list */
|
|
char **pp2 /* IN/OUT: Right input list */
|
|
){
|
|
char *p = *pp;
|
|
char *p1 = *pp1;
|
|
char *p2 = *pp2;
|
|
int iCol1 = 0;
|
|
int iCol2 = 0;
|
|
|
|
/* Never set both isSaveLeft and isExact for the same invocation. */
|
|
assert( isSaveLeft==0 || isExact==0 );
|
|
|
|
assert_fts3_nc( p!=0 && *p1!=0 && *p2!=0 );
|
|
if( *p1==POS_COLUMN ){
|
|
p1++;
|
|
p1 += fts3GetVarint32(p1, &iCol1);
|
|
}
|
|
if( *p2==POS_COLUMN ){
|
|
p2++;
|
|
p2 += fts3GetVarint32(p2, &iCol2);
|
|
}
|
|
|
|
while( 1 ){
|
|
if( iCol1==iCol2 ){
|
|
char *pSave = p;
|
|
sqlite3_int64 iPrev = 0;
|
|
sqlite3_int64 iPos1 = 0;
|
|
sqlite3_int64 iPos2 = 0;
|
|
|
|
if( iCol1 ){
|
|
*p++ = POS_COLUMN;
|
|
p += sqlite3Fts3PutVarint(p, iCol1);
|
|
}
|
|
|
|
fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2;
|
|
fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2;
|
|
if( iPos1<0 || iPos2<0 ) break;
|
|
|
|
while( 1 ){
|
|
if( iPos2==iPos1+nToken
|
|
|| (isExact==0 && iPos2>iPos1 && iPos2<=iPos1+nToken)
|
|
){
|
|
sqlite3_int64 iSave;
|
|
iSave = isSaveLeft ? iPos1 : iPos2;
|
|
fts3PutDeltaVarint(&p, &iPrev, iSave+2); iPrev -= 2;
|
|
pSave = 0;
|
|
assert( p );
|
|
}
|
|
if( (!isSaveLeft && iPos2<=(iPos1+nToken)) || iPos2<=iPos1 ){
|
|
if( (*p2&0xFE)==0 ) break;
|
|
fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2;
|
|
}else{
|
|
if( (*p1&0xFE)==0 ) break;
|
|
fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2;
|
|
}
|
|
}
|
|
|
|
if( pSave ){
|
|
assert( pp && p );
|
|
p = pSave;
|
|
}
|
|
|
|
fts3ColumnlistCopy(0, &p1);
|
|
fts3ColumnlistCopy(0, &p2);
|
|
assert( (*p1&0xFE)==0 && (*p2&0xFE)==0 );
|
|
if( 0==*p1 || 0==*p2 ) break;
|
|
|
|
p1++;
|
|
p1 += fts3GetVarint32(p1, &iCol1);
|
|
p2++;
|
|
p2 += fts3GetVarint32(p2, &iCol2);
|
|
}
|
|
|
|
/* Advance pointer p1 or p2 (whichever corresponds to the smaller of
|
|
** iCol1 and iCol2) so that it points to either the 0x00 that marks the
|
|
** end of the position list, or the 0x01 that precedes the next
|
|
** column-number in the position list.
|
|
*/
|
|
else if( iCol1<iCol2 ){
|
|
fts3ColumnlistCopy(0, &p1);
|
|
if( 0==*p1 ) break;
|
|
p1++;
|
|
p1 += fts3GetVarint32(p1, &iCol1);
|
|
}else{
|
|
fts3ColumnlistCopy(0, &p2);
|
|
if( 0==*p2 ) break;
|
|
p2++;
|
|
p2 += fts3GetVarint32(p2, &iCol2);
|
|
}
|
|
}
|
|
|
|
fts3PoslistCopy(0, &p2);
|
|
fts3PoslistCopy(0, &p1);
|
|
*pp1 = p1;
|
|
*pp2 = p2;
|
|
if( *pp==p ){
|
|
return 0;
|
|
}
|
|
*p++ = 0x00;
|
|
*pp = p;
|
|
return 1;
|
|
}
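
/* Illustrative sketch (added commentary, not part of the original fts3.c;
** the helper name and byte values are constructed for this example only):
** an exact phrase merge with fts3PoslistPhraseMerge(). The left token
** occurs at positions {1,5}, the right token at positions {2,9}; with
** nToken==1 and isExact set, only right-hand positions exactly one slot
** after a left-hand position survive.
*/
#if 0
static void fts3PhraseMergeExample(void){
  char a1[] = { 0x03, 0x06, 0x00 };   /* positions 1 and 5 */
  char a2[] = { 0x04, 0x09, 0x00 };   /* positions 2 and 9 */
  char aOut[8];
  char *p = aOut, *p1 = a1, *p2 = a2;
  int bMatch;

  bMatch = fts3PoslistPhraseMerge(&p, 1, 0, 1, &p1, &p2);

  /* bMatch==1 and aOut holds 0x04 0x00: only position 2 of the right
  ** token is exactly one token after a position of the left token. */
}
#endif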
|
|
|
|
/*
|
|
** Merge two position-lists as required by the NEAR operator. The argument
|
|
** position lists correspond to the left and right phrases of an expression
|
|
** like:
|
|
**
|
|
** "phrase 1" NEAR "phrase number 2"
|
|
**
|
|
** Position list *pp1 corresponds to the left-hand side of the NEAR
|
|
** expression and *pp2 to the right. As usual, the indexes in the position
|
|
** lists are the offsets of the last token in each phrase (tokens "1" and "2"
|
|
** in the example above).
|
|
**
|
|
** The output position list - written to *pp - is a copy of *pp2 with those
|
|
** entries that are not sufficiently NEAR to entries in *pp1 removed.
|
|
*/
|
|
static int fts3PoslistNearMerge(
|
|
char **pp, /* Output buffer */
|
|
char *aTmp, /* Temporary buffer space */
|
|
int nRight, /* Maximum difference in token positions */
|
|
int nLeft, /* Maximum difference in token positions */
|
|
char **pp1, /* IN/OUT: Left input list */
|
|
char **pp2 /* IN/OUT: Right input list */
|
|
){
|
|
char *p1 = *pp1;
|
|
char *p2 = *pp2;
|
|
|
|
char *pTmp1 = aTmp;
|
|
char *pTmp2;
|
|
char *aTmp2;
|
|
int res = 1;
|
|
|
|
fts3PoslistPhraseMerge(&pTmp1, nRight, 0, 0, pp1, pp2);
|
|
aTmp2 = pTmp2 = pTmp1;
|
|
*pp1 = p1;
|
|
*pp2 = p2;
|
|
fts3PoslistPhraseMerge(&pTmp2, nLeft, 1, 0, pp2, pp1);
|
|
if( pTmp1!=aTmp && pTmp2!=aTmp2 ){
|
|
fts3PoslistMerge(pp, &aTmp, &aTmp2);
|
|
}else if( pTmp1!=aTmp ){
|
|
fts3PoslistCopy(pp, &aTmp);
|
|
}else if( pTmp2!=aTmp2 ){
|
|
fts3PoslistCopy(pp, &aTmp2);
|
|
}else{
|
|
res = 0;
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
/*
|
|
** An instance of this function is used to merge together the (potentially
|
|
** large number of) doclists for each term that matches a prefix query.
|
|
** See function fts3TermSelectMerge() for details.
|
|
*/
|
|
typedef struct TermSelect TermSelect;
|
|
struct TermSelect {
|
|
char *aaOutput[16]; /* Malloc'd output buffers */
|
|
int anOutput[16]; /* Size of each output buffer in bytes */
|
|
};
|
|
|
|
/*
|
|
** This function is used to read a single varint from a buffer. Parameter
|
|
** pEnd points 1 byte past the end of the buffer. When this function is
|
|
** called, if *pp points to pEnd or greater, then the end of the buffer
|
|
** has been reached. In this case *pp is set to 0 and the function returns.
|
|
**
|
|
** If *pp does not point to or past pEnd, then a single varint is read
|
|
** from *pp. *pp is then set to point 1 byte past the end of the read varint.
|
|
**
|
|
** If bDescIdx is false, the value read is added to *pVal before returning.
|
|
** If it is true, the value read is subtracted from *pVal before this
|
|
** function returns.
|
|
*/
|
|
static void fts3GetDeltaVarint3(
|
|
char **pp, /* IN/OUT: Point to read varint from */
|
|
char *pEnd, /* End of buffer */
|
|
int bDescIdx, /* True if docids are descending */
|
|
sqlite3_int64 *pVal /* IN/OUT: Integer value */
|
|
){
|
|
if( *pp>=pEnd ){
|
|
*pp = 0;
|
|
}else{
|
|
u64 iVal;
|
|
*pp += sqlite3Fts3GetVarintU(*pp, &iVal);
|
|
if( bDescIdx ){
|
|
*pVal = (i64)((u64)*pVal - iVal);
|
|
}else{
|
|
*pVal = (i64)((u64)*pVal + iVal);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
** This function is used to write a single varint to a buffer. The varint
|
|
** is written to *pp. Before returning, *pp is set to point 1 byte past the
|
|
** end of the value written.
|
|
**
|
|
** If *pbFirst is zero when this function is called, the value written to
|
|
** the buffer is that of parameter iVal.
|
|
**
|
|
** If *pbFirst is non-zero when this function is called, then the value
|
|
** written is either (iVal-*piPrev) (if bDescIdx is zero) or (*piPrev-iVal)
|
|
** (if bDescIdx is non-zero).
|
|
**
|
|
** Before returning, this function always sets *pbFirst to 1 and *piPrev
|
|
** to the value of parameter iVal.
|
|
*/
|
|
static void fts3PutDeltaVarint3(
|
|
char **pp, /* IN/OUT: Output pointer */
|
|
int bDescIdx, /* True for descending docids */
|
|
sqlite3_int64 *piPrev, /* IN/OUT: Previous value written to list */
|
|
int *pbFirst, /* IN/OUT: True after first int written */
|
|
sqlite3_int64 iVal /* Write this value to the list */
|
|
){
|
|
sqlite3_uint64 iWrite;
|
|
if( bDescIdx==0 || *pbFirst==0 ){
|
|
assert_fts3_nc( *pbFirst==0 || iVal>=*piPrev );
|
|
iWrite = (u64)iVal - (u64)*piPrev;
|
|
}else{
|
|
assert_fts3_nc( *piPrev>=iVal );
|
|
iWrite = (u64)*piPrev - (u64)iVal;
|
|
}
|
|
assert( *pbFirst || *piPrev==0 );
|
|
assert_fts3_nc( *pbFirst==0 || iWrite>0 );
|
|
*pp += sqlite3Fts3PutVarint(*pp, iWrite);
|
|
*piPrev = iVal;
|
|
*pbFirst = 1;
|
|
}
|
|
|
|
|
|
/*
|
|
** This macro is used by various functions that merge doclists. The two
|
|
** arguments are 64-bit docid values. If the value of the stack variable
|
|
** bDescDoclist is 0 when this macro is invoked, then it returns a value
** with the same sign as (i1-i2). Otherwise, a value with the same sign
** as (i2-i1).
|
|
**
|
|
** Using this makes it easier to write code that can merge doclists that are
|
|
** sorted in either ascending or descending order.
|
|
*/
|
|
/* #define DOCID_CMP(i1, i2) ((bDescDoclist?-1:1) * (i64)((u64)i1-i2)) */
|
|
#define DOCID_CMP(i1, i2) ((bDescDoclist?-1:1) * (i1>i2?1:((i1==i2)?0:-1)))
|
|
|
|
/*
|
|
** This function does an "OR" merge of two doclists (output contains all
|
|
** positions contained in either argument doclist). If the docids in the
|
|
** input doclists are sorted in ascending order, parameter bDescDoclist
** should be false. If they are sorted in descending order, it should be
** passed a non-zero value.
|
|
**
|
|
** If no error occurs, *paOut is set to point at an sqlite3_malloc'd buffer
|
|
** containing the output doclist and SQLITE_OK is returned. In this case
|
|
** *pnOut is set to the number of bytes in the output doclist.
|
|
**
|
|
** If an error occurs, an SQLite error code is returned. The output values
|
|
** are undefined in this case.
|
|
*/
|
|
static int fts3DoclistOrMerge(
|
|
int bDescDoclist, /* True if arguments are desc */
|
|
char *a1, int n1, /* First doclist */
|
|
char *a2, int n2, /* Second doclist */
|
|
char **paOut, int *pnOut /* OUT: Malloc'd doclist */
|
|
){
|
|
int rc = SQLITE_OK;
|
|
sqlite3_int64 i1 = 0;
|
|
sqlite3_int64 i2 = 0;
|
|
sqlite3_int64 iPrev = 0;
|
|
char *pEnd1 = &a1[n1];
|
|
char *pEnd2 = &a2[n2];
|
|
char *p1 = a1;
|
|
char *p2 = a2;
|
|
char *p;
|
|
char *aOut;
|
|
int bFirstOut = 0;
|
|
|
|
*paOut = 0;
|
|
*pnOut = 0;
|
|
|
|
/* Allocate space for the output. Both the input and output doclists
|
|
** are delta encoded. If they are in ascending order (bDescDoclist==0),
|
|
** then the first docid in each list is simply encoded as a varint. For
|
|
** each subsequent docid, the varint stored is the difference between the
|
|
** current and previous docid (a positive number - since the list is in
|
|
** ascending order).
|
|
**
|
|
** The first docid written to the output is therefore encoded using the
|
|
** same number of bytes as it is in whichever of the input lists it is
|
|
** read from. And each subsequent docid read from the same input list
|
|
** consumes the same number of bytes as it did in the input, or fewer (since
|
|
** the difference between it and the previous value in the output must
|
|
** be a positive value less than or equal to the delta value read from
|
|
** the input list). The same argument applies to all but the first docid
|
|
** read from the 'other' list. And to the contents of all position lists
|
|
** that will be copied and merged from the input to the output.
|
|
**
|
|
** However, if the first docid copied to the output is a negative number,
|
|
** then the encoding of the first docid from the 'other' input list may
|
|
** be larger in the output than it was in the input (since the delta value
|
|
** may be a larger positive integer than the actual docid).
|
|
**
|
|
** The space required to store the output is therefore the sum of the
|
|
** sizes of the two inputs, plus enough space for exactly one of the input
|
|
** docids to grow.
|
|
**
|
|
** A symmetric argument may be made if the doclists are in descending
|
|
** order.
|
|
*/
|
|
aOut = sqlite3_malloc64((i64)n1+n2+FTS3_VARINT_MAX-1+FTS3_BUFFER_PADDING);
|
|
if( !aOut ) return SQLITE_NOMEM;
|
|
|
|
p = aOut;
|
|
fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1);
|
|
fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2);
|
|
while( p1 || p2 ){
|
|
sqlite3_int64 iDiff = DOCID_CMP(i1, i2);
|
|
|
|
if( p2 && p1 && iDiff==0 ){
|
|
fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
|
|
rc = fts3PoslistMerge(&p, &p1, &p2);
|
|
if( rc ) break;
|
|
fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
|
|
fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
|
|
}else if( !p2 || (p1 && iDiff<0) ){
|
|
fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
|
|
fts3PoslistCopy(&p, &p1);
|
|
fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
|
|
}else{
|
|
fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i2);
|
|
fts3PoslistCopy(&p, &p2);
|
|
fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
|
|
}
|
|
|
|
assert( (p-aOut)<=((p1?(p1-a1):n1)+(p2?(p2-a2):n2)+FTS3_VARINT_MAX-1) );
|
|
}
|
|
|
|
if( rc!=SQLITE_OK ){
|
|
sqlite3_free(aOut);
|
|
p = aOut = 0;
|
|
}else{
|
|
assert( (p-aOut)<=n1+n2+FTS3_VARINT_MAX-1 );
|
|
memset(&aOut[(p-aOut)], 0, FTS3_BUFFER_PADDING);
|
|
}
|
|
*paOut = aOut;
|
|
*pnOut = (int)(p-aOut);
|
|
return rc;
|
|
}
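
/* Illustrative sketch (added commentary, not part of the original fts3.c;
** the helper name and byte values are constructed for this example only):
** OR-merging two ascending doclists with fts3DoclistOrMerge(). Each input
** contains a single docid followed by an empty position list.
*/
#if 0
static void fts3DoclistOrMergeExample(void){
  char a1[] = { 0x01, 0x00 };   /* docid 1, empty position list */
  char a2[] = { 0x02, 0x00 };   /* docid 2, empty position list */
  char *aOut = 0;
  int nOut = 0;
  int rc;

  rc = fts3DoclistOrMerge(0, a1, 2, a2, 2, &aOut, &nOut);

  /* On success aOut holds 0x01 0x00 0x01 0x00 (docids 1 and 2, the second
  ** stored as a delta of 1) and nOut==4. The caller owns the buffer. */
  if( rc==SQLITE_OK ) sqlite3_free(aOut);
}
#endif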
|
|
|
|
/*
|
|
** This function does a "phrase" merge of two doclists. In a phrase merge,
|
|
** the output contains a copy of each position from the right-hand input
|
|
** doclist for which there is a position in the left-hand input doclist
|
|
** exactly nDist tokens before it.
|
|
**
|
|
** If the docids in the input doclists are sorted in ascending order,
|
|
** parameter bDescDoclist should be false. If they are sorted in descending
** order, it should be passed a non-zero value.
|
|
**
|
|
** The right-hand input doclist is overwritten by this function.
|
|
*/
|
|
static int fts3DoclistPhraseMerge(
|
|
int bDescDoclist, /* True if arguments are desc */
|
|
int nDist, /* Distance from left to right (1=adjacent) */
|
|
char *aLeft, int nLeft, /* Left doclist */
|
|
char **paRight, int *pnRight /* IN/OUT: Right/output doclist */
|
|
){
|
|
sqlite3_int64 i1 = 0;
|
|
sqlite3_int64 i2 = 0;
|
|
sqlite3_int64 iPrev = 0;
|
|
char *aRight = *paRight;
|
|
char *pEnd1 = &aLeft[nLeft];
|
|
char *pEnd2 = &aRight[*pnRight];
|
|
char *p1 = aLeft;
|
|
char *p2 = aRight;
|
|
char *p;
|
|
int bFirstOut = 0;
|
|
char *aOut;
|
|
|
|
assert( nDist>0 );
|
|
if( bDescDoclist ){
|
|
aOut = sqlite3_malloc64((sqlite3_int64)*pnRight + FTS3_VARINT_MAX);
|
|
if( aOut==0 ) return SQLITE_NOMEM;
|
|
}else{
|
|
aOut = aRight;
|
|
}
|
|
p = aOut;
|
|
|
|
fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1);
|
|
fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2);
|
|
|
|
while( p1 && p2 ){
|
|
sqlite3_int64 iDiff = DOCID_CMP(i1, i2);
|
|
if( iDiff==0 ){
|
|
char *pSave = p;
|
|
sqlite3_int64 iPrevSave = iPrev;
|
|
int bFirstOutSave = bFirstOut;
|
|
|
|
fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
|
|
if( 0==fts3PoslistPhraseMerge(&p, nDist, 0, 1, &p1, &p2) ){
|
|
p = pSave;
|
|
iPrev = iPrevSave;
|
|
bFirstOut = bFirstOutSave;
|
|
}
|
|
fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
|
|
fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
|
|
}else if( iDiff<0 ){
|
|
fts3PoslistCopy(0, &p1);
|
|
fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
|
|
}else{
|
|
fts3PoslistCopy(0, &p2);
|
|
fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
|
|
}
|
|
}
|
|
|
|
*pnRight = (int)(p - aOut);
|
|
if( bDescDoclist ){
|
|
sqlite3_free(aRight);
|
|
*paRight = aOut;
|
|
}
|
|
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** Argument pList points to a position list nList bytes in size. This
|
|
** function checks to see if the position list contains any entries for
|
|
** a token in position 0 (of any column). If so, it writes argument iDelta
|
|
** to the output buffer pOut, followed by a position list consisting only
|
|
** of the entries from pList at position 0, and terminated by an 0x00 byte.
|
|
** The value returned is the number of bytes written to pOut (if any).
|
|
*/
|
|
int sqlite3Fts3FirstFilter(
|
|
sqlite3_int64 iDelta, /* Varint that may be written to pOut */
|
|
char *pList, /* Position list (no 0x00 term) */
|
|
int nList, /* Size of pList in bytes */
|
|
char *pOut /* Write output here */
|
|
){
|
|
int nOut = 0;
|
|
int bWritten = 0; /* True once iDelta has been written */
|
|
char *p = pList;
|
|
char *pEnd = &pList[nList];
|
|
|
|
if( *p!=0x01 ){
|
|
if( *p==0x02 ){
|
|
nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta);
|
|
pOut[nOut++] = 0x02;
|
|
bWritten = 1;
|
|
}
|
|
fts3ColumnlistCopy(0, &p);
|
|
}
|
|
|
|
while( p<pEnd ){
|
|
sqlite3_int64 iCol;
|
|
p++;
|
|
p += sqlite3Fts3GetVarint(p, &iCol);
|
|
if( *p==0x02 ){
|
|
if( bWritten==0 ){
|
|
nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta);
|
|
bWritten = 1;
|
|
}
|
|
pOut[nOut++] = 0x01;
|
|
nOut += sqlite3Fts3PutVarint(&pOut[nOut], iCol);
|
|
pOut[nOut++] = 0x02;
|
|
}
|
|
fts3ColumnlistCopy(0, &p);
|
|
}
|
|
if( bWritten ){
|
|
pOut[nOut++] = 0x00;
|
|
}
|
|
|
|
return nOut;
|
|
}
|
|
|
|
|
|
/*
|
|
** Merge all doclists in the TermSelect.aaOutput[] array into a single
|
|
** doclist stored in TermSelect.aaOutput[0]. If successful, delete all
|
|
** other doclists (except the aaOutput[0] one) and return SQLITE_OK.
|
|
**
|
|
** If an OOM error occurs, return SQLITE_NOMEM. In this case it is
|
|
** the responsibility of the caller to free any doclists left in the
|
|
** TermSelect.aaOutput[] array.
|
|
*/
|
|
static int fts3TermSelectFinishMerge(Fts3Table *p, TermSelect *pTS){
|
|
char *aOut = 0;
|
|
int nOut = 0;
|
|
int i;
|
|
|
|
/* Loop through the doclists in the aaOutput[] array. Merge them all
|
|
** into a single doclist.
|
|
*/
|
|
for(i=0; i<SizeofArray(pTS->aaOutput); i++){
|
|
if( pTS->aaOutput[i] ){
|
|
if( !aOut ){
|
|
aOut = pTS->aaOutput[i];
|
|
nOut = pTS->anOutput[i];
|
|
pTS->aaOutput[i] = 0;
|
|
}else{
|
|
int nNew;
|
|
char *aNew;
|
|
|
|
int rc = fts3DoclistOrMerge(p->bDescIdx,
|
|
pTS->aaOutput[i], pTS->anOutput[i], aOut, nOut, &aNew, &nNew
|
|
);
|
|
if( rc!=SQLITE_OK ){
|
|
sqlite3_free(aOut);
|
|
return rc;
|
|
}
|
|
|
|
sqlite3_free(pTS->aaOutput[i]);
|
|
sqlite3_free(aOut);
|
|
pTS->aaOutput[i] = 0;
|
|
aOut = aNew;
|
|
nOut = nNew;
|
|
}
|
|
}
|
|
}
|
|
|
|
pTS->aaOutput[0] = aOut;
|
|
pTS->anOutput[0] = nOut;
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** Merge the doclist aDoclist/nDoclist into the TermSelect object passed
|
|
** as the first argument. The merge is an "OR" merge (see function
|
|
** fts3DoclistOrMerge() for details).
|
|
**
|
|
** This function is called with the doclist for each term that matches
|
|
** a queried prefix. It merges all these doclists into one, the doclist
|
|
** for the specified prefix. Since there can be a very large number of
|
|
** doclists to merge, the merging is done pair-wise using the TermSelect
|
|
** object.
|
|
**
|
|
** This function returns SQLITE_OK if the merge is successful, or an
|
|
** SQLite error code (SQLITE_NOMEM) if an error occurs.
|
|
*/
|
|
static int fts3TermSelectMerge(
|
|
Fts3Table *p, /* FTS table handle */
|
|
TermSelect *pTS, /* TermSelect object to merge into */
|
|
char *aDoclist, /* Pointer to doclist */
|
|
int nDoclist /* Size of aDoclist in bytes */
|
|
){
|
|
if( pTS->aaOutput[0]==0 ){
|
|
/* If this is the first term selected, copy the doclist to the output
|
|
** buffer using memcpy().
|
|
**
|
|
** Add FTS3_VARINT_MAX bytes of unused space to the end of the
|
|
** allocation. This is so as to ensure that the buffer is big enough
|
|
** to hold the current doclist AND'd with any other doclist. If the
|
|
** doclists are stored in order=ASC order, this padding would not be
|
|
** required (since the size of [doclistA AND doclistB] is always less
|
|
** than or equal to the size of [doclistA] in that case). But this is
|
|
** not true for order=DESC. For example, a doclist containing (1, -1)
|
|
** may be smaller than (-1), as in the first example the -1 may be stored
|
|
** as a single-byte delta, whereas in the second it must be stored as a
|
|
** FTS3_VARINT_MAX byte varint.
|
|
**
|
|
** Similar padding is added in the fts3DoclistOrMerge() function.
|
|
*/
|
|
pTS->aaOutput[0] = sqlite3_malloc64((i64)nDoclist + FTS3_VARINT_MAX + 1);
|
|
pTS->anOutput[0] = nDoclist;
|
|
if( pTS->aaOutput[0] ){
|
|
memcpy(pTS->aaOutput[0], aDoclist, nDoclist);
|
|
memset(&pTS->aaOutput[0][nDoclist], 0, FTS3_VARINT_MAX);
|
|
}else{
|
|
return SQLITE_NOMEM;
|
|
}
|
|
}else{
|
|
char *aMerge = aDoclist;
|
|
int nMerge = nDoclist;
|
|
int iOut;
|
|
|
|
for(iOut=0; iOut<SizeofArray(pTS->aaOutput); iOut++){
|
|
if( pTS->aaOutput[iOut]==0 ){
|
|
assert( iOut>0 );
|
|
pTS->aaOutput[iOut] = aMerge;
|
|
pTS->anOutput[iOut] = nMerge;
|
|
break;
|
|
}else{
|
|
char *aNew;
|
|
int nNew;
|
|
|
|
int rc = fts3DoclistOrMerge(p->bDescIdx, aMerge, nMerge,
|
|
pTS->aaOutput[iOut], pTS->anOutput[iOut], &aNew, &nNew
|
|
);
|
|
if( rc!=SQLITE_OK ){
|
|
if( aMerge!=aDoclist ) sqlite3_free(aMerge);
|
|
return rc;
|
|
}
|
|
|
|
if( aMerge!=aDoclist ) sqlite3_free(aMerge);
|
|
sqlite3_free(pTS->aaOutput[iOut]);
|
|
pTS->aaOutput[iOut] = 0;
|
|
|
|
aMerge = aNew;
|
|
nMerge = nNew;
|
|
if( (iOut+1)==SizeofArray(pTS->aaOutput) ){
|
|
pTS->aaOutput[iOut] = aMerge;
|
|
pTS->anOutput[iOut] = nMerge;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return SQLITE_OK;
|
|
}
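
/* A note on the merge strategy above (added commentary, not part of the
** original fts3.c): the aaOutput[] array behaves like a binary counter.
** Slot 0 holds the doclist of a single term, slot 1 the union of two
** terms, and slot i the union of roughly 2^i terms, so each input doclist
** takes part in only O(log N) pair-wise merges. Once all 16 slots are in
** use, the final slot simply accumulates any further merges.
*/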
|
|
|
|
/*
|
|
** Append SegReader object pNew to the end of the pCsr->apSegment[] array.
|
|
*/
|
|
static int fts3SegReaderCursorAppend(
|
|
Fts3MultiSegReader *pCsr,
|
|
Fts3SegReader *pNew
|
|
){
|
|
if( (pCsr->nSegment%16)==0 ){
|
|
Fts3SegReader **apNew;
|
|
sqlite3_int64 nByte = (pCsr->nSegment + 16)*sizeof(Fts3SegReader*);
|
|
apNew = (Fts3SegReader **)sqlite3_realloc64(pCsr->apSegment, nByte);
|
|
if( !apNew ){
|
|
sqlite3Fts3SegReaderFree(pNew);
|
|
return SQLITE_NOMEM;
|
|
}
|
|
pCsr->apSegment = apNew;
|
|
}
|
|
pCsr->apSegment[pCsr->nSegment++] = pNew;
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** Add seg-reader objects to the Fts3MultiSegReader object passed as the
|
|
** final argument.
|
|
**
|
|
** This function returns SQLITE_OK if successful, or an SQLite error code
|
|
** otherwise.
|
|
*/
|
|
static int fts3SegReaderCursor(
|
|
Fts3Table *p, /* FTS3 table handle */
|
|
int iLangid, /* Language id */
|
|
int iIndex, /* Index to search (from 0 to p->nIndex-1) */
|
|
int iLevel, /* Level of segments to scan */
|
|
const char *zTerm, /* Term to query for */
|
|
int nTerm, /* Size of zTerm in bytes */
|
|
int isPrefix, /* True for a prefix search */
|
|
int isScan, /* True to scan from zTerm to EOF */
|
|
Fts3MultiSegReader *pCsr /* Cursor object to populate */
|
|
){
|
|
int rc = SQLITE_OK; /* Error code */
|
|
sqlite3_stmt *pStmt = 0; /* Statement to iterate through segments */
|
|
int rc2; /* Result of sqlite3_reset() */
|
|
|
|
/* If iLevel is less than 0 and this is not a scan, include a seg-reader
|
|
** for the pending-terms. If this is a scan, then this call must be being
|
|
** made by an fts4aux module, not an FTS table. In this case calling
|
|
** Fts3SegReaderPending might segfault, as the data structures used by
|
|
** fts4aux are not completely populated. So it's easiest to filter these
|
|
** calls out here. */
|
|
if( iLevel<0 && p->aIndex && p->iPrevLangid==iLangid ){
|
|
Fts3SegReader *pSeg = 0;
|
|
rc = sqlite3Fts3SegReaderPending(p, iIndex, zTerm, nTerm, isPrefix||isScan, &pSeg);
|
|
if( rc==SQLITE_OK && pSeg ){
|
|
rc = fts3SegReaderCursorAppend(pCsr, pSeg);
|
|
}
|
|
}
|
|
|
|
if( iLevel!=FTS3_SEGCURSOR_PENDING ){
|
|
if( rc==SQLITE_OK ){
|
|
rc = sqlite3Fts3AllSegdirs(p, iLangid, iIndex, iLevel, &pStmt);
|
|
}
|
|
|
|
while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){
|
|
Fts3SegReader *pSeg = 0;
|
|
|
|
/* Read the values returned by the SELECT into local variables. */
|
|
sqlite3_int64 iStartBlock = sqlite3_column_int64(pStmt, 1);
|
|
sqlite3_int64 iLeavesEndBlock = sqlite3_column_int64(pStmt, 2);
|
|
sqlite3_int64 iEndBlock = sqlite3_column_int64(pStmt, 3);
|
|
int nRoot = sqlite3_column_bytes(pStmt, 4);
|
|
char const *zRoot = sqlite3_column_blob(pStmt, 4);
|
|
|
|
/* If zTerm is not NULL, and this segment is not stored entirely on its
|
|
** root node, the range of leaves scanned can be reduced. Do this. */
|
|
if( iStartBlock && zTerm && zRoot ){
|
|
sqlite3_int64 *pi = (isPrefix ? &iLeavesEndBlock : 0);
|
|
rc = fts3SelectLeaf(p, zTerm, nTerm, zRoot, nRoot, &iStartBlock, pi);
|
|
if( rc!=SQLITE_OK ) goto finished;
|
|
if( isPrefix==0 && isScan==0 ) iLeavesEndBlock = iStartBlock;
|
|
}
|
|
|
|
rc = sqlite3Fts3SegReaderNew(pCsr->nSegment+1,
|
|
(isPrefix==0 && isScan==0),
|
|
iStartBlock, iLeavesEndBlock,
|
|
iEndBlock, zRoot, nRoot, &pSeg
|
|
);
|
|
if( rc!=SQLITE_OK ) goto finished;
|
|
rc = fts3SegReaderCursorAppend(pCsr, pSeg);
|
|
}
|
|
}
|
|
|
|
finished:
|
|
rc2 = sqlite3_reset(pStmt);
|
|
if( rc==SQLITE_DONE ) rc = rc2;
|
|
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Set up a cursor object for iterating through a full-text index or a
|
|
** single level therein.
|
|
*/
|
|
int sqlite3Fts3SegReaderCursor(
|
|
Fts3Table *p, /* FTS3 table handle */
|
|
int iLangid, /* Language-id to search */
|
|
int iIndex, /* Index to search (from 0 to p->nIndex-1) */
|
|
int iLevel, /* Level of segments to scan */
|
|
const char *zTerm, /* Term to query for */
|
|
int nTerm, /* Size of zTerm in bytes */
|
|
int isPrefix, /* True for a prefix search */
|
|
int isScan, /* True to scan from zTerm to EOF */
|
|
Fts3MultiSegReader *pCsr /* Cursor object to populate */
|
|
){
|
|
assert( iIndex>=0 && iIndex<p->nIndex );
|
|
assert( iLevel==FTS3_SEGCURSOR_ALL
|
|
|| iLevel==FTS3_SEGCURSOR_PENDING
|
|
|| iLevel>=0
|
|
);
|
|
assert( iLevel<FTS3_SEGDIR_MAXLEVEL );
|
|
assert( FTS3_SEGCURSOR_ALL<0 && FTS3_SEGCURSOR_PENDING<0 );
|
|
assert( isPrefix==0 || isScan==0 );
|
|
|
|
memset(pCsr, 0, sizeof(Fts3MultiSegReader));
|
|
return fts3SegReaderCursor(
|
|
p, iLangid, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr
|
|
);
|
|
}
|
|
|
|
/*
|
|
** In addition to its current configuration, have the Fts3MultiSegReader
|
|
** passed as the final argument also scan the doclist for term zTerm/nTerm.
|
|
**
|
|
** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
|
|
*/
|
|
static int fts3SegReaderCursorAddZero(
|
|
Fts3Table *p, /* FTS virtual table handle */
|
|
int iLangid,
|
|
const char *zTerm, /* Term to scan doclist of */
|
|
int nTerm, /* Number of bytes in zTerm */
|
|
Fts3MultiSegReader *pCsr /* Fts3MultiSegReader to modify */
|
|
){
|
|
return fts3SegReaderCursor(p,
|
|
iLangid, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr
|
|
);
|
|
}
|
|
|
|
/*
|
|
** Open an Fts3MultiSegReader to scan the doclist for term zTerm/nTerm. Or,
|
|
** if isPrefix is true, to scan the doclist for all terms for which
|
|
** zTerm/nTerm is a prefix. If successful, return SQLITE_OK and write
|
|
** a pointer to the new Fts3MultiSegReader to *ppSegcsr. Otherwise, return
|
|
** an SQLite error code.
|
|
**
|
|
** It is the responsibility of the caller to free this object by eventually
|
|
** passing it to fts3SegReaderCursorFree().
|
|
**
|
|
** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
|
|
** Output parameter *ppSegcsr is set to 0 if an error occurs.
|
|
*/
|
|
static int fts3TermSegReaderCursor(
|
|
Fts3Cursor *pCsr, /* Virtual table cursor handle */
|
|
const char *zTerm, /* Term to query for */
|
|
int nTerm, /* Size of zTerm in bytes */
|
|
int isPrefix, /* True for a prefix search */
|
|
Fts3MultiSegReader **ppSegcsr /* OUT: Allocated seg-reader cursor */
|
|
){
|
|
Fts3MultiSegReader *pSegcsr; /* Object to allocate and return */
|
|
int rc = SQLITE_NOMEM; /* Return code */
|
|
|
|
pSegcsr = sqlite3_malloc(sizeof(Fts3MultiSegReader));
|
|
if( pSegcsr ){
|
|
int i;
|
|
int bFound = 0; /* True once an index has been found */
|
|
Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
|
|
|
|
if( isPrefix ){
|
|
for(i=1; bFound==0 && i<p->nIndex; i++){
|
|
if( p->aIndex[i].nPrefix==nTerm ){
|
|
bFound = 1;
|
|
rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid,
|
|
i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pSegcsr
|
|
);
|
|
pSegcsr->bLookup = 1;
|
|
}
|
|
}
|
|
|
|
for(i=1; bFound==0 && i<p->nIndex; i++){
|
|
if( p->aIndex[i].nPrefix==nTerm+1 ){
|
|
bFound = 1;
|
|
rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid,
|
|
i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pSegcsr
|
|
);
|
|
if( rc==SQLITE_OK ){
|
|
rc = fts3SegReaderCursorAddZero(
|
|
p, pCsr->iLangid, zTerm, nTerm, pSegcsr
|
|
);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if( bFound==0 ){
|
|
rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid,
|
|
0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr
|
|
);
|
|
pSegcsr->bLookup = !isPrefix;
|
|
}
|
|
}
|
|
|
|
*ppSegcsr = pSegcsr;
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Free an Fts3MultiSegReader allocated by fts3TermSegReaderCursor().
|
|
*/
|
|
static void fts3SegReaderCursorFree(Fts3MultiSegReader *pSegcsr){
|
|
sqlite3Fts3SegReaderFinish(pSegcsr);
|
|
sqlite3_free(pSegcsr);
|
|
}
|
|
|
|
/*
|
|
** This function retrieves the doclist for the specified term (or term
|
|
** prefix) from the database.
|
|
*/
|
|
static int fts3TermSelect(
|
|
Fts3Table *p, /* Virtual table handle */
|
|
Fts3PhraseToken *pTok, /* Token to query for */
|
|
int iColumn, /* Column to query (or -ve for all columns) */
|
|
int *pnOut, /* OUT: Size of buffer at *ppOut */
|
|
char **ppOut /* OUT: Malloced result buffer */
|
|
){
|
|
int rc; /* Return code */
|
|
Fts3MultiSegReader *pSegcsr; /* Seg-reader cursor for this term */
|
|
TermSelect tsc; /* Object for pair-wise doclist merging */
|
|
Fts3SegFilter filter; /* Segment term filter configuration */
|
|
|
|
pSegcsr = pTok->pSegcsr;
|
|
memset(&tsc, 0, sizeof(TermSelect));
|
|
|
|
filter.flags = FTS3_SEGMENT_IGNORE_EMPTY | FTS3_SEGMENT_REQUIRE_POS
|
|
| (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0)
|
|
| (pTok->bFirst ? FTS3_SEGMENT_FIRST : 0)
|
|
| (iColumn<p->nColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0);
|
|
filter.iCol = iColumn;
|
|
filter.zTerm = pTok->z;
|
|
filter.nTerm = pTok->n;
|
|
|
|
rc = sqlite3Fts3SegReaderStart(p, pSegcsr, &filter);
|
|
while( SQLITE_OK==rc
|
|
&& SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pSegcsr))
|
|
){
|
|
rc = fts3TermSelectMerge(p, &tsc, pSegcsr->aDoclist, pSegcsr->nDoclist);
|
|
}
|
|
|
|
if( rc==SQLITE_OK ){
|
|
rc = fts3TermSelectFinishMerge(p, &tsc);
|
|
}
|
|
if( rc==SQLITE_OK ){
|
|
*ppOut = tsc.aaOutput[0];
|
|
*pnOut = tsc.anOutput[0];
|
|
}else{
|
|
int i;
|
|
for(i=0; i<SizeofArray(tsc.aaOutput); i++){
|
|
sqlite3_free(tsc.aaOutput[i]);
|
|
}
|
|
}
|
|
|
|
fts3SegReaderCursorFree(pSegcsr);
|
|
pTok->pSegcsr = 0;
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** This function counts the total number of docids in the doclist stored
|
|
** in buffer aList[], size nList bytes.
|
|
**
|
|
** The doclist is assumed to contain a position-list following each docid
** (the position-lists are skipped over, not decoded).
|
|
*/
|
|
static int fts3DoclistCountDocids(char *aList, int nList){
|
|
int nDoc = 0; /* Return value */
|
|
if( aList ){
|
|
char *aEnd = &aList[nList]; /* Pointer to one byte after EOF */
|
|
char *p = aList; /* Cursor */
|
|
while( p<aEnd ){
|
|
nDoc++;
|
|
while( (*p++)&0x80 ); /* Skip docid varint */
|
|
fts3PoslistCopy(0, &p); /* Skip over position list */
|
|
}
|
|
}
|
|
|
|
return nDoc;
|
|
}
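
/* Illustrative sketch (added commentary, not part of the original fts3.c;
** the helper name and byte values are constructed for this example only):
** counting docids in a small doclist. The doclist 0x01 0x00 0x01 0x00
** holds docids 1 and 2, each followed by an empty position list.
*/
#if 0
static void fts3DoclistCountExample(void){
  char aList[] = { 0x01, 0x00, 0x01, 0x00 };
  int nDoc = fts3DoclistCountDocids(aList, 4);   /* nDoc==2 */
}
#endif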
|
|
|
|
/*
|
|
** Advance the cursor to the next row in the %_content table that
|
|
** matches the search criteria. For a MATCH search, this will be
|
|
** the next row that matches. For a full-table scan, this will be
|
|
** simply the next row in the %_content table. For a docid lookup,
|
|
** this routine simply sets the EOF flag.
|
|
**
|
|
** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned
|
|
** even if we reach end-of-file. The fts3EofMethod() will be called
|
|
** subsequently to determine whether or not an EOF was hit.
|
|
*/
|
|
static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){
|
|
int rc;
|
|
Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;
|
|
if( pCsr->eSearch==FTS3_DOCID_SEARCH || pCsr->eSearch==FTS3_FULLSCAN_SEARCH ){
|
|
Fts3Table *pTab = (Fts3Table*)pCursor->pVtab;
|
|
pTab->bLock++;
|
|
if( SQLITE_ROW!=sqlite3_step(pCsr->pStmt) ){
|
|
pCsr->isEof = 1;
|
|
rc = sqlite3_reset(pCsr->pStmt);
|
|
}else{
|
|
pCsr->iPrevId = sqlite3_column_int64(pCsr->pStmt, 0);
|
|
rc = SQLITE_OK;
|
|
}
|
|
pTab->bLock--;
|
|
}else{
|
|
rc = fts3EvalNext((Fts3Cursor *)pCursor);
|
|
}
|
|
assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** If the numeric type of argument pVal is "integer", then return it
|
|
** converted to a 64-bit signed integer. Otherwise, return a copy of
|
|
** the second parameter, iDefault.
|
|
*/
|
|
static sqlite3_int64 fts3DocidRange(sqlite3_value *pVal, i64 iDefault){
|
|
if( pVal ){
|
|
int eType = sqlite3_value_numeric_type(pVal);
|
|
if( eType==SQLITE_INTEGER ){
|
|
return sqlite3_value_int64(pVal);
|
|
}
|
|
}
|
|
return iDefault;
|
|
}
|
|
|
|
/*
|
|
** This is the xFilter interface for the virtual table. See
|
|
** the virtual table xFilter method documentation for additional
|
|
** information.
|
|
**
|
|
** If idxNum==FTS3_FULLSCAN_SEARCH then do a full table scan against
|
|
** the %_content table.
|
|
**
|
|
** If idxNum==FTS3_DOCID_SEARCH then do a docid lookup for a single entry
|
|
** in the %_content table.
|
|
**
|
|
** If idxNum>=FTS3_FULLTEXT_SEARCH then use the full text index. The
|
|
** column on the left-hand side of the MATCH operator is column
|
|
** number idxNum-FTS3_FULLTEXT_SEARCH, 0 indexed. argv[0] is the right-hand
|
|
** side of the MATCH operator.
|
|
*/
|
|
static int fts3FilterMethod(
|
|
sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
|
|
int idxNum, /* Strategy index */
|
|
const char *idxStr, /* Unused */
|
|
int nVal, /* Number of elements in apVal */
|
|
sqlite3_value **apVal /* Arguments for the indexing scheme */
|
|
){
|
|
int rc = SQLITE_OK;
|
|
char *zSql; /* SQL statement used to access %_content */
|
|
int eSearch;
|
|
Fts3Table *p = (Fts3Table *)pCursor->pVtab;
|
|
Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;
|
|
|
|
sqlite3_value *pCons = 0; /* The MATCH or rowid constraint, if any */
|
|
sqlite3_value *pLangid = 0; /* The "langid = ?" constraint, if any */
|
|
sqlite3_value *pDocidGe = 0; /* The "docid >= ?" constraint, if any */
|
|
sqlite3_value *pDocidLe = 0; /* The "docid <= ?" constraint, if any */
|
|
int iIdx;
|
|
|
|
UNUSED_PARAMETER(idxStr);
|
|
UNUSED_PARAMETER(nVal);
|
|
|
|
if( p->bLock ){
|
|
return SQLITE_ERROR;
|
|
}
|
|
|
|
eSearch = (idxNum & 0x0000FFFF);
|
|
assert( eSearch>=0 && eSearch<=(FTS3_FULLTEXT_SEARCH+p->nColumn) );
|
|
assert( p->pSegments==0 );
|
|
|
|
/* Collect arguments into local variables */
|
|
iIdx = 0;
|
|
if( eSearch!=FTS3_FULLSCAN_SEARCH ) pCons = apVal[iIdx++];
|
|
if( idxNum & FTS3_HAVE_LANGID ) pLangid = apVal[iIdx++];
|
|
if( idxNum & FTS3_HAVE_DOCID_GE ) pDocidGe = apVal[iIdx++];
|
|
if( idxNum & FTS3_HAVE_DOCID_LE ) pDocidLe = apVal[iIdx++];
|
|
assert( iIdx==nVal );
|
|
|
|
/* In case the cursor has been used before, clear it now. */
|
|
fts3ClearCursor(pCsr);
|
|
|
|
/* Set the lower and upper bounds on docids to return */
|
|
pCsr->iMinDocid = fts3DocidRange(pDocidGe, SMALLEST_INT64);
|
|
pCsr->iMaxDocid = fts3DocidRange(pDocidLe, LARGEST_INT64);
|
|
|
|
if( idxStr ){
|
|
pCsr->bDesc = (idxStr[0]=='D');
|
|
}else{
|
|
pCsr->bDesc = p->bDescIdx;
|
|
}
|
|
pCsr->eSearch = (i16)eSearch;
|
|
|
|
if( eSearch!=FTS3_DOCID_SEARCH && eSearch!=FTS3_FULLSCAN_SEARCH ){
|
|
int iCol = eSearch-FTS3_FULLTEXT_SEARCH;
|
|
const char *zQuery = (const char *)sqlite3_value_text(pCons);
|
|
|
|
if( zQuery==0 && sqlite3_value_type(pCons)!=SQLITE_NULL ){
|
|
return SQLITE_NOMEM;
|
|
}
|
|
|
|
pCsr->iLangid = 0;
|
|
if( pLangid ) pCsr->iLangid = sqlite3_value_int(pLangid);
|
|
|
|
assert( p->base.zErrMsg==0 );
|
|
rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid,
|
|
p->azColumn, p->bFts4, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr,
|
|
&p->base.zErrMsg
|
|
);
|
|
if( rc!=SQLITE_OK ){
|
|
return rc;
|
|
}
|
|
|
|
rc = fts3EvalStart(pCsr);
|
|
sqlite3Fts3SegmentsClose(p);
|
|
if( rc!=SQLITE_OK ) return rc;
|
|
pCsr->pNextId = pCsr->aDoclist;
|
|
pCsr->iPrevId = 0;
|
|
}
|
|
|
|
/* Compile a SELECT statement for this cursor. For a full-table-scan, the
|
|
** statement loops through all rows of the %_content table. For a
|
|
** full-text query or docid lookup, the statement retrieves a single
|
|
** row by docid.
|
|
*/
|
|
if( eSearch==FTS3_FULLSCAN_SEARCH ){
|
|
if( pDocidGe || pDocidLe ){
|
|
zSql = sqlite3_mprintf(
|
|
"SELECT %s WHERE rowid BETWEEN %lld AND %lld ORDER BY rowid %s",
|
|
p->zReadExprlist, pCsr->iMinDocid, pCsr->iMaxDocid,
|
|
(pCsr->bDesc ? "DESC" : "ASC")
|
|
);
|
|
}else{
|
|
zSql = sqlite3_mprintf("SELECT %s ORDER BY rowid %s",
|
|
p->zReadExprlist, (pCsr->bDesc ? "DESC" : "ASC")
|
|
);
|
|
}
|
|
if( zSql ){
|
|
p->bLock++;
|
|
rc = sqlite3_prepare_v3(
|
|
p->db,zSql,-1,SQLITE_PREPARE_PERSISTENT,&pCsr->pStmt,0
|
|
);
|
|
p->bLock--;
|
|
sqlite3_free(zSql);
|
|
}else{
|
|
rc = SQLITE_NOMEM;
|
|
}
|
|
}else if( eSearch==FTS3_DOCID_SEARCH ){
|
|
rc = fts3CursorSeekStmt(pCsr);
|
|
if( rc==SQLITE_OK ){
|
|
rc = sqlite3_bind_value(pCsr->pStmt, 1, pCons);
|
|
}
|
|
}
|
|
if( rc!=SQLITE_OK ) return rc;
|
|
|
|
return fts3NextMethod(pCursor);
|
|
}
|
|
|
|
/*
|
|
** This is the xEof method of the virtual table. SQLite calls this
|
|
** routine to find out if it has reached the end of a result set.
|
|
*/
|
|
static int fts3EofMethod(sqlite3_vtab_cursor *pCursor){
|
|
Fts3Cursor *pCsr = (Fts3Cursor*)pCursor;
|
|
if( pCsr->isEof ){
|
|
fts3ClearCursor(pCsr);
|
|
pCsr->isEof = 1;
|
|
}
|
|
return pCsr->isEof;
|
|
}
|
|
|
|
/*
|
|
** This is the xRowid method. The SQLite core calls this routine to
|
|
** retrieve the rowid for the current row of the result set. fts3
|
|
** exposes %_content.docid as the rowid for the virtual table. The
|
|
** rowid should be written to *pRowid.
|
|
*/
|
|
static int fts3RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
|
|
Fts3Cursor *pCsr = (Fts3Cursor *) pCursor;
|
|
*pRowid = pCsr->iPrevId;
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** This is the xColumn method, called by SQLite to request a value from
|
|
** the row that the supplied cursor currently points to.
|
|
**
|
|
** If:
|
|
**
|
|
** (iCol < p->nColumn) -> The value of the iCol'th user column.
|
|
** (iCol == p->nColumn) -> Magic column with the same name as the table.
|
|
** (iCol == p->nColumn+1) -> Docid column
|
|
** (iCol == p->nColumn+2) -> Langid column
|
|
*/
|
|
static int fts3ColumnMethod(
|
|
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
|
|
sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */
|
|
int iCol /* Index of column to read value from */
|
|
){
|
|
int rc = SQLITE_OK; /* Return Code */
|
|
Fts3Cursor *pCsr = (Fts3Cursor *) pCursor;
|
|
Fts3Table *p = (Fts3Table *)pCursor->pVtab;
|
|
|
|
/* The column value supplied by SQLite must be in range. */
|
|
assert( iCol>=0 && iCol<=p->nColumn+2 );
|
|
|
|
switch( iCol-p->nColumn ){
|
|
case 0:
|
|
/* The special 'table-name' column */
|
|
sqlite3_result_pointer(pCtx, pCsr, "fts3cursor", 0);
|
|
break;
|
|
|
|
case 1:
|
|
/* The docid column */
|
|
sqlite3_result_int64(pCtx, pCsr->iPrevId);
|
|
break;
|
|
|
|
case 2:
|
|
if( pCsr->pExpr ){
|
|
sqlite3_result_int64(pCtx, pCsr->iLangid);
|
|
break;
|
|
}else if( p->zLanguageid==0 ){
|
|
sqlite3_result_int(pCtx, 0);
|
|
break;
|
|
}else{
|
|
iCol = p->nColumn;
|
|
/* no break */ deliberate_fall_through
|
|
}
|
|
|
|
default:
|
|
/* A user column. Or, if this is a full-table scan, possibly the
|
|
** language-id column. Seek the cursor. */
|
|
rc = fts3CursorSeek(0, pCsr);
|
|
if( rc==SQLITE_OK && sqlite3_data_count(pCsr->pStmt)-1>iCol ){
|
|
sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1));
|
|
}
|
|
break;
|
|
}
|
|
|
|
assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** This function is the implementation of the xUpdate callback used by
|
|
** FTS3 virtual tables. It is invoked by SQLite each time a row is to be
|
|
** inserted, updated or deleted.
|
|
*/
|
|
static int fts3UpdateMethod(
|
|
sqlite3_vtab *pVtab, /* Virtual table handle */
|
|
int nArg, /* Size of argument array */
|
|
sqlite3_value **apVal, /* Array of arguments */
|
|
sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */
|
|
){
|
|
return sqlite3Fts3UpdateMethod(pVtab, nArg, apVal, pRowid);
|
|
}
|
|
|
|
/*
|
|
** Implementation of xSync() method. Flush the contents of the pending-terms
|
|
** hash-table to the database.
|
|
*/
|
|
static int fts3SyncMethod(sqlite3_vtab *pVtab){
|
|
|
|
/* Following an incremental-merge operation, assuming that the input
|
|
** segments are not completely consumed (the usual case), they are updated
|
|
** in place to remove the entries that have already been merged. This
|
|
** involves updating the leaf block that contains the smallest unmerged
|
|
** entry and each block (if any) between the leaf and the root node. So
|
|
** if the height of the input segment b-trees is N, and input segments
|
|
** are merged eight at a time, updating the input segments at the end
|
|
** of an incremental-merge requires writing (8*(1+N)) blocks. N is usually
|
|
** small - often between 0 and 2. So the overhead of the incremental
|
|
** merge is somewhere between 8 and 24 blocks. To avoid this overhead
|
|
** dwarfing the actual productive work accomplished, the incremental merge
|
|
** is only attempted if it will write at least 64 leaf blocks. Hence
|
|
** nMinMerge.
|
|
**
|
|
** Of course, updating the input segments also involves deleting a bunch
|
|
** of blocks from the segments table. But this is not considered overhead
|
|
** as it would also be required by a crisis-merge that used the same input
|
|
** segments.
|
|
*/
|
|
const u32 nMinMerge = 64; /* Minimum amount of incr-merge work to do */
|
|
|
|
Fts3Table *p = (Fts3Table*)pVtab;
|
|
int rc;
|
|
i64 iLastRowid = sqlite3_last_insert_rowid(p->db);
|
|
|
|
rc = sqlite3Fts3PendingTermsFlush(p);
|
|
if( rc==SQLITE_OK
|
|
&& p->nLeafAdd>(nMinMerge/16)
|
|
&& p->nAutoincrmerge && p->nAutoincrmerge!=0xff
|
|
){
|
|
int mxLevel = 0; /* Maximum relative level value in db */
|
|
int A; /* Incr-merge parameter A */
|
|
|
|
rc = sqlite3Fts3MaxLevel(p, &mxLevel);
|
|
assert( rc==SQLITE_OK || mxLevel==0 );
|
|
A = p->nLeafAdd * mxLevel;
|
|
A += (A/2);
|
|
if( A>(int)nMinMerge ) rc = sqlite3Fts3Incrmerge(p, A, p->nAutoincrmerge);
|
|
}
|
|
sqlite3Fts3SegmentsClose(p);
|
|
sqlite3_set_last_insert_rowid(p->db, iLastRowid);
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** If it is currently unknown whether or not the FTS table has an %_stat
|
|
** table (if p->bHasStat==2), attempt to determine this (set p->bHasStat
|
|
** to 0 or 1). Return SQLITE_OK if successful, or an SQLite error code
|
|
** if an error occurs.
|
|
*/
|
|
static int fts3SetHasStat(Fts3Table *p){
|
|
int rc = SQLITE_OK;
|
|
if( p->bHasStat==2 ){
|
|
char *zTbl = sqlite3_mprintf("%s_stat", p->zName);
|
|
if( zTbl ){
|
|
int res = sqlite3_table_column_metadata(p->db, p->zDb, zTbl, 0,0,0,0,0,0);
|
|
sqlite3_free(zTbl);
|
|
p->bHasStat = (res==SQLITE_OK);
|
|
}else{
|
|
rc = SQLITE_NOMEM;
|
|
}
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Implementation of xBegin() method.
|
|
*/
|
|
static int fts3BeginMethod(sqlite3_vtab *pVtab){
|
|
Fts3Table *p = (Fts3Table*)pVtab;
|
|
int rc;
|
|
UNUSED_PARAMETER(pVtab);
|
|
assert( p->pSegments==0 );
|
|
assert( p->nPendingData==0 );
|
|
assert( p->inTransaction!=1 );
|
|
p->nLeafAdd = 0;
|
|
rc = fts3SetHasStat(p);
|
|
#ifdef SQLITE_DEBUG
|
|
if( rc==SQLITE_OK ){
|
|
p->inTransaction = 1;
|
|
p->mxSavepoint = -1;
|
|
}
|
|
#endif
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Implementation of xCommit() method. This is a no-op. The contents of
|
|
** the pending-terms hash-table have already been flushed into the database
|
|
** by fts3SyncMethod().
|
|
*/
|
|
static int fts3CommitMethod(sqlite3_vtab *pVtab){
|
|
TESTONLY( Fts3Table *p = (Fts3Table*)pVtab );
|
|
UNUSED_PARAMETER(pVtab);
|
|
assert( p->nPendingData==0 );
|
|
assert( p->inTransaction!=0 );
|
|
assert( p->pSegments==0 );
|
|
TESTONLY( p->inTransaction = 0 );
|
|
TESTONLY( p->mxSavepoint = -1; );
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** Implementation of xRollback(). Discard the contents of the pending-terms
|
|
** hash-table. Any changes made to the database are reverted by SQLite.
|
|
*/
|
|
static int fts3RollbackMethod(sqlite3_vtab *pVtab){
|
|
Fts3Table *p = (Fts3Table*)pVtab;
|
|
sqlite3Fts3PendingTermsClear(p);
|
|
assert( p->inTransaction!=0 );
|
|
TESTONLY( p->inTransaction = 0 );
|
|
TESTONLY( p->mxSavepoint = -1; );
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** When called, *ppPoslist must point to the byte immediately following the
|
|
** end of a position-list. i.e. ( (*ppPoslist)[-1]==POS_END ). This function
|
|
** moves *ppPoslist so that it instead points to the first byte of the
|
|
** same position list.
|
|
*/
|
|
static void fts3ReversePoslist(char *pStart, char **ppPoslist){
|
|
char *p = &(*ppPoslist)[-2];
|
|
char c = 0;
|
|
|
|
/* Skip backwards past any trailing 0x00 bytes added by NearTrim() */
|
|
while( p>pStart && (c=*p--)==0 );
|
|
|
|
/* Search backwards for a varint with value zero (the end of the previous
|
|
** poslist). This is an 0x00 byte preceded by some byte that does not
|
|
** have the 0x80 bit set. */
|
|
while( p>pStart && (*p & 0x80) | c ){
|
|
c = *p--;
|
|
}
|
|
assert( p==pStart || c==0 );
|
|
|
|
/* At this point p points to that preceding byte without the 0x80 bit
|
|
** set. So to find the start of the poslist, skip forward 2 bytes then
|
|
** over a varint.
|
|
**
|
|
** That is the normal case. The other case is that p==pStart and the
** poslist to return is the first in the doclist. In this case do not skip
** forward 2 bytes. The second part of the if condition
** (c==0 && *ppPoslist>&p[2]) is required for the case where the
** position-list belonging to the first docid in the doclist is empty.
** For example, if the first docid is 10, a doclist that begins with:
**
**   0x0A 0x00 <next docid delta varint>
|
|
*/
|
|
if( p>pStart || (c==0 && *ppPoslist>&p[2]) ){ p = &p[2]; }
|
|
while( *p++&0x80 );
|
|
*ppPoslist = p;
|
|
}
|
|
|
|
/*
|
|
** Helper function used by the implementation of the overloaded snippet(),
|
|
** offsets() and optimize() SQL functions.
|
|
**
|
|
** If the value passed as the third argument is a blob of size
|
|
** sizeof(Fts3Cursor*), then the blob contents are copied to the
|
|
** output variable *ppCsr and SQLITE_OK is returned. Otherwise, an error
|
|
** message is written to context pContext and SQLITE_ERROR returned. The
|
|
** string passed via zFunc is used as part of the error message.
|
|
*/
|
|
static int fts3FunctionArg(
|
|
sqlite3_context *pContext, /* SQL function call context */
|
|
const char *zFunc, /* Function name */
|
|
sqlite3_value *pVal, /* argv[0] passed to function */
|
|
Fts3Cursor **ppCsr /* OUT: Store cursor handle here */
|
|
){
|
|
int rc;
|
|
*ppCsr = (Fts3Cursor*)sqlite3_value_pointer(pVal, "fts3cursor");
|
|
if( (*ppCsr)!=0 ){
|
|
rc = SQLITE_OK;
|
|
}else{
|
|
char *zErr = sqlite3_mprintf("illegal first argument to %s", zFunc);
|
|
sqlite3_result_error(pContext, zErr, -1);
|
|
sqlite3_free(zErr);
|
|
rc = SQLITE_ERROR;
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Implementation of the snippet() function for FTS3
|
|
*/
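/* Typical usage (illustrative example, not taken from the original
** sources):
**
**   SELECT snippet(t, '<b>', '</b>', '<b>...</b>', -1, 15)
**   FROM t WHERE t MATCH 'sqlite';
**
** Trailing arguments may be omitted; the defaults used in that case are
** the values assigned below.
*/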
|
|
static void fts3SnippetFunc(
|
|
sqlite3_context *pContext, /* SQLite function call context */
|
|
int nVal, /* Size of apVal[] array */
|
|
sqlite3_value **apVal /* Array of arguments */
|
|
){
|
|
Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */
|
|
const char *zStart = "<b>";
|
|
const char *zEnd = "</b>";
|
|
const char *zEllipsis = "<b>...</b>";
|
|
int iCol = -1;
|
|
int nToken = 15; /* Default number of tokens in snippet */
|
|
|
|
/* There must be at least one argument passed to this function (otherwise
|
|
** the non-overloaded version would have been called instead of this one).
|
|
*/
|
|
assert( nVal>=1 );
|
|
|
|
if( nVal>6 ){
|
|
sqlite3_result_error(pContext,
|
|
"wrong number of arguments to function snippet()", -1);
|
|
return;
|
|
}
|
|
if( fts3FunctionArg(pContext, "snippet", apVal[0], &pCsr) ) return;
|
|
|
|
switch( nVal ){
|
|
case 6: nToken = sqlite3_value_int(apVal[5]);
|
|
/* no break */ deliberate_fall_through
|
|
case 5: iCol = sqlite3_value_int(apVal[4]);
|
|
/* no break */ deliberate_fall_through
|
|
case 4: zEllipsis = (const char*)sqlite3_value_text(apVal[3]);
|
|
/* no break */ deliberate_fall_through
|
|
case 3: zEnd = (const char*)sqlite3_value_text(apVal[2]);
|
|
/* no break */ deliberate_fall_through
|
|
case 2: zStart = (const char*)sqlite3_value_text(apVal[1]);
|
|
}
|
|
if( !zEllipsis || !zEnd || !zStart ){
|
|
sqlite3_result_error_nomem(pContext);
|
|
}else if( nToken==0 ){
|
|
sqlite3_result_text(pContext, "", -1, SQLITE_STATIC);
|
|
}else if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){
|
|
sqlite3Fts3Snippet(pContext, pCsr, zStart, zEnd, zEllipsis, iCol, nToken);
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Implementation of the offsets() function for FTS3
|
|
*/
|
|
static void fts3OffsetsFunc(
|
|
sqlite3_context *pContext, /* SQLite function call context */
|
|
int nVal, /* Size of argument array */
|
|
sqlite3_value **apVal /* Array of arguments */
|
|
){
|
|
Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */
|
|
|
|
UNUSED_PARAMETER(nVal);
|
|
|
|
assert( nVal==1 );
|
|
if( fts3FunctionArg(pContext, "offsets", apVal[0], &pCsr) ) return;
|
|
assert( pCsr );
|
|
if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){
|
|
sqlite3Fts3Offsets(pContext, pCsr);
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Implementation of the special optimize() function for FTS3. This
|
|
** function merges all segments in the database to a single segment.
|
|
** Example usage is:
|
|
**
|
|
** SELECT optimize(t) FROM t LIMIT 1;
|
|
**
|
|
** where 't' is the name of an FTS3 table.
|
|
*/
|
|
static void fts3OptimizeFunc(
|
|
sqlite3_context *pContext, /* SQLite function call context */
|
|
int nVal, /* Size of argument array */
|
|
sqlite3_value **apVal /* Array of arguments */
|
|
){
|
|
int rc; /* Return code */
|
|
Fts3Table *p; /* Virtual table handle */
|
|
Fts3Cursor *pCursor; /* Cursor handle passed through apVal[0] */
|
|
|
|
UNUSED_PARAMETER(nVal);
|
|
|
|
assert( nVal==1 );
|
|
if( fts3FunctionArg(pContext, "optimize", apVal[0], &pCursor) ) return;
|
|
p = (Fts3Table *)pCursor->base.pVtab;
|
|
assert( p );
|
|
|
|
rc = sqlite3Fts3Optimize(p);
|
|
|
|
switch( rc ){
|
|
case SQLITE_OK:
|
|
sqlite3_result_text(pContext, "Index optimized", -1, SQLITE_STATIC);
|
|
break;
|
|
case SQLITE_DONE:
|
|
sqlite3_result_text(pContext, "Index already optimal", -1, SQLITE_STATIC);
|
|
break;
|
|
default:
|
|
sqlite3_result_error_code(pContext, rc);
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Implementation of the matchinfo() function for FTS3
|
|
*/
|
|
static void fts3MatchinfoFunc(
|
|
sqlite3_context *pContext, /* SQLite function call context */
|
|
int nVal, /* Size of argument array */
|
|
sqlite3_value **apVal /* Array of arguments */
|
|
){
|
|
Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */
|
|
assert( nVal==1 || nVal==2 );
|
|
if( SQLITE_OK==fts3FunctionArg(pContext, "matchinfo", apVal[0], &pCsr) ){
|
|
const char *zArg = 0;
|
|
if( nVal>1 ){
|
|
zArg = (const char *)sqlite3_value_text(apVal[1]);
|
|
}
|
|
sqlite3Fts3Matchinfo(pContext, pCsr, zArg);
|
|
}
|
|
}
|
|
|
|
/*
|
|
** This routine implements the xFindFunction method for the FTS3
|
|
** virtual table.
|
|
*/
|
|
static int fts3FindFunctionMethod(
|
|
sqlite3_vtab *pVtab, /* Virtual table handle */
|
|
int nArg, /* Number of SQL function arguments */
|
|
const char *zName, /* Name of SQL function */
|
|
void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */
|
|
void **ppArg /* Unused */
|
|
){
|
|
struct Overloaded {
|
|
const char *zName;
|
|
void (*xFunc)(sqlite3_context*,int,sqlite3_value**);
|
|
} aOverload[] = {
|
|
{ "snippet", fts3SnippetFunc },
|
|
{ "offsets", fts3OffsetsFunc },
|
|
{ "optimize", fts3OptimizeFunc },
|
|
{ "matchinfo", fts3MatchinfoFunc },
|
|
};
|
|
int i; /* Iterator variable */
|
|
|
|
UNUSED_PARAMETER(pVtab);
|
|
UNUSED_PARAMETER(nArg);
|
|
UNUSED_PARAMETER(ppArg);
|
|
|
|
for(i=0; i<SizeofArray(aOverload); i++){
|
|
if( strcmp(zName, aOverload[i].zName)==0 ){
|
|
*pxFunc = aOverload[i].xFunc;
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
/* No function of the specified name was found. Return 0. */
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
** Implementation of FTS3 xRename method. Rename an fts3 table.
|
|
*/
|
|
static int fts3RenameMethod(
|
|
sqlite3_vtab *pVtab, /* Virtual table handle */
|
|
const char *zName /* New name of table */
|
|
){
|
|
Fts3Table *p = (Fts3Table *)pVtab;
|
|
sqlite3 *db = p->db; /* Database connection */
|
|
int rc; /* Return Code */
|
|
|
|
/* At this point it must be known if the %_stat table exists or not.
|
|
** So bHasStat may not be 2. */
|
|
rc = fts3SetHasStat(p);
|
|
|
|
/* As it happens, the pending terms table is always empty here. This is
|
|
** because an "ALTER TABLE RENAME TABLE" statement inside a transaction
|
|
** always opens a savepoint transaction. And the xSavepoint() method
|
|
** flushes the pending terms table. But leave the (no-op) call to
|
|
** PendingTermsFlush() in in case that changes.
|
|
*/
|
|
assert( p->nPendingData==0 );
|
|
if( rc==SQLITE_OK ){
|
|
rc = sqlite3Fts3PendingTermsFlush(p);
|
|
}
|
|
|
|
p->bIgnoreSavepoint = 1;
|
|
|
|
if( p->zContentTbl==0 ){
|
|
fts3DbExec(&rc, db,
|
|
"ALTER TABLE %Q.'%q_content' RENAME TO '%q_content';",
|
|
p->zDb, p->zName, zName
|
|
);
|
|
}
|
|
|
|
if( p->bHasDocsize ){
|
|
fts3DbExec(&rc, db,
|
|
"ALTER TABLE %Q.'%q_docsize' RENAME TO '%q_docsize';",
|
|
p->zDb, p->zName, zName
|
|
);
|
|
}
|
|
if( p->bHasStat ){
|
|
fts3DbExec(&rc, db,
|
|
"ALTER TABLE %Q.'%q_stat' RENAME TO '%q_stat';",
|
|
p->zDb, p->zName, zName
|
|
);
|
|
}
|
|
fts3DbExec(&rc, db,
|
|
"ALTER TABLE %Q.'%q_segments' RENAME TO '%q_segments';",
|
|
p->zDb, p->zName, zName
|
|
);
|
|
fts3DbExec(&rc, db,
|
|
"ALTER TABLE %Q.'%q_segdir' RENAME TO '%q_segdir';",
|
|
p->zDb, p->zName, zName
|
|
);
|
|
|
|
p->bIgnoreSavepoint = 0;
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** The xSavepoint() method.
|
|
**
|
|
** Flush the contents of the pending-terms table to disk.
|
|
*/
|
|
static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){
|
|
int rc = SQLITE_OK;
|
|
Fts3Table *pTab = (Fts3Table*)pVtab;
|
|
assert( pTab->inTransaction );
|
|
assert( pTab->mxSavepoint<=iSavepoint );
|
|
TESTONLY( pTab->mxSavepoint = iSavepoint );
|
|
|
|
if( pTab->bIgnoreSavepoint==0 ){
|
|
if( fts3HashCount(&pTab->aIndex[0].hPending)>0 ){
|
|
char *zSql = sqlite3_mprintf("INSERT INTO %Q.%Q(%Q) VALUES('flush')",
|
|
pTab->zDb, pTab->zName, pTab->zName
|
|
);
|
|
if( zSql ){
|
|
pTab->bIgnoreSavepoint = 1;
|
|
rc = sqlite3_exec(pTab->db, zSql, 0, 0, 0);
|
|
pTab->bIgnoreSavepoint = 0;
|
|
sqlite3_free(zSql);
|
|
}else{
|
|
rc = SQLITE_NOMEM;
|
|
}
|
|
}
|
|
if( rc==SQLITE_OK ){
|
|
pTab->iSavepoint = iSavepoint+1;
|
|
}
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** The xRelease() method.
|
|
**
|
|
** This is a no-op.
|
|
*/
|
|
static int fts3ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){
|
|
Fts3Table *pTab = (Fts3Table*)pVtab;
|
|
assert( pTab->inTransaction );
|
|
assert( pTab->mxSavepoint >= iSavepoint );
|
|
TESTONLY( pTab->mxSavepoint = iSavepoint-1 );
|
|
pTab->iSavepoint = iSavepoint;
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** The xRollbackTo() method.
|
|
**
|
|
** Discard the contents of the pending terms table.
|
|
*/
|
|
static int fts3RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){
|
|
Fts3Table *pTab = (Fts3Table*)pVtab;
|
|
UNUSED_PARAMETER(iSavepoint);
|
|
assert( pTab->inTransaction );
|
|
TESTONLY( pTab->mxSavepoint = iSavepoint );
|
|
if( (iSavepoint+1)<=pTab->iSavepoint ){
|
|
sqlite3Fts3PendingTermsClear(pTab);
|
|
}
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** Return true if zName is the extension on one of the shadow tables used
|
|
** by this module.
|
|
*/
|
|
static int fts3ShadowName(const char *zName){
|
|
static const char *azName[] = {
|
|
"content", "docsize", "segdir", "segments", "stat",
|
|
};
|
|
unsigned int i;
|
|
for(i=0; i<sizeof(azName)/sizeof(azName[0]); i++){
|
|
if( sqlite3_stricmp(zName, azName[i])==0 ) return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
** Implementation of the xIntegrity() method on the FTS3/FTS4 virtual
|
|
** table.
|
|
*/
|
|
static int fts3IntegrityMethod(
|
|
sqlite3_vtab *pVtab, /* The virtual table to be checked */
|
|
const char *zSchema, /* Name of schema in which pVtab lives */
|
|
const char *zTabname, /* Name of the pVTab table */
|
|
int isQuick, /* True if this is a quick_check */
|
|
char **pzErr /* Write error message here */
|
|
){
|
|
Fts3Table *p = (Fts3Table*)pVtab;
|
|
int rc;
|
|
int bOk = 0;
|
|
|
|
UNUSED_PARAMETER(isQuick);
|
|
rc = sqlite3Fts3IntegrityCheck(p, &bOk);
|
|
assert( rc!=SQLITE_CORRUPT_VTAB || bOk==0 );
|
|
if( rc!=SQLITE_OK && rc!=SQLITE_CORRUPT_VTAB ){
|
|
*pzErr = sqlite3_mprintf("unable to validate the inverted index for"
|
|
" FTS%d table %s.%s: %s",
|
|
p->bFts4 ? 4 : 3, zSchema, zTabname, sqlite3_errstr(rc));
|
|
}else if( bOk==0 ){
|
|
*pzErr = sqlite3_mprintf("malformed inverted index for FTS%d table %s.%s",
|
|
p->bFts4 ? 4 : 3, zSchema, zTabname);
|
|
}
|
|
sqlite3Fts3SegmentsClose(p);
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
|
|
|
|
static const sqlite3_module fts3Module = {
|
|
/* iVersion */ 4,
|
|
/* xCreate */ fts3CreateMethod,
|
|
/* xConnect */ fts3ConnectMethod,
|
|
/* xBestIndex */ fts3BestIndexMethod,
|
|
/* xDisconnect */ fts3DisconnectMethod,
|
|
/* xDestroy */ fts3DestroyMethod,
|
|
/* xOpen */ fts3OpenMethod,
|
|
/* xClose */ fts3CloseMethod,
|
|
/* xFilter */ fts3FilterMethod,
|
|
/* xNext */ fts3NextMethod,
|
|
/* xEof */ fts3EofMethod,
|
|
/* xColumn */ fts3ColumnMethod,
|
|
/* xRowid */ fts3RowidMethod,
|
|
/* xUpdate */ fts3UpdateMethod,
|
|
/* xBegin */ fts3BeginMethod,
|
|
/* xSync */ fts3SyncMethod,
|
|
/* xCommit */ fts3CommitMethod,
|
|
/* xRollback */ fts3RollbackMethod,
|
|
/* xFindFunction */ fts3FindFunctionMethod,
|
|
/* xRename */ fts3RenameMethod,
|
|
/* xSavepoint */ fts3SavepointMethod,
|
|
/* xRelease */ fts3ReleaseMethod,
|
|
/* xRollbackTo */ fts3RollbackToMethod,
|
|
/* xShadowName */ fts3ShadowName,
|
|
/* xIntegrity */ fts3IntegrityMethod,
|
|
};
|
|
|
|
/*
|
|
** This function is registered as the module destructor (called when an
|
|
** FTS3 enabled database connection is closed). It frees the memory
|
|
** allocated for the tokenizer hash table.
|
|
*/
|
|
static void hashDestroy(void *p){
|
|
Fts3HashWrapper *pHash = (Fts3HashWrapper *)p;
|
|
pHash->nRef--;
|
|
if( pHash->nRef<=0 ){
|
|
sqlite3Fts3HashClear(&pHash->hash);
|
|
sqlite3_free(pHash);
|
|
}
|
|
}
|
|
|
|
/*
|
|
** The fts3 built-in tokenizers - "simple", "porter" and "icu"- are
|
|
** implemented in files fts3_tokenizer1.c, fts3_porter.c and fts3_icu.c
|
|
** respectively. The following three forward declarations are for functions
|
|
** declared in these files used to retrieve the respective implementations.
|
|
**
|
|
** Calling sqlite3Fts3SimpleTokenizerModule() sets the value pointed
|
|
** to by the argument to point to the "simple" tokenizer implementation.
|
|
** And so on.
|
|
*/
|
|
void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
|
|
void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
|
|
#ifndef SQLITE_DISABLE_FTS3_UNICODE
|
|
void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const**ppModule);
|
|
#endif
|
|
#ifdef SQLITE_ENABLE_ICU
|
|
void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);
|
|
#endif
|
|
|
|
/*
|
|
** Initialize the fts3 extension. If this extension is built as part
|
|
** of the sqlite library, then this function is called directly by
|
|
** SQLite. If fts3 is built as a dynamically loadable extension, this
|
|
** function is called by the sqlite3_extension_init() entry point.
|
|
*/
|
|
int sqlite3Fts3Init(sqlite3 *db){
|
|
int rc = SQLITE_OK;
|
|
Fts3HashWrapper *pHash = 0;
|
|
const sqlite3_tokenizer_module *pSimple = 0;
|
|
const sqlite3_tokenizer_module *pPorter = 0;
|
|
#ifndef SQLITE_DISABLE_FTS3_UNICODE
|
|
const sqlite3_tokenizer_module *pUnicode = 0;
|
|
#endif
|
|
|
|
#ifdef SQLITE_ENABLE_ICU
|
|
const sqlite3_tokenizer_module *pIcu = 0;
|
|
sqlite3Fts3IcuTokenizerModule(&pIcu);
|
|
#endif
|
|
|
|
#ifndef SQLITE_DISABLE_FTS3_UNICODE
|
|
sqlite3Fts3UnicodeTokenizer(&pUnicode);
|
|
#endif
|
|
|
|
#ifdef SQLITE_TEST
|
|
rc = sqlite3Fts3InitTerm(db);
|
|
if( rc!=SQLITE_OK ) return rc;
|
|
#endif
|
|
|
|
rc = sqlite3Fts3InitAux(db);
|
|
if( rc!=SQLITE_OK ) return rc;
|
|
|
|
sqlite3Fts3SimpleTokenizerModule(&pSimple);
|
|
sqlite3Fts3PorterTokenizerModule(&pPorter);
|
|
|
|
/* Allocate and initialize the hash-table used to store tokenizers. */
|
|
pHash = sqlite3_malloc(sizeof(Fts3HashWrapper));
|
|
if( !pHash ){
|
|
rc = SQLITE_NOMEM;
|
|
}else{
|
|
sqlite3Fts3HashInit(&pHash->hash, FTS3_HASH_STRING, 1);
|
|
pHash->nRef = 0;
|
|
}
|
|
|
|
/* Load the built-in tokenizers into the hash table */
|
|
if( rc==SQLITE_OK ){
|
|
if( sqlite3Fts3HashInsert(&pHash->hash, "simple", 7, (void *)pSimple)
|
|
|| sqlite3Fts3HashInsert(&pHash->hash, "porter", 7, (void *)pPorter)
|
|
|
|
#ifndef SQLITE_DISABLE_FTS3_UNICODE
|
|
|| sqlite3Fts3HashInsert(&pHash->hash, "unicode61", 10, (void *)pUnicode)
|
|
#endif
|
|
#ifdef SQLITE_ENABLE_ICU
|
|
|| (pIcu && sqlite3Fts3HashInsert(&pHash->hash, "icu", 4, (void *)pIcu))
|
|
#endif
|
|
){
|
|
rc = SQLITE_NOMEM;
|
|
}
|
|
}
|
|
|
|
#ifdef SQLITE_TEST
|
|
if( rc==SQLITE_OK ){
|
|
rc = sqlite3Fts3ExprInitTestInterface(db, &pHash->hash);
|
|
}
|
|
#endif
|
|
|
|
/* Create the virtual table wrapper around the hash-table and overload
|
|
** the four scalar functions. If this is successful, register the
|
|
** module with sqlite.
|
|
*/
|
|
if( SQLITE_OK==rc
|
|
&& SQLITE_OK==(rc=sqlite3Fts3InitHashTable(db,&pHash->hash,"fts3_tokenizer"))
|
|
&& SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1))
|
|
&& SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", 1))
|
|
&& SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", 1))
|
|
&& SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", 2))
|
|
&& SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", 1))
|
|
){
|
|
pHash->nRef++;
|
|
rc = sqlite3_create_module_v2(
|
|
db, "fts3", &fts3Module, (void *)pHash, hashDestroy
|
|
);
|
|
if( rc==SQLITE_OK ){
|
|
pHash->nRef++;
|
|
rc = sqlite3_create_module_v2(
|
|
db, "fts4", &fts3Module, (void *)pHash, hashDestroy
|
|
);
|
|
}
|
|
if( rc==SQLITE_OK ){
|
|
pHash->nRef++;
|
|
rc = sqlite3Fts3InitTok(db, (void *)pHash, hashDestroy);
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
|
|
/* An error has occurred. Delete the hash table and return the error code. */
|
|
assert( rc!=SQLITE_OK );
|
|
if( pHash ){
|
|
sqlite3Fts3HashClear(&pHash->hash);
|
|
sqlite3_free(pHash);
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Allocate an Fts3MultiSegReader for each token in the expression headed
|
|
** by pExpr.
|
|
**
|
|
** An Fts3SegReader object is a cursor that can seek or scan a range of
|
|
** entries within a single segment b-tree. An Fts3MultiSegReader uses multiple
|
|
** Fts3SegReader objects internally to provide an interface to seek or scan
|
|
** within the union of all segments of a b-tree. Hence the name.
|
|
**
|
|
** If the allocated Fts3MultiSegReader just seeks to a single entry in a
|
|
** segment b-tree (if the term is not a prefix or it is a prefix for which
|
|
** there exists prefix b-tree of the right length) then it may be traversed
|
|
** and merged incrementally. Otherwise, it has to be merged into an in-memory
|
|
** doclist and then traversed.
|
|
*/
|
|
static void fts3EvalAllocateReaders(
|
|
Fts3Cursor *pCsr, /* FTS cursor handle */
|
|
Fts3Expr *pExpr, /* Allocate readers for this expression */
|
|
int *pnToken, /* OUT: Total number of tokens in phrase. */
|
|
int *pnOr, /* OUT: Total number of OR nodes in expr. */
|
|
int *pRc /* IN/OUT: Error code */
|
|
){
|
|
if( pExpr && SQLITE_OK==*pRc ){
|
|
if( pExpr->eType==FTSQUERY_PHRASE ){
|
|
int i;
|
|
int nToken = pExpr->pPhrase->nToken;
|
|
*pnToken += nToken;
|
|
for(i=0; i<nToken; i++){
|
|
Fts3PhraseToken *pToken = &pExpr->pPhrase->aToken[i];
|
|
int rc = fts3TermSegReaderCursor(pCsr,
|
|
pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr
|
|
);
|
|
if( rc!=SQLITE_OK ){
|
|
*pRc = rc;
|
|
return;
|
|
}
|
|
}
|
|
assert( pExpr->pPhrase->iDoclistToken==0 );
|
|
pExpr->pPhrase->iDoclistToken = -1;
|
|
}else{
|
|
*pnOr += (pExpr->eType==FTSQUERY_OR);
|
|
fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pnOr, pRc);
|
|
fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pnOr, pRc);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Arguments pList/nList contain the doclist for token iToken of phrase p.
|
|
** It is merged into the main doclist stored in p->doclist.aAll/nAll.
|
|
**
|
|
** This function assumes that pList points to a buffer allocated using
|
|
** sqlite3_malloc(). This function takes responsibility for eventually
|
|
** freeing the buffer.
|
|
**
|
|
** SQLITE_OK is returned if successful, or SQLITE_NOMEM if an error occurs.
|
|
*/
|
|
static int fts3EvalPhraseMergeToken(
|
|
Fts3Table *pTab, /* FTS Table pointer */
|
|
Fts3Phrase *p, /* Phrase to merge pList/nList into */
|
|
int iToken, /* Token pList/nList corresponds to */
|
|
char *pList, /* Pointer to doclist */
|
|
int nList /* Number of bytes in pList */
|
|
){
|
|
int rc = SQLITE_OK;
|
|
assert( iToken!=p->iDoclistToken );
|
|
|
|
if( pList==0 ){
|
|
sqlite3_free(p->doclist.aAll);
|
|
p->doclist.aAll = 0;
|
|
p->doclist.nAll = 0;
|
|
}
|
|
|
|
else if( p->iDoclistToken<0 ){
|
|
p->doclist.aAll = pList;
|
|
p->doclist.nAll = nList;
|
|
}
|
|
|
|
else if( p->doclist.aAll==0 ){
|
|
sqlite3_free(pList);
|
|
}
|
|
|
|
else {
|
|
char *pLeft;
|
|
char *pRight;
|
|
int nLeft;
|
|
int nRight;
|
|
int nDiff;
|
|
|
|
if( p->iDoclistToken<iToken ){
|
|
pLeft = p->doclist.aAll;
|
|
nLeft = p->doclist.nAll;
|
|
pRight = pList;
|
|
nRight = nList;
|
|
nDiff = iToken - p->iDoclistToken;
|
|
}else{
|
|
pRight = p->doclist.aAll;
|
|
nRight = p->doclist.nAll;
|
|
pLeft = pList;
|
|
nLeft = nList;
|
|
nDiff = p->iDoclistToken - iToken;
|
|
}
|
|
|
|
rc = fts3DoclistPhraseMerge(
|
|
pTab->bDescIdx, nDiff, pLeft, nLeft, &pRight, &nRight
|
|
);
|
|
sqlite3_free(pLeft);
|
|
p->doclist.aAll = pRight;
|
|
p->doclist.nAll = nRight;
|
|
}
|
|
|
|
if( iToken>p->iDoclistToken ) p->iDoclistToken = iToken;
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Load the doclist for phrase p into p->doclist.aAll/nAll. The loaded doclist
|
|
** does not take deferred tokens into account.
|
|
**
|
|
** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
|
|
*/
|
|
static int fts3EvalPhraseLoad(
|
|
Fts3Cursor *pCsr, /* FTS Cursor handle */
|
|
Fts3Phrase *p /* Phrase object */
|
|
){
|
|
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
|
|
int iToken;
|
|
int rc = SQLITE_OK;
|
|
|
|
for(iToken=0; rc==SQLITE_OK && iToken<p->nToken; iToken++){
|
|
Fts3PhraseToken *pToken = &p->aToken[iToken];
|
|
assert( pToken->pDeferred==0 || pToken->pSegcsr==0 );
|
|
|
|
if( pToken->pSegcsr ){
|
|
int nThis = 0;
|
|
char *pThis = 0;
|
|
rc = fts3TermSelect(pTab, pToken, p->iColumn, &nThis, &pThis);
|
|
if( rc==SQLITE_OK ){
|
|
rc = fts3EvalPhraseMergeToken(pTab, p, iToken, pThis, nThis);
|
|
}
|
|
}
|
|
assert( pToken->pSegcsr==0 );
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
#ifndef SQLITE_DISABLE_FTS4_DEFERRED
|
|
/*
|
|
** This function is called on each phrase after the position lists for
|
|
** any deferred tokens have been loaded into memory. It updates the phrases
|
|
** current position list to include only those positions that are really
|
|
** instances of the phrase (after considering deferred tokens). If this
|
|
** means that the phrase does not appear in the current row, doclist.pList
|
|
** and doclist.nList are both zeroed.
|
|
**
|
|
** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
|
|
*/
|
|
static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
|
|
int iToken; /* Used to iterate through phrase tokens */
|
|
char *aPoslist = 0; /* Position list for deferred tokens */
|
|
int nPoslist = 0; /* Number of bytes in aPoslist */
|
|
int iPrev = -1; /* Token number of previous deferred token */
|
|
char *aFree = (pPhrase->doclist.bFreeList ? pPhrase->doclist.pList : 0);
|
|
|
|
for(iToken=0; iToken<pPhrase->nToken; iToken++){
|
|
Fts3PhraseToken *pToken = &pPhrase->aToken[iToken];
|
|
Fts3DeferredToken *pDeferred = pToken->pDeferred;
|
|
|
|
if( pDeferred ){
|
|
char *pList;
|
|
int nList;
|
|
int rc = sqlite3Fts3DeferredTokenList(pDeferred, &pList, &nList);
|
|
if( rc!=SQLITE_OK ) return rc;
|
|
|
|
if( pList==0 ){
|
|
sqlite3_free(aPoslist);
|
|
sqlite3_free(aFree);
|
|
pPhrase->doclist.pList = 0;
|
|
pPhrase->doclist.nList = 0;
|
|
return SQLITE_OK;
|
|
|
|
}else if( aPoslist==0 ){
|
|
aPoslist = pList;
|
|
nPoslist = nList;
|
|
|
|
}else{
|
|
char *aOut = pList;
|
|
char *p1 = aPoslist;
|
|
char *p2 = aOut;
|
|
|
|
assert( iPrev>=0 );
|
|
fts3PoslistPhraseMerge(&aOut, iToken-iPrev, 0, 1, &p1, &p2);
|
|
sqlite3_free(aPoslist);
|
|
aPoslist = pList;
|
|
nPoslist = (int)(aOut - aPoslist);
|
|
if( nPoslist==0 ){
|
|
sqlite3_free(aPoslist);
|
|
sqlite3_free(aFree);
|
|
pPhrase->doclist.pList = 0;
|
|
pPhrase->doclist.nList = 0;
|
|
return SQLITE_OK;
|
|
}
|
|
}
|
|
iPrev = iToken;
|
|
}
|
|
}
|
|
|
|
if( iPrev>=0 ){
|
|
int nMaxUndeferred = pPhrase->iDoclistToken;
|
|
if( nMaxUndeferred<0 ){
|
|
pPhrase->doclist.pList = aPoslist;
|
|
pPhrase->doclist.nList = nPoslist;
|
|
pPhrase->doclist.iDocid = pCsr->iPrevId;
|
|
pPhrase->doclist.bFreeList = 1;
|
|
}else{
|
|
int nDistance;
|
|
char *p1;
|
|
char *p2;
|
|
char *aOut;
|
|
|
|
if( nMaxUndeferred>iPrev ){
|
|
p1 = aPoslist;
|
|
p2 = pPhrase->doclist.pList;
|
|
nDistance = nMaxUndeferred - iPrev;
|
|
}else{
|
|
p1 = pPhrase->doclist.pList;
|
|
p2 = aPoslist;
|
|
nDistance = iPrev - nMaxUndeferred;
|
|
}
|
|
|
|
aOut = (char *)sqlite3Fts3MallocZero(nPoslist+FTS3_BUFFER_PADDING);
|
|
if( !aOut ){
|
|
sqlite3_free(aPoslist);
|
|
return SQLITE_NOMEM;
|
|
}
|
|
|
|
pPhrase->doclist.pList = aOut;
|
|
assert( p1 && p2 );
|
|
if( fts3PoslistPhraseMerge(&aOut, nDistance, 0, 1, &p1, &p2) ){
|
|
pPhrase->doclist.bFreeList = 1;
|
|
pPhrase->doclist.nList = (int)(aOut - pPhrase->doclist.pList);
|
|
}else{
|
|
sqlite3_free(aOut);
|
|
pPhrase->doclist.pList = 0;
|
|
pPhrase->doclist.nList = 0;
|
|
}
|
|
sqlite3_free(aPoslist);
|
|
}
|
|
}
|
|
|
|
if( pPhrase->doclist.pList!=aFree ) sqlite3_free(aFree);
|
|
return SQLITE_OK;
|
|
}
|
|
#endif /* SQLITE_DISABLE_FTS4_DEFERRED */
|
|
|
|
/*
|
|
** Maximum number of tokens a phrase may have to be considered for the
|
|
** incremental doclists strategy.
|
|
*/
|
|
#define MAX_INCR_PHRASE_TOKENS 4
|
|
|
|
/*
|
|
** This function is called for each Fts3Phrase in a full-text query
|
|
** expression to initialize the mechanism for returning rows. Once this
|
|
** function has been called successfully on an Fts3Phrase, it may be
|
|
** used with fts3EvalPhraseNext() to iterate through the matching docids.
|
|
**
|
|
** If parameter bOptOk is true, then the phrase may (or may not) use the
|
|
** incremental loading strategy. Otherwise, the entire doclist is loaded into
|
|
** memory within this call.
|
|
**
|
|
** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
|
|
*/
|
|
static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){
|
|
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
|
|
int rc = SQLITE_OK; /* Error code */
|
|
int i;
|
|
|
|
/* Determine if doclists may be loaded from disk incrementally. This is
|
|
** possible if the bOptOk argument is true, the FTS doclists will be
|
|
** scanned in forward order, and the phrase consists of
|
|
** MAX_INCR_PHRASE_TOKENS or fewer tokens, none of which are are "^first"
|
|
** tokens or prefix tokens that cannot use a prefix-index. */
|
|
int bHaveIncr = 0;
|
|
int bIncrOk = (bOptOk
|
|
&& pCsr->bDesc==pTab->bDescIdx
|
|
&& p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0
|
|
#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST)
|
|
&& pTab->bNoIncrDoclist==0
|
|
#endif
|
|
);
|
|
for(i=0; bIncrOk==1 && i<p->nToken; i++){
|
|
Fts3PhraseToken *pToken = &p->aToken[i];
|
|
if( pToken->bFirst || (pToken->pSegcsr!=0 && !pToken->pSegcsr->bLookup) ){
|
|
bIncrOk = 0;
|
|
}
|
|
if( pToken->pSegcsr ) bHaveIncr = 1;
|
|
}
|
|
|
|
if( bIncrOk && bHaveIncr ){
|
|
/* Use the incremental approach. */
|
|
int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn);
|
|
for(i=0; rc==SQLITE_OK && i<p->nToken; i++){
|
|
Fts3PhraseToken *pToken = &p->aToken[i];
|
|
Fts3MultiSegReader *pSegcsr = pToken->pSegcsr;
|
|
if( pSegcsr ){
|
|
rc = sqlite3Fts3MsrIncrStart(pTab, pSegcsr, iCol, pToken->z, pToken->n);
|
|
}
|
|
}
|
|
p->bIncr = 1;
|
|
}else{
|
|
/* Load the full doclist for the phrase into memory. */
|
|
rc = fts3EvalPhraseLoad(pCsr, p);
|
|
p->bIncr = 0;
|
|
}
|
|
|
|
assert( rc!=SQLITE_OK || p->nToken<1 || p->aToken[0].pSegcsr==0 || p->bIncr );
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** This function is used to iterate backwards (from the end to start)
|
|
** through doclists. It is used by this module to iterate through phrase
|
|
** doclists in reverse and by the fts3_write.c module to iterate through
|
|
** pending-terms lists when writing to databases with "order=desc".
|
|
**
|
|
** The doclist may be sorted in ascending (parameter bDescIdx==0) or
|
|
** descending (parameter bDescIdx==1) order of docid. Regardless, this
|
|
** function iterates from the end of the doclist to the beginning.
|
|
*/
|
|
void sqlite3Fts3DoclistPrev(
|
|
int bDescIdx, /* True if the doclist is desc */
|
|
char *aDoclist, /* Pointer to entire doclist */
|
|
int nDoclist, /* Length of aDoclist in bytes */
|
|
char **ppIter, /* IN/OUT: Iterator pointer */
|
|
sqlite3_int64 *piDocid, /* IN/OUT: Docid pointer */
|
|
int *pnList, /* OUT: List length pointer */
|
|
u8 *pbEof /* OUT: End-of-file flag */
|
|
){
|
|
char *p = *ppIter;
|
|
|
|
assert( nDoclist>0 );
|
|
assert( *pbEof==0 );
|
|
assert_fts3_nc( p || *piDocid==0 );
|
|
assert( !p || (p>aDoclist && p<&aDoclist[nDoclist]) );
|
|
|
|
if( p==0 ){
|
|
sqlite3_int64 iDocid = 0;
|
|
char *pNext = 0;
|
|
char *pDocid = aDoclist;
|
|
char *pEnd = &aDoclist[nDoclist];
|
|
int iMul = 1;
|
|
|
|
while( pDocid<pEnd ){
|
|
sqlite3_int64 iDelta;
|
|
pDocid += sqlite3Fts3GetVarint(pDocid, &iDelta);
|
|
iDocid += (iMul * iDelta);
|
|
pNext = pDocid;
|
|
fts3PoslistCopy(0, &pDocid);
|
|
while( pDocid<pEnd && *pDocid==0 ) pDocid++;
|
|
iMul = (bDescIdx ? -1 : 1);
|
|
}
|
|
|
|
*pnList = (int)(pEnd - pNext);
|
|
*ppIter = pNext;
|
|
*piDocid = iDocid;
|
|
}else{
|
|
int iMul = (bDescIdx ? -1 : 1);
|
|
sqlite3_int64 iDelta;
|
|
fts3GetReverseVarint(&p, aDoclist, &iDelta);
|
|
*piDocid -= (iMul * iDelta);
|
|
|
|
if( p==aDoclist ){
|
|
*pbEof = 1;
|
|
}else{
|
|
char *pSave = p;
|
|
fts3ReversePoslist(aDoclist, &p);
|
|
*pnList = (int)(pSave - p);
|
|
}
|
|
*ppIter = p;
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Iterate forwards through a doclist.
|
|
*/
|
|
void sqlite3Fts3DoclistNext(
|
|
int bDescIdx, /* True if the doclist is desc */
|
|
char *aDoclist, /* Pointer to entire doclist */
|
|
int nDoclist, /* Length of aDoclist in bytes */
|
|
char **ppIter, /* IN/OUT: Iterator pointer */
|
|
sqlite3_int64 *piDocid, /* IN/OUT: Docid pointer */
|
|
u8 *pbEof /* OUT: End-of-file flag */
|
|
){
|
|
char *p = *ppIter;
|
|
|
|
assert( nDoclist>0 );
|
|
assert( *pbEof==0 );
|
|
assert_fts3_nc( p || *piDocid==0 );
|
|
assert( !p || (p>=aDoclist && p<=&aDoclist[nDoclist]) );
|
|
|
|
if( p==0 ){
|
|
p = aDoclist;
|
|
p += sqlite3Fts3GetVarint(p, piDocid);
|
|
}else{
|
|
fts3PoslistCopy(0, &p);
|
|
while( p<&aDoclist[nDoclist] && *p==0 ) p++;
|
|
if( p>=&aDoclist[nDoclist] ){
|
|
*pbEof = 1;
|
|
}else{
|
|
sqlite3_int64 iVar;
|
|
p += sqlite3Fts3GetVarint(p, &iVar);
|
|
*piDocid += ((bDescIdx ? -1 : 1) * iVar);
|
|
}
|
|
}
|
|
|
|
*ppIter = p;
|
|
}
|
|
|
|
/*
|
|
** Advance the iterator pDL to the next entry in pDL->aAll/nAll. Set *pbEof
|
|
** to true if EOF is reached.
|
|
*/
|
|
static void fts3EvalDlPhraseNext(
|
|
Fts3Table *pTab,
|
|
Fts3Doclist *pDL,
|
|
u8 *pbEof
|
|
){
|
|
char *pIter; /* Used to iterate through aAll */
|
|
char *pEnd; /* 1 byte past end of aAll */
|
|
|
|
if( pDL->pNextDocid ){
|
|
pIter = pDL->pNextDocid;
|
|
assert( pDL->aAll!=0 || pIter==0 );
|
|
}else{
|
|
pIter = pDL->aAll;
|
|
}
|
|
|
|
if( pIter==0 || pIter>=(pEnd = pDL->aAll + pDL->nAll) ){
|
|
/* We have already reached the end of this doclist. EOF. */
|
|
*pbEof = 1;
|
|
}else{
|
|
sqlite3_int64 iDelta;
|
|
pIter += sqlite3Fts3GetVarint(pIter, &iDelta);
|
|
if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){
|
|
pDL->iDocid += iDelta;
|
|
}else{
|
|
pDL->iDocid -= iDelta;
|
|
}
|
|
pDL->pList = pIter;
|
|
fts3PoslistCopy(0, &pIter);
|
|
pDL->nList = (int)(pIter - pDL->pList);
|
|
|
|
/* pIter now points just past the 0x00 that terminates the position-
|
|
** list for document pDL->iDocid. However, if this position-list was
|
|
** edited in place by fts3EvalNearTrim(), then pIter may not actually
|
|
** point to the start of the next docid value. The following line deals
|
|
** with this case by advancing pIter past the zero-padding added by
|
|
** fts3EvalNearTrim(). */
|
|
while( pIter<pEnd && *pIter==0 ) pIter++;
|
|
|
|
pDL->pNextDocid = pIter;
|
|
assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter );
|
|
*pbEof = 0;
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Helper type used by fts3EvalIncrPhraseNext() and incrPhraseTokenNext().
|
|
*/
|
|
typedef struct TokenDoclist TokenDoclist;
|
|
struct TokenDoclist {
|
|
int bIgnore;
|
|
sqlite3_int64 iDocid;
|
|
char *pList;
|
|
int nList;
|
|
};
|
|
|
|
/*
|
|
** Token pToken is an incrementally loaded token that is part of a
|
|
** multi-token phrase. Advance it to the next matching document in the
|
|
** database and populate output variable *p with the details of the new
|
|
** entry. Or, if the iterator has reached EOF, set *pbEof to true.
|
|
**
|
|
** If an error occurs, return an SQLite error code. Otherwise, return
|
|
** SQLITE_OK.
|
|
*/
|
|
static int incrPhraseTokenNext(
|
|
Fts3Table *pTab, /* Virtual table handle */
|
|
Fts3Phrase *pPhrase, /* Phrase to advance token of */
|
|
int iToken, /* Specific token to advance */
|
|
TokenDoclist *p, /* OUT: Docid and doclist for new entry */
|
|
u8 *pbEof /* OUT: True if iterator is at EOF */
|
|
){
|
|
int rc = SQLITE_OK;
|
|
|
|
if( pPhrase->iDoclistToken==iToken ){
|
|
assert( p->bIgnore==0 );
|
|
assert( pPhrase->aToken[iToken].pSegcsr==0 );
|
|
fts3EvalDlPhraseNext(pTab, &pPhrase->doclist, pbEof);
|
|
p->pList = pPhrase->doclist.pList;
|
|
p->nList = pPhrase->doclist.nList;
|
|
p->iDocid = pPhrase->doclist.iDocid;
|
|
}else{
|
|
Fts3PhraseToken *pToken = &pPhrase->aToken[iToken];
|
|
assert( pToken->pDeferred==0 );
|
|
assert( pToken->pSegcsr || pPhrase->iDoclistToken>=0 );
|
|
if( pToken->pSegcsr ){
|
|
assert( p->bIgnore==0 );
|
|
rc = sqlite3Fts3MsrIncrNext(
|
|
pTab, pToken->pSegcsr, &p->iDocid, &p->pList, &p->nList
|
|
);
|
|
if( p->pList==0 ) *pbEof = 1;
|
|
}else{
|
|
p->bIgnore = 1;
|
|
}
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
|
|
/*
|
|
** The phrase iterator passed as the second argument:
|
|
**
|
|
** * features at least one token that uses an incremental doclist, and
|
|
**
|
|
** * does not contain any deferred tokens.
|
|
**
|
|
** Advance it to the next matching documnent in the database and populate
|
|
** the Fts3Doclist.pList and nList fields.
|
|
**
|
|
** If there is no "next" entry and no error occurs, then *pbEof is set to
|
|
** 1 before returning. Otherwise, if no error occurs and the iterator is
|
|
** successfully advanced, *pbEof is set to 0.
|
|
**
|
|
** If an error occurs, return an SQLite error code. Otherwise, return
|
|
** SQLITE_OK.
|
|
*/
|
|
static int fts3EvalIncrPhraseNext(
|
|
Fts3Cursor *pCsr, /* FTS Cursor handle */
|
|
Fts3Phrase *p, /* Phrase object to advance to next docid */
|
|
u8 *pbEof /* OUT: Set to 1 if EOF */
|
|
){
|
|
int rc = SQLITE_OK;
|
|
Fts3Doclist *pDL = &p->doclist;
|
|
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
|
|
u8 bEof = 0;
|
|
|
|
/* This is only called if it is guaranteed that the phrase has at least
|
|
** one incremental token. In which case the bIncr flag is set. */
|
|
assert( p->bIncr==1 );
|
|
|
|
if( p->nToken==1 ){
|
|
rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr,
|
|
&pDL->iDocid, &pDL->pList, &pDL->nList
|
|
);
|
|
if( pDL->pList==0 ) bEof = 1;
|
|
}else{
|
|
int bDescDoclist = pCsr->bDesc;
|
|
struct TokenDoclist a[MAX_INCR_PHRASE_TOKENS];
|
|
|
|
memset(a, 0, sizeof(a));
|
|
assert( p->nToken<=MAX_INCR_PHRASE_TOKENS );
|
|
assert( p->iDoclistToken<MAX_INCR_PHRASE_TOKENS );
|
|
|
|
while( bEof==0 ){
|
|
int bMaxSet = 0;
|
|
sqlite3_int64 iMax = 0; /* Largest docid for all iterators */
|
|
int i; /* Used to iterate through tokens */
|
|
|
|
/* Advance the iterator for each token in the phrase once. */
|
|
for(i=0; rc==SQLITE_OK && i<p->nToken && bEof==0; i++){
|
|
rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof);
|
|
if( a[i].bIgnore==0 && (bMaxSet==0 || DOCID_CMP(iMax, a[i].iDocid)<0) ){
|
|
iMax = a[i].iDocid;
|
|
bMaxSet = 1;
|
|
}
|
|
}
|
|
assert( rc!=SQLITE_OK || (p->nToken>=1 && a[p->nToken-1].bIgnore==0) );
|
|
assert( rc!=SQLITE_OK || bMaxSet );
|
|
|
|
/* Keep advancing iterators until they all point to the same document */
|
|
for(i=0; i<p->nToken; i++){
|
|
while( rc==SQLITE_OK && bEof==0
|
|
&& a[i].bIgnore==0 && DOCID_CMP(a[i].iDocid, iMax)<0
|
|
){
|
|
rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof);
|
|
if( DOCID_CMP(a[i].iDocid, iMax)>0 ){
|
|
iMax = a[i].iDocid;
|
|
i = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Check if the current entries really are a phrase match */
|
|
if( bEof==0 ){
|
|
int nList = 0;
|
|
int nByte = a[p->nToken-1].nList;
|
|
char *aDoclist = sqlite3_malloc64((i64)nByte+FTS3_BUFFER_PADDING);
|
|
if( !aDoclist ) return SQLITE_NOMEM;
|
|
memcpy(aDoclist, a[p->nToken-1].pList, nByte+1);
|
|
memset(&aDoclist[nByte], 0, FTS3_BUFFER_PADDING);
|
|
|
|
for(i=0; i<(p->nToken-1); i++){
|
|
if( a[i].bIgnore==0 ){
|
|
char *pL = a[i].pList;
|
|
char *pR = aDoclist;
|
|
char *pOut = aDoclist;
|
|
int nDist = p->nToken-1-i;
|
|
int res = fts3PoslistPhraseMerge(&pOut, nDist, 0, 1, &pL, &pR);
|
|
if( res==0 ) break;
|
|
nList = (int)(pOut - aDoclist);
|
|
}
|
|
}
|
|
if( i==(p->nToken-1) ){
|
|
pDL->iDocid = iMax;
|
|
pDL->pList = aDoclist;
|
|
pDL->nList = nList;
|
|
pDL->bFreeList = 1;
|
|
break;
|
|
}
|
|
sqlite3_free(aDoclist);
|
|
}
|
|
}
|
|
}
|
|
|
|
*pbEof = bEof;
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Attempt to move the phrase iterator to point to the next matching docid.
|
|
** If an error occurs, return an SQLite error code. Otherwise, return
|
|
** SQLITE_OK.
|
|
**
|
|
** If there is no "next" entry and no error occurs, then *pbEof is set to
|
|
** 1 before returning. Otherwise, if no error occurs and the iterator is
|
|
** successfully advanced, *pbEof is set to 0.
|
|
*/
|
|
static int fts3EvalPhraseNext(
|
|
Fts3Cursor *pCsr, /* FTS Cursor handle */
|
|
Fts3Phrase *p, /* Phrase object to advance to next docid */
|
|
u8 *pbEof /* OUT: Set to 1 if EOF */
|
|
){
|
|
int rc = SQLITE_OK;
|
|
Fts3Doclist *pDL = &p->doclist;
|
|
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
|
|
|
|
if( p->bIncr ){
|
|
rc = fts3EvalIncrPhraseNext(pCsr, p, pbEof);
|
|
}else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){
|
|
sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll,
|
|
&pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof
|
|
);
|
|
pDL->pList = pDL->pNextDocid;
|
|
}else{
|
|
fts3EvalDlPhraseNext(pTab, pDL, pbEof);
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
**
|
|
** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
|
|
** Otherwise, fts3EvalPhraseStart() is called on all phrases within the
|
|
** expression. Also the Fts3Expr.bDeferred variable is set to true for any
|
|
** expressions for which all descendent tokens are deferred.
|
|
**
|
|
** If parameter bOptOk is zero, then it is guaranteed that the
|
|
** Fts3Phrase.doclist.aAll/nAll variables contain the entire doclist for
|
|
** each phrase in the expression (subject to deferred token processing).
|
|
** Or, if bOptOk is non-zero, then one or more tokens within the expression
|
|
** may be loaded incrementally, meaning doclist.aAll/nAll is not available.
|
|
**
|
|
** If an error occurs within this function, *pRc is set to an SQLite error
|
|
** code before returning.
|
|
*/
|
|
static void fts3EvalStartReaders(
|
|
Fts3Cursor *pCsr, /* FTS Cursor handle */
|
|
Fts3Expr *pExpr, /* Expression to initialize phrases in */
|
|
int *pRc /* IN/OUT: Error code */
|
|
){
|
|
if( pExpr && SQLITE_OK==*pRc ){
|
|
if( pExpr->eType==FTSQUERY_PHRASE ){
|
|
int nToken = pExpr->pPhrase->nToken;
|
|
if( nToken ){
|
|
int i;
|
|
for(i=0; i<nToken; i++){
|
|
if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break;
|
|
}
|
|
pExpr->bDeferred = (i==nToken);
|
|
}
|
|
*pRc = fts3EvalPhraseStart(pCsr, 1, pExpr->pPhrase);
|
|
}else{
|
|
fts3EvalStartReaders(pCsr, pExpr->pLeft, pRc);
|
|
fts3EvalStartReaders(pCsr, pExpr->pRight, pRc);
|
|
pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
** An array of the following structures is assembled as part of the process
|
|
** of selecting tokens to defer before the query starts executing (as part
|
|
** of the xFilter() method). There is one element in the array for each
|
|
** token in the FTS expression.
|
|
**
|
|
** Tokens are divided into AND/NEAR clusters. All tokens in a cluster belong
|
|
** to phrases that are connected only by AND and NEAR operators (not OR or
|
|
** NOT). When determining tokens to defer, each AND/NEAR cluster is considered
|
|
** separately. The root of a tokens AND/NEAR cluster is stored in
|
|
** Fts3TokenAndCost.pRoot.
|
|
*/
|
|
typedef struct Fts3TokenAndCost Fts3TokenAndCost;
|
|
struct Fts3TokenAndCost {
|
|
Fts3Phrase *pPhrase; /* The phrase the token belongs to */
|
|
int iToken; /* Position of token in phrase */
|
|
Fts3PhraseToken *pToken; /* The token itself */
|
|
Fts3Expr *pRoot; /* Root of NEAR/AND cluster */
|
|
int nOvfl; /* Number of overflow pages to load doclist */
|
|
int iCol; /* The column the token must match */
|
|
};
|
|
|
|
/*
|
|
** This function is used to populate an allocated Fts3TokenAndCost array.
|
|
**
|
|
** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
|
|
** Otherwise, if an error occurs during execution, *pRc is set to an
|
|
** SQLite error code.
|
|
*/
|
|
static void fts3EvalTokenCosts(
|
|
Fts3Cursor *pCsr, /* FTS Cursor handle */
|
|
Fts3Expr *pRoot, /* Root of current AND/NEAR cluster */
|
|
Fts3Expr *pExpr, /* Expression to consider */
|
|
Fts3TokenAndCost **ppTC, /* Write new entries to *(*ppTC)++ */
|
|
Fts3Expr ***ppOr, /* Write new OR root to *(*ppOr)++ */
|
|
int *pRc /* IN/OUT: Error code */
|
|
){
|
|
if( *pRc==SQLITE_OK ){
|
|
if( pExpr->eType==FTSQUERY_PHRASE ){
|
|
Fts3Phrase *pPhrase = pExpr->pPhrase;
|
|
int i;
|
|
for(i=0; *pRc==SQLITE_OK && i<pPhrase->nToken; i++){
|
|
Fts3TokenAndCost *pTC = (*ppTC)++;
|
|
pTC->pPhrase = pPhrase;
|
|
pTC->iToken = i;
|
|
pTC->pRoot = pRoot;
|
|
pTC->pToken = &pPhrase->aToken[i];
|
|
pTC->iCol = pPhrase->iColumn;
|
|
*pRc = sqlite3Fts3MsrOvfl(pCsr, pTC->pToken->pSegcsr, &pTC->nOvfl);
|
|
}
|
|
}else if( pExpr->eType!=FTSQUERY_NOT ){
|
|
assert( pExpr->eType==FTSQUERY_OR
|
|
|| pExpr->eType==FTSQUERY_AND
|
|
|| pExpr->eType==FTSQUERY_NEAR
|
|
);
|
|
assert( pExpr->pLeft && pExpr->pRight );
|
|
if( pExpr->eType==FTSQUERY_OR ){
|
|
pRoot = pExpr->pLeft;
|
|
**ppOr = pRoot;
|
|
(*ppOr)++;
|
|
}
|
|
fts3EvalTokenCosts(pCsr, pRoot, pExpr->pLeft, ppTC, ppOr, pRc);
|
|
if( pExpr->eType==FTSQUERY_OR ){
|
|
pRoot = pExpr->pRight;
|
|
**ppOr = pRoot;
|
|
(*ppOr)++;
|
|
}
|
|
fts3EvalTokenCosts(pCsr, pRoot, pExpr->pRight, ppTC, ppOr, pRc);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Determine the average document (row) size in pages. If successful,
|
|
** write this value to *pnPage and return SQLITE_OK. Otherwise, return
|
|
** an SQLite error code.
|
|
**
|
|
** The average document size in pages is calculated by first calculating
|
|
** determining the average size in bytes, B. If B is less than the amount
|
|
** of data that will fit on a single leaf page of an intkey table in
|
|
** this database, then the average docsize is 1. Otherwise, it is 1 plus
|
|
** the number of overflow pages consumed by a record B bytes in size.
|
|
*/
|
|
static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){
|
|
int rc = SQLITE_OK;
|
|
if( pCsr->nRowAvg==0 ){
|
|
/* The average document size, which is required to calculate the cost
|
|
** of each doclist, has not yet been determined. Read the required
|
|
** data from the %_stat table to calculate it.
|
|
**
|
|
** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3
|
|
** varints, where nCol is the number of columns in the FTS3 table.
|
|
** The first varint is the number of documents currently stored in
|
|
** the table. The following nCol varints contain the total amount of
|
|
** data stored in all rows of each column of the table, from left
|
|
** to right.
|
|
*/
|
|
Fts3Table *p = (Fts3Table*)pCsr->base.pVtab;
|
|
sqlite3_stmt *pStmt;
|
|
sqlite3_int64 nDoc = 0;
|
|
sqlite3_int64 nByte = 0;
|
|
const char *pEnd;
|
|
const char *a;
|
|
|
|
rc = sqlite3Fts3SelectDoctotal(p, &pStmt);
|
|
if( rc!=SQLITE_OK ) return rc;
|
|
a = sqlite3_column_blob(pStmt, 0);
|
|
testcase( a==0 ); /* If %_stat.value set to X'' */
|
|
if( a ){
|
|
pEnd = &a[sqlite3_column_bytes(pStmt, 0)];
|
|
a += sqlite3Fts3GetVarintBounded(a, pEnd, &nDoc);
|
|
while( a<pEnd ){
|
|
a += sqlite3Fts3GetVarintBounded(a, pEnd, &nByte);
|
|
}
|
|
}
|
|
if( nDoc==0 || nByte==0 ){
|
|
sqlite3_reset(pStmt);
|
|
return FTS_CORRUPT_VTAB;
|
|
}
|
|
|
|
pCsr->nDoc = nDoc;
|
|
pCsr->nRowAvg = (int)(((nByte / nDoc) + p->nPgsz) / p->nPgsz);
|
|
assert( pCsr->nRowAvg>0 );
|
|
rc = sqlite3_reset(pStmt);
|
|
}
|
|
|
|
*pnPage = pCsr->nRowAvg;
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** This function is called to select the tokens (if any) that will be
|
|
** deferred. The array aTC[] has already been populated when this is
|
|
** called.
|
|
**
|
|
** This function is called once for each AND/NEAR cluster in the
|
|
** expression. Each invocation determines which tokens to defer within
|
|
** the cluster with root node pRoot. See comments above the definition
|
|
** of struct Fts3TokenAndCost for more details.
|
|
**
|
|
** If no error occurs, SQLITE_OK is returned and sqlite3Fts3DeferToken()
|
|
** called on each token to defer. Otherwise, an SQLite error code is
|
|
** returned.
|
|
*/
|
|
static int fts3EvalSelectDeferred(
|
|
Fts3Cursor *pCsr, /* FTS Cursor handle */
|
|
Fts3Expr *pRoot, /* Consider tokens with this root node */
|
|
Fts3TokenAndCost *aTC, /* Array of expression tokens and costs */
|
|
int nTC /* Number of entries in aTC[] */
|
|
){
|
|
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
|
|
int nDocSize = 0; /* Number of pages per doc loaded */
|
|
int rc = SQLITE_OK; /* Return code */
|
|
int ii; /* Iterator variable for various purposes */
|
|
int nOvfl = 0; /* Total overflow pages used by doclists */
|
|
int nToken = 0; /* Total number of tokens in cluster */
|
|
|
|
int nMinEst = 0; /* The minimum count for any phrase so far. */
|
|
int nLoad4 = 1; /* (Phrases that will be loaded)^4. */
|
|
|
|
/* Tokens are never deferred for FTS tables created using the content=xxx
|
|
** option. The reason being that it is not guaranteed that the content
|
|
** table actually contains the same data as the index. To prevent this from
|
|
** causing any problems, the deferred token optimization is completely
|
|
** disabled for content=xxx tables. */
|
|
if( pTab->zContentTbl ){
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/* Count the tokens in this AND/NEAR cluster. If none of the doclists
|
|
** associated with the tokens spill onto overflow pages, or if there is
|
|
** only 1 token, exit early. No tokens to defer in this case. */
|
|
for(ii=0; ii<nTC; ii++){
|
|
if( aTC[ii].pRoot==pRoot ){
|
|
nOvfl += aTC[ii].nOvfl;
|
|
nToken++;
|
|
}
|
|
}
|
|
if( nOvfl==0 || nToken<2 ) return SQLITE_OK;
|
|
|
|
/* Obtain the average docsize (in pages). */
|
|
rc = fts3EvalAverageDocsize(pCsr, &nDocSize);
|
|
assert( rc!=SQLITE_OK || nDocSize>0 );
|
|
|
|
|
|
/* Iterate through all tokens in this AND/NEAR cluster, in ascending order
|
|
** of the number of overflow pages that will be loaded by the pager layer
|
|
** to retrieve the entire doclist for the token from the full-text index.
|
|
** Load the doclists for tokens that are either:
|
|
**
|
|
** a. The cheapest token in the entire query (i.e. the one visited by the
|
|
** first iteration of this loop), or
|
|
**
|
|
** b. Part of a multi-token phrase.
|
|
**
|
|
** After each token doclist is loaded, merge it with the others from the
|
|
** same phrase and count the number of documents that the merged doclist
|
|
** contains. Set variable "nMinEst" to the smallest number of documents in
|
|
** any phrase doclist for which 1 or more token doclists have been loaded.
|
|
** Let nOther be the number of other phrases for which it is certain that
|
|
** one or more tokens will not be deferred.
|
|
**
|
|
** Then, for each token, defer it if loading the doclist would result in
|
|
** loading N or more overflow pages into memory, where N is computed as:
|
|
**
|
|
** (nMinEst + 4^nOther - 1) / (4^nOther)
|
|
*/
|
|
for(ii=0; ii<nToken && rc==SQLITE_OK; ii++){
|
|
int iTC; /* Used to iterate through aTC[] array. */
|
|
Fts3TokenAndCost *pTC = 0; /* Set to cheapest remaining token. */
|
|
|
|
/* Set pTC to point to the cheapest remaining token. */
|
|
for(iTC=0; iTC<nTC; iTC++){
|
|
if( aTC[iTC].pToken && aTC[iTC].pRoot==pRoot
|
|
&& (!pTC || aTC[iTC].nOvfl<pTC->nOvfl)
|
|
){
|
|
pTC = &aTC[iTC];
|
|
}
|
|
}
|
|
assert( pTC );
|
|
|
|
if( ii && pTC->nOvfl>=((nMinEst+(nLoad4/4)-1)/(nLoad4/4))*nDocSize ){
|
|
/* The number of overflow pages to load for this (and therefore all
|
|
** subsequent) tokens is greater than the estimated number of pages
|
|
** that will be loaded if all subsequent tokens are deferred.
|
|
*/
|
|
Fts3PhraseToken *pToken = pTC->pToken;
|
|
rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol);
|
|
fts3SegReaderCursorFree(pToken->pSegcsr);
|
|
pToken->pSegcsr = 0;
|
|
}else{
|
|
/* Set nLoad4 to the value of (4^nOther) for the next iteration of the
|
|
** for-loop. Except, limit the value to 2^24 to prevent it from
|
|
** overflowing the 32-bit integer it is stored in. */
|
|
if( ii<12 ) nLoad4 = nLoad4*4;
|
|
|
|
if( ii==0 || (pTC->pPhrase->nToken>1 && ii!=nToken-1) ){
|
|
/* Either this is the cheapest token in the entire query, or it is
|
|
** part of a multi-token phrase. Either way, the entire doclist will
|
|
** (eventually) be loaded into memory. It may as well be now. */
|
|
Fts3PhraseToken *pToken = pTC->pToken;
|
|
int nList = 0;
|
|
char *pList = 0;
|
|
rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList);
|
|
assert( rc==SQLITE_OK || pList==0 );
|
|
if( rc==SQLITE_OK ){
|
|
rc = fts3EvalPhraseMergeToken(
|
|
pTab, pTC->pPhrase, pTC->iToken,pList,nList
|
|
);
|
|
}
|
|
if( rc==SQLITE_OK ){
|
|
int nCount;
|
|
nCount = fts3DoclistCountDocids(
|
|
pTC->pPhrase->doclist.aAll, pTC->pPhrase->doclist.nAll
|
|
);
|
|
if( ii==0 || nCount<nMinEst ) nMinEst = nCount;
|
|
}
|
|
}
|
|
}
|
|
pTC->pToken = 0;
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** This function is called from within the xFilter method. It initializes
|
|
** the full-text query currently stored in pCsr->pExpr. To iterate through
|
|
** the results of a query, the caller does:
|
|
**
|
|
** fts3EvalStart(pCsr);
|
|
** while( 1 ){
|
|
** fts3EvalNext(pCsr);
|
|
** if( pCsr->bEof ) break;
|
|
** ... return row pCsr->iPrevId to the caller ...
|
|
** }
|
|
*/
|
|
static int fts3EvalStart(Fts3Cursor *pCsr){
|
|
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
|
|
int rc = SQLITE_OK;
|
|
int nToken = 0;
|
|
int nOr = 0;
|
|
|
|
/* Allocate a MultiSegReader for each token in the expression. */
|
|
fts3EvalAllocateReaders(pCsr, pCsr->pExpr, &nToken, &nOr, &rc);
|
|
|
|
/* Determine which, if any, tokens in the expression should be deferred. */
|
|
#ifndef SQLITE_DISABLE_FTS4_DEFERRED
|
|
if( rc==SQLITE_OK && nToken>1 && pTab->bFts4 ){
|
|
Fts3TokenAndCost *aTC;
|
|
aTC = (Fts3TokenAndCost *)sqlite3_malloc64(
|
|
sizeof(Fts3TokenAndCost) * nToken
|
|
+ sizeof(Fts3Expr *) * nOr * 2
|
|
);
|
|
|
|
if( !aTC ){
|
|
rc = SQLITE_NOMEM;
|
|
}else{
|
|
Fts3Expr **apOr = (Fts3Expr **)&aTC[nToken];
|
|
int ii;
|
|
Fts3TokenAndCost *pTC = aTC;
|
|
Fts3Expr **ppOr = apOr;
|
|
|
|
fts3EvalTokenCosts(pCsr, 0, pCsr->pExpr, &pTC, &ppOr, &rc);
|
|
nToken = (int)(pTC-aTC);
|
|
nOr = (int)(ppOr-apOr);
|
|
|
|
if( rc==SQLITE_OK ){
|
|
rc = fts3EvalSelectDeferred(pCsr, 0, aTC, nToken);
|
|
for(ii=0; rc==SQLITE_OK && ii<nOr; ii++){
|
|
rc = fts3EvalSelectDeferred(pCsr, apOr[ii], aTC, nToken);
|
|
}
|
|
}
|
|
|
|
sqlite3_free(aTC);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
fts3EvalStartReaders(pCsr, pCsr->pExpr, &rc);
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Invalidate the current position list for phrase pPhrase.
|
|
*/
|
|
static void fts3EvalInvalidatePoslist(Fts3Phrase *pPhrase){
|
|
if( pPhrase->doclist.bFreeList ){
|
|
sqlite3_free(pPhrase->doclist.pList);
|
|
}
|
|
pPhrase->doclist.pList = 0;
|
|
pPhrase->doclist.nList = 0;
|
|
pPhrase->doclist.bFreeList = 0;
|
|
}
|
|
|
|
/*
|
|
** This function is called to edit the position list associated with
|
|
** the phrase object passed as the fifth argument according to a NEAR
|
|
** condition. For example:
|
|
**
|
|
** abc NEAR/5 "def ghi"
|
|
**
|
|
** Parameter nNear is passed the NEAR distance of the expression (5 in
|
|
** the example above). When this function is called, *paPoslist points to
|
|
** the position list, and *pnToken is the number of phrase tokens in the
|
|
** phrase on the other side of the NEAR operator to pPhrase. For example,
|
|
** if pPhrase refers to the "def ghi" phrase, then *paPoslist points to
|
|
** the position list associated with phrase "abc".
|
|
**
|
|
** All positions in the pPhrase position list that are not sufficiently
|
|
** close to a position in the *paPoslist position list are removed. If this
|
|
** leaves 0 positions, zero is returned. Otherwise, non-zero.
|
|
**
|
|
** Before returning, *paPoslist is set to point to the position lsit
|
|
** associated with pPhrase. And *pnToken is set to the number of tokens in
|
|
** pPhrase.
|
|
*/
|
|
static int fts3EvalNearTrim(
|
|
int nNear, /* NEAR distance. As in "NEAR/nNear". */
|
|
char *aTmp, /* Temporary space to use */
|
|
char **paPoslist, /* IN/OUT: Position list */
|
|
int *pnToken, /* IN/OUT: Tokens in phrase of *paPoslist */
|
|
Fts3Phrase *pPhrase /* The phrase object to trim the doclist of */
|
|
){
|
|
int nParam1 = nNear + pPhrase->nToken;
|
|
int nParam2 = nNear + *pnToken;
|
|
int nNew;
|
|
char *p2;
|
|
char *pOut;
|
|
int res;
|
|
|
|
assert( pPhrase->doclist.pList );
|
|
|
|
p2 = pOut = pPhrase->doclist.pList;
|
|
res = fts3PoslistNearMerge(
|
|
&pOut, aTmp, nParam1, nParam2, paPoslist, &p2
|
|
);
|
|
if( res ){
|
|
nNew = (int)(pOut - pPhrase->doclist.pList) - 1;
|
|
assert_fts3_nc( nNew<=pPhrase->doclist.nList && nNew>0 );
|
|
if( nNew>=0 && nNew<=pPhrase->doclist.nList ){
|
|
assert( pPhrase->doclist.pList[nNew]=='\0' );
|
|
memset(&pPhrase->doclist.pList[nNew], 0, pPhrase->doclist.nList - nNew);
|
|
pPhrase->doclist.nList = nNew;
|
|
}
|
|
*paPoslist = pPhrase->doclist.pList;
|
|
*pnToken = pPhrase->nToken;
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
/*
|
|
** This function is a no-op if *pRc is other than SQLITE_OK when it is called.
|
|
** Otherwise, it advances the expression passed as the second argument to
|
|
** point to the next matching row in the database. Expressions iterate through
|
|
** matching rows in docid order. Ascending order if Fts3Cursor.bDesc is zero,
|
|
** or descending if it is non-zero.
|
|
**
|
|
** If an error occurs, *pRc is set to an SQLite error code. Otherwise, if
|
|
** successful, the following variables in pExpr are set:
|
|
**
|
|
** Fts3Expr.bEof (non-zero if EOF - there is no next row)
|
|
** Fts3Expr.iDocid (valid if bEof==0. The docid of the next row)
|
|
**
|
|
** If the expression is of type FTSQUERY_PHRASE, and the expression is not
|
|
** at EOF, then the following variables are populated with the position list
|
|
** for the phrase for the visited row:
|
|
**
|
|
** FTs3Expr.pPhrase->doclist.nList (length of pList in bytes)
|
|
** FTs3Expr.pPhrase->doclist.pList (pointer to position list)
|
|
**
|
|
** It says above that this function advances the expression to the next
|
|
** matching row. This is usually true, but there are the following exceptions:
|
|
**
|
|
** 1. Deferred tokens are not taken into account. If a phrase consists
|
|
** entirely of deferred tokens, it is assumed to match every row in
|
|
** the db. In this case the position-list is not populated at all.
|
|
**
|
|
** Or, if a phrase contains one or more deferred tokens and one or
|
|
** more non-deferred tokens, then the expression is advanced to the
|
|
** next possible match, considering only non-deferred tokens. In other
|
|
** words, if the phrase is "A B C", and "B" is deferred, the expression
|
|
** is advanced to the next row that contains an instance of "A * C",
|
|
** where "*" may match any single token. The position list in this case
|
|
** is populated as for "A * C" before returning.
|
|
**
|
|
** 2. NEAR is treated as AND. If the expression is "x NEAR y", it is
|
|
** advanced to point to the next row that matches "x AND y".
|
|
**
|
|
** See sqlite3Fts3EvalTestDeferred() for details on testing if a row is
|
|
** really a match, taking into account deferred tokens and NEAR operators.
|
|
*/
|
|
static void fts3EvalNextRow(
|
|
Fts3Cursor *pCsr, /* FTS Cursor handle */
|
|
Fts3Expr *pExpr, /* Expr. to advance to next matching row */
|
|
int *pRc /* IN/OUT: Error code */
|
|
){
|
|
if( *pRc==SQLITE_OK && pExpr->bEof==0 ){
|
|
int bDescDoclist = pCsr->bDesc; /* Used by DOCID_CMP() macro */
|
|
pExpr->bStart = 1;
|
|
|
|
switch( pExpr->eType ){
|
|
case FTSQUERY_NEAR:
|
|
case FTSQUERY_AND: {
|
|
Fts3Expr *pLeft = pExpr->pLeft;
|
|
Fts3Expr *pRight = pExpr->pRight;
|
|
assert( !pLeft->bDeferred || !pRight->bDeferred );
|
|
|
|
if( pLeft->bDeferred ){
|
|
/* LHS is entirely deferred. So we assume it matches every row.
|
|
** Advance the RHS iterator to find the next row visited. */
|
|
fts3EvalNextRow(pCsr, pRight, pRc);
|
|
pExpr->iDocid = pRight->iDocid;
|
|
pExpr->bEof = pRight->bEof;
|
|
}else if( pRight->bDeferred ){
|
|
/* RHS is entirely deferred. So we assume it matches every row.
|
|
** Advance the LHS iterator to find the next row visited. */
|
|
fts3EvalNextRow(pCsr, pLeft, pRc);
|
|
pExpr->iDocid = pLeft->iDocid;
|
|
pExpr->bEof = pLeft->bEof;
|
|
}else{
|
|
/* Neither the RHS or LHS are deferred. */
|
|
fts3EvalNextRow(pCsr, pLeft, pRc);
|
|
fts3EvalNextRow(pCsr, pRight, pRc);
|
|
while( !pLeft->bEof && !pRight->bEof && *pRc==SQLITE_OK ){
|
|
sqlite3_int64 iDiff = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
|
|
if( iDiff==0 ) break;
|
|
if( iDiff<0 ){
|
|
fts3EvalNextRow(pCsr, pLeft, pRc);
|
|
}else{
|
|
fts3EvalNextRow(pCsr, pRight, pRc);
|
|
}
|
|
}
|
|
pExpr->iDocid = pLeft->iDocid;
|
|
pExpr->bEof = (pLeft->bEof || pRight->bEof);
|
|
if( pExpr->eType==FTSQUERY_NEAR && pExpr->bEof ){
|
|
assert( pRight->eType==FTSQUERY_PHRASE );
|
|
if( pRight->pPhrase->doclist.aAll ){
|
|
Fts3Doclist *pDl = &pRight->pPhrase->doclist;
|
|
while( *pRc==SQLITE_OK && pRight->bEof==0 ){
|
|
memset(pDl->pList, 0, pDl->nList);
|
|
fts3EvalNextRow(pCsr, pRight, pRc);
|
|
}
|
|
}
|
|
if( pLeft->pPhrase && pLeft->pPhrase->doclist.aAll ){
|
|
Fts3Doclist *pDl = &pLeft->pPhrase->doclist;
|
|
while( *pRc==SQLITE_OK && pLeft->bEof==0 ){
|
|
memset(pDl->pList, 0, pDl->nList);
|
|
fts3EvalNextRow(pCsr, pLeft, pRc);
|
|
}
|
|
}
|
|
pRight->bEof = pLeft->bEof = 1;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
|
|
case FTSQUERY_OR: {
|
|
Fts3Expr *pLeft = pExpr->pLeft;
|
|
Fts3Expr *pRight = pExpr->pRight;
|
|
sqlite3_int64 iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
|
|
|
|
assert_fts3_nc( pLeft->bStart || pLeft->iDocid==pRight->iDocid );
|
|
assert_fts3_nc( pRight->bStart || pLeft->iDocid==pRight->iDocid );
|
|
|
|
if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){
|
|
fts3EvalNextRow(pCsr, pLeft, pRc);
|
|
}else if( pLeft->bEof || iCmp>0 ){
|
|
fts3EvalNextRow(pCsr, pRight, pRc);
|
|
}else{
|
|
fts3EvalNextRow(pCsr, pLeft, pRc);
|
|
fts3EvalNextRow(pCsr, pRight, pRc);
|
|
}
|
|
|
|
pExpr->bEof = (pLeft->bEof && pRight->bEof);
|
|
iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
|
|
if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){
|
|
pExpr->iDocid = pLeft->iDocid;
|
|
}else{
|
|
pExpr->iDocid = pRight->iDocid;
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case FTSQUERY_NOT: {
|
|
Fts3Expr *pLeft = pExpr->pLeft;
|
|
Fts3Expr *pRight = pExpr->pRight;
|
|
|
|
if( pRight->bStart==0 ){
|
|
fts3EvalNextRow(pCsr, pRight, pRc);
|
|
assert( *pRc!=SQLITE_OK || pRight->bStart );
|
|
}
|
|
|
|
fts3EvalNextRow(pCsr, pLeft, pRc);
|
|
if( pLeft->bEof==0 ){
|
|
while( !*pRc
|
|
&& !pRight->bEof
|
|
&& DOCID_CMP(pLeft->iDocid, pRight->iDocid)>0
|
|
){
|
|
fts3EvalNextRow(pCsr, pRight, pRc);
|
|
}
|
|
}
|
|
pExpr->iDocid = pLeft->iDocid;
|
|
pExpr->bEof = pLeft->bEof;
|
|
break;
|
|
}
|
|
|
|
default: {
|
|
Fts3Phrase *pPhrase = pExpr->pPhrase;
|
|
fts3EvalInvalidatePoslist(pPhrase);
|
|
*pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof);
|
|
pExpr->iDocid = pPhrase->doclist.iDocid;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}

/*
** If *pRc is not SQLITE_OK, or if pExpr is not the root node of a NEAR
** cluster, then this function returns 1 immediately.
**
** Otherwise, it checks if the current row really does match the NEAR
** expression, using the data currently stored in the position lists
** (Fts3Expr->pPhrase.doclist.pList/nList) for each phrase in the expression.
**
** If the current row is a match, the position list associated with each
** phrase in the NEAR expression is edited in place to contain only those
** phrase instances sufficiently close to their peers to satisfy all NEAR
** constraints. In this case it returns 1. If the NEAR expression does not
** match the current row, 0 is returned. The position lists may or may not
** be edited if 0 is returned.
*/
static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){
  int res = 1;

  /* The following block runs if pExpr is the root of a NEAR query.
  ** For example, the query:
  **
  **         "w" NEAR "x" NEAR "y" NEAR "z"
  **
  ** which is represented in tree form as:
  **
  **                               |
  **                          +--NEAR--+      <-- root of NEAR query
  **                          |        |
  **                     +--NEAR--+   "z"
  **                     |        |
  **                +--NEAR--+   "y"
  **                |        |
  **               "w"      "x"
  **
  ** The right-hand child of a NEAR node is always a phrase. The
  ** left-hand child may be either a phrase or a NEAR node. There are
  ** no exceptions to this - it's the way the parser in fts3_expr.c works.
  */
  if( *pRc==SQLITE_OK
   && pExpr->eType==FTSQUERY_NEAR
   && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR)
  ){
    Fts3Expr *p;
    sqlite3_int64 nTmp = 0;       /* Bytes of temp space */
    char *aTmp;                   /* Temp space for PoslistNearMerge() */

    /* Allocate temporary working space. */
    for(p=pExpr; p->pLeft; p=p->pLeft){
      assert( p->pRight->pPhrase->doclist.nList>0 );
      nTmp += p->pRight->pPhrase->doclist.nList;
    }
    nTmp += p->pPhrase->doclist.nList;
    aTmp = sqlite3_malloc64(nTmp*2);
    if( !aTmp ){
      *pRc = SQLITE_NOMEM;
      res = 0;
    }else{
      char *aPoslist = p->pPhrase->doclist.pList;
      int nToken = p->pPhrase->nToken;

      for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){
        Fts3Phrase *pPhrase = p->pRight->pPhrase;
        int nNear = p->nNear;
        res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
      }

      aPoslist = pExpr->pRight->pPhrase->doclist.pList;
      nToken = pExpr->pRight->pPhrase->nToken;
      for(p=pExpr->pLeft; p && res; p=p->pLeft){
        int nNear;
        Fts3Phrase *pPhrase;
        assert( p->pParent && p->pParent->pLeft==p );
        nNear = p->pParent->nNear;
        pPhrase = (
            p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase
        );
        res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
      }
    }

    sqlite3_free(aTmp);
  }

  return res;
}
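
/* Illustrative sketch (not part of FTS3): fts3EvalNearTrim(), called above,
** keeps only those phrase positions that lie close enough to a position of
** the neighbouring phrase. Assuming single-token phrases and plain in-memory
** position arrays (the real code operates on varint-encoded position lists
** and accounts for phrase length), the trimming step reduces to the filter
** below. The names nearFilter, aAnchor and aKeep are hypothetical.
*/
#if 0
static int nearFilter(
  const int *aAnchor, int nAnchor,  /* Positions of the neighbouring phrase */
  const int *aPos, int nPos,        /* Candidate positions to be trimmed */
  int nNear,                        /* Maximum allowed token distance */
  int *aKeep                        /* OUT: surviving candidate positions */
){
  int i, j, nKeep = 0;
  for(i=0; i<nPos; i++){
    for(j=0; j<nAnchor; j++){
      int iDiff = aPos[i] - aAnchor[j];
      if( iDiff<0 ) iDiff = -iDiff;
      if( iDiff<=nNear ){
        aKeep[nKeep++] = aPos[i];   /* Close enough to at least one anchor */
        break;
      }
    }
  }
  return nKeep;                     /* 0 means the NEAR constraint failed */
}
#endif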

/*
** This function is a helper function for sqlite3Fts3EvalTestDeferred().
** Assuming no error occurs or has occurred, it returns non-zero if the
** expression passed as the second argument matches the row that pCsr
** currently points to, or zero if it does not.
**
** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
** If an error occurs during execution of this function, *pRc is set to
** the appropriate SQLite error code. In this case the returned value is
** undefined.
*/
static int fts3EvalTestExpr(
  Fts3Cursor *pCsr,               /* FTS cursor handle */
  Fts3Expr *pExpr,                /* Expr to test. May or may not be root. */
  int *pRc                        /* IN/OUT: Error code */
){
  int bHit = 1;                   /* Return value */
  if( *pRc==SQLITE_OK ){
    switch( pExpr->eType ){
      case FTSQUERY_NEAR:
      case FTSQUERY_AND:
        bHit = (
            fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc)
         && fts3EvalTestExpr(pCsr, pExpr->pRight, pRc)
         && fts3EvalNearTest(pExpr, pRc)
        );

        /* If the NEAR expression does not match any rows, zero the doclist for
        ** all phrases involved in the NEAR. This is because the snippet(),
        ** offsets() and matchinfo() functions are not supposed to recognize
        ** any instances of phrases that are part of unmatched NEAR queries.
        ** For example if this expression:
        **
        **    ... MATCH 'a OR (b NEAR c)'
        **
        ** is matched against a row containing:
        **
        **        'a b d e'
        **
        ** then any snippet() should only highlight the "a" term, not the "b"
        ** (as "b" is part of a non-matching NEAR clause).
        */
        if( bHit==0
         && pExpr->eType==FTSQUERY_NEAR
         && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR)
        ){
          Fts3Expr *p;
          for(p=pExpr; p->pPhrase==0; p=p->pLeft){
            if( p->pRight->iDocid==pCsr->iPrevId ){
              fts3EvalInvalidatePoslist(p->pRight->pPhrase);
            }
          }
          if( p->iDocid==pCsr->iPrevId ){
            fts3EvalInvalidatePoslist(p->pPhrase);
          }
        }

        break;

      case FTSQUERY_OR: {
        int bHit1 = fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc);
        int bHit2 = fts3EvalTestExpr(pCsr, pExpr->pRight, pRc);
        bHit = bHit1 || bHit2;
        break;
      }

      case FTSQUERY_NOT:
        bHit = (
            fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc)
         && !fts3EvalTestExpr(pCsr, pExpr->pRight, pRc)
        );
        break;

      default: {
#ifndef SQLITE_DISABLE_FTS4_DEFERRED
        if( pCsr->pDeferred && (pExpr->bDeferred || (
            pExpr->iDocid==pCsr->iPrevId && pExpr->pPhrase->doclist.pList
        ))){
          Fts3Phrase *pPhrase = pExpr->pPhrase;
          if( pExpr->bDeferred ){
            fts3EvalInvalidatePoslist(pPhrase);
          }
          *pRc = fts3EvalDeferredPhrase(pCsr, pPhrase);
          bHit = (pPhrase->doclist.pList!=0);
          pExpr->iDocid = pCsr->iPrevId;
        }else
#endif
        {
          bHit = (
              pExpr->bEof==0 && pExpr->iDocid==pCsr->iPrevId
           && pExpr->pPhrase->doclist.nList>0
          );
        }
        break;
      }
    }
  }
  return bHit;
}
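
/* Illustrative sketch (not part of FTS3): fts3EvalTestExpr() above is a
** straightforward recursive evaluation of the query tree, with NEAR treated
** as AND plus an extra proximity test. Assuming a stripped-down node type,
** the recursion has the following shape. The names ExampleExpr, exampleEval
** and the EX_* constants are hypothetical and exist only for this sketch.
*/
#if 0
typedef struct ExampleExpr ExampleExpr;
struct ExampleExpr {
  int eType;                      /* EX_AND, EX_OR, EX_NOT or EX_PHRASE */
  ExampleExpr *pLeft;             /* Left operand, or NULL for a phrase */
  ExampleExpr *pRight;            /* Right operand, or NULL for a phrase */
  int bPhraseHit;                 /* For EX_PHRASE: does it match the row? */
};

enum { EX_PHRASE, EX_AND, EX_OR, EX_NOT };

static int exampleEval(const ExampleExpr *p){
  switch( p->eType ){
    case EX_AND: return exampleEval(p->pLeft) && exampleEval(p->pRight);
    case EX_OR:  return exampleEval(p->pLeft) || exampleEval(p->pRight);
    case EX_NOT: return exampleEval(p->pLeft) && !exampleEval(p->pRight);
    default:     return p->bPhraseHit;       /* Leaf: phrase match flag */
  }
}
#endif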

/*
** This function is called as the second part of each xNext operation when
** iterating through the results of a full-text query. At this point the
** cursor points to a row that matches the query expression, with the
** following caveats:
**
**   * Up until this point, "NEAR" operators in the expression have been
**     treated as "AND".
**
**   * Deferred tokens have not yet been considered.
**
** If *pRc is not SQLITE_OK when this function is called, it immediately
** returns 0. Otherwise, it tests whether or not after considering NEAR
** operators and deferred tokens the current row is still a match for the
** expression. It returns 1 if both of the following are true:
**
**   1. *pRc is SQLITE_OK when this function returns, and
**
**   2. After scanning the current FTS table row for the deferred tokens,
**      it is determined that the row does *not* match the query.
**
** Or, if no error occurs and it seems the current row does match the FTS
** query, return 0.
*/
int sqlite3Fts3EvalTestDeferred(Fts3Cursor *pCsr, int *pRc){
  int rc = *pRc;
  int bMiss = 0;
  if( rc==SQLITE_OK ){

    /* If there are one or more deferred tokens, load the current row into
    ** memory and scan it to determine the position list for each deferred
    ** token. Then, see if this row is really a match, considering deferred
    ** tokens and NEAR operators (neither of which were taken into account
    ** earlier, by fts3EvalNextRow()).
    */
    if( pCsr->pDeferred ){
      rc = fts3CursorSeek(0, pCsr);
      if( rc==SQLITE_OK ){
        rc = sqlite3Fts3CacheDeferredDoclists(pCsr);
      }
    }
    bMiss = (0==fts3EvalTestExpr(pCsr, pCsr->pExpr, &rc));

    /* Free the position-lists accumulated for each deferred token above. */
    sqlite3Fts3FreeDeferredDoclists(pCsr);
    *pRc = rc;
  }
  return (rc==SQLITE_OK && bMiss);
}

/*
** Advance to the next document that matches the FTS expression in
** Fts3Cursor.pExpr.
*/
static int fts3EvalNext(Fts3Cursor *pCsr){
  int rc = SQLITE_OK;             /* Return Code */
  Fts3Expr *pExpr = pCsr->pExpr;
  assert( pCsr->isEof==0 );
  if( pExpr==0 ){
    pCsr->isEof = 1;
  }else{
    do {
      if( pCsr->isRequireSeek==0 ){
        sqlite3_reset(pCsr->pStmt);
      }
      assert( sqlite3_data_count(pCsr->pStmt)==0 );
      fts3EvalNextRow(pCsr, pExpr, &rc);
      pCsr->isEof = pExpr->bEof;
      pCsr->isRequireSeek = 1;
      pCsr->isMatchinfoNeeded = 1;
      pCsr->iPrevId = pExpr->iDocid;
    }while( pCsr->isEof==0 && sqlite3Fts3EvalTestDeferred(pCsr, &rc) );
  }

  /* Check if the cursor is past the end of the docid range specified
  ** by Fts3Cursor.iMinDocid/iMaxDocid. If so, set the EOF flag. */
  if( rc==SQLITE_OK && (
        (pCsr->bDesc==0 && pCsr->iPrevId>pCsr->iMaxDocid)
     || (pCsr->bDesc!=0 && pCsr->iPrevId<pCsr->iMinDocid)
  )){
    pCsr->isEof = 1;
  }

  return rc;
}

/*
** Restart iteration for expression pExpr so that the next call to
** fts3EvalNext() visits the first row. Do not allow incremental
** loading or merging of phrase doclists for this iteration.
**
** If *pRc is other than SQLITE_OK when this function is called, it is
** a no-op. If an error occurs within this function, *pRc is set to an
** SQLite error code before returning.
*/
static void fts3EvalRestart(
  Fts3Cursor *pCsr,
  Fts3Expr *pExpr,
  int *pRc
){
  if( pExpr && *pRc==SQLITE_OK ){
    Fts3Phrase *pPhrase = pExpr->pPhrase;

    if( pPhrase ){
      fts3EvalInvalidatePoslist(pPhrase);
      if( pPhrase->bIncr ){
        int i;
        for(i=0; i<pPhrase->nToken; i++){
          Fts3PhraseToken *pToken = &pPhrase->aToken[i];
          assert( pToken->pDeferred==0 );
          if( pToken->pSegcsr ){
            sqlite3Fts3MsrIncrRestart(pToken->pSegcsr);
          }
        }
        *pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase);
      }
      pPhrase->doclist.pNextDocid = 0;
      pPhrase->doclist.iDocid = 0;
      pPhrase->pOrPoslist = 0;
    }

    pExpr->iDocid = 0;
    pExpr->bEof = 0;
    pExpr->bStart = 0;

    fts3EvalRestart(pCsr, pExpr->pLeft, pRc);
    fts3EvalRestart(pCsr, pExpr->pRight, pRc);
  }
}

/*
** After allocating the Fts3Expr.aMI[] array for each phrase in the
** expression rooted at pExpr, the cursor iterates through all rows matched
** by pExpr, calling this function for each row. This function increments
** the values in Fts3Expr.aMI[] according to the position-list currently
** found in Fts3Expr.pPhrase->doclist.pList for each of the phrase
** expression nodes.
*/
static void fts3EvalUpdateCounts(Fts3Expr *pExpr, int nCol){
  if( pExpr ){
    Fts3Phrase *pPhrase = pExpr->pPhrase;
    if( pPhrase && pPhrase->doclist.pList ){
      int iCol = 0;
      char *p = pPhrase->doclist.pList;

      do{
        u8 c = 0;
        int iCnt = 0;
        while( 0xFE & (*p | c) ){
          if( (c&0x80)==0 ) iCnt++;
          c = *p++ & 0x80;
        }

        /* aMI[iCol*3 + 1] = Number of occurrences
        ** aMI[iCol*3 + 2] = Number of rows containing at least one instance
        */
        pExpr->aMI[iCol*3 + 1] += iCnt;
        pExpr->aMI[iCol*3 + 2] += (iCnt>0);
        if( *p==0x00 ) break;
        p++;
        p += fts3GetVarint32(p, &iCol);
      }while( iCol<nCol );
    }

    fts3EvalUpdateCounts(pExpr->pLeft, nCol);
    fts3EvalUpdateCounts(pExpr->pRight, nCol);
  }
}
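
/* Illustrative sketch (not part of FTS3): the aMI[] array updated above holds
** three u32 slots per column. Slot iCol*3+1 accumulates the number of phrase
** occurrences in that column across all matched rows, and slot iCol*3+2
** counts the rows in which the column contains at least one occurrence.
** Assuming the per-row hit counts are already known (the real code derives
** them by walking the varint-encoded position list), the bookkeeping reduces
** to the loop below. updateColumnCounts and aHitsThisRow are hypothetical.
*/
#if 0
typedef unsigned int u32_example;

static void updateColumnCounts(
  u32_example *aMI,               /* 3*nCol counters, zeroed before the scan */
  const int *aHitsThisRow,        /* Phrase hits per column for one row */
  int nCol                        /* Number of columns in the FTS table */
){
  int iCol;
  for(iCol=0; iCol<nCol; iCol++){
    aMI[iCol*3 + 1] += (u32_example)aHitsThisRow[iCol];   /* Occurrences */
    aMI[iCol*3 + 2] += (aHitsThisRow[iCol]>0);            /* Rows with hits */
  }
}
#endif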

/*
** This is an sqlite3Fts3ExprIterate() callback. If the Fts3Expr.aMI[] array
** has not yet been allocated, allocate and zero it. Otherwise, just zero
** it.
*/
static int fts3AllocateMSI(Fts3Expr *pExpr, int iPhrase, void *pCtx){
  Fts3Table *pTab = (Fts3Table*)pCtx;
  UNUSED_PARAMETER(iPhrase);
  if( pExpr->aMI==0 ){
    pExpr->aMI = (u32 *)sqlite3_malloc64(pTab->nColumn * 3 * sizeof(u32));
    if( pExpr->aMI==0 ) return SQLITE_NOMEM;
  }
  memset(pExpr->aMI, 0, pTab->nColumn * 3 * sizeof(u32));
  return SQLITE_OK;
}

/*
** Expression pExpr must be of type FTSQUERY_PHRASE.
**
** If it is not already allocated and populated, this function allocates and
** populates the Fts3Expr.aMI[] array for expression pExpr. If pExpr is part
** of a NEAR expression, then it also allocates and populates the same array
** for all other phrases that are part of the NEAR expression.
**
** SQLITE_OK is returned if the aMI[] array is successfully allocated and
** populated. Otherwise, if an error occurs, an SQLite error code is returned.
*/
static int fts3EvalGatherStats(
  Fts3Cursor *pCsr,               /* Cursor object */
  Fts3Expr *pExpr                 /* FTSQUERY_PHRASE expression */
){
  int rc = SQLITE_OK;             /* Return code */

  assert( pExpr->eType==FTSQUERY_PHRASE );
  if( pExpr->aMI==0 ){
    Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
    Fts3Expr *pRoot;              /* Root of NEAR expression */

    sqlite3_int64 iPrevId = pCsr->iPrevId;
    sqlite3_int64 iDocid;
    u8 bEof;

    /* Find the root of the NEAR expression */
    pRoot = pExpr;
    while( pRoot->pParent
        && (pRoot->pParent->eType==FTSQUERY_NEAR || pRoot->bDeferred)
    ){
      pRoot = pRoot->pParent;
    }
    iDocid = pRoot->iDocid;
    bEof = pRoot->bEof;
    assert( pRoot->bStart );

    /* Allocate space for the aMI[] array of each FTSQUERY_PHRASE node */
    rc = sqlite3Fts3ExprIterate(pRoot, fts3AllocateMSI, (void*)pTab);
    if( rc!=SQLITE_OK ) return rc;
    fts3EvalRestart(pCsr, pRoot, &rc);

    while( pCsr->isEof==0 && rc==SQLITE_OK ){

      do {
        /* Ensure the %_content statement is reset. */
        if( pCsr->isRequireSeek==0 ) sqlite3_reset(pCsr->pStmt);
        assert( sqlite3_data_count(pCsr->pStmt)==0 );

        /* Advance to the next document */
        fts3EvalNextRow(pCsr, pRoot, &rc);
        pCsr->isEof = pRoot->bEof;
        pCsr->isRequireSeek = 1;
        pCsr->isMatchinfoNeeded = 1;
        pCsr->iPrevId = pRoot->iDocid;
      }while( pCsr->isEof==0
           && pRoot->eType==FTSQUERY_NEAR
           && sqlite3Fts3EvalTestDeferred(pCsr, &rc)
      );

      if( rc==SQLITE_OK && pCsr->isEof==0 ){
        fts3EvalUpdateCounts(pRoot, pTab->nColumn);
      }
    }

    pCsr->isEof = 0;
    pCsr->iPrevId = iPrevId;

    if( bEof ){
      pRoot->bEof = bEof;
    }else{
      /* Caution: pRoot may iterate through docids in ascending or descending
      ** order. For this reason, even though it seems more defensive, the
      ** do loop can not be written:
      **
      **   do {...} while( pRoot->iDocid<iDocid && rc==SQLITE_OK );
      */
      fts3EvalRestart(pCsr, pRoot, &rc);
      do {
        fts3EvalNextRow(pCsr, pRoot, &rc);
        assert_fts3_nc( pRoot->bEof==0 );
        if( pRoot->bEof ) rc = FTS_CORRUPT_VTAB;
      }while( pRoot->iDocid!=iDocid && rc==SQLITE_OK );
    }
  }
  return rc;
}

/*
** This function is used by the matchinfo() module to query a phrase
** expression node for the following information:
**
**   1. The total number of occurrences of the phrase in each column of
**      the FTS table (considering all rows), and
**
**   2. For each column, the number of rows in the table for which the
**      column contains at least one instance of the phrase.
**
** If no error occurs, SQLITE_OK is returned and the values for each column
** are written into the array aiOut as follows:
**
**   aiOut[iCol*3 + 1] = Number of occurrences
**   aiOut[iCol*3 + 2] = Number of rows containing at least one instance
**
** Caveats:
**
**   * If a phrase consists entirely of deferred tokens, then all output
**     values are set to the number of documents in the table. In other
**     words we assume that very common tokens occur exactly once in each
**     column of each row of the table.
**
**   * If a phrase contains some deferred tokens (and some non-deferred
**     tokens), count the potential occurrence identified by considering
**     the non-deferred tokens instead of actual phrase occurrences.
**
**   * If the phrase is part of a NEAR expression, then only phrase instances
**     that meet the NEAR constraint are included in the counts.
*/
int sqlite3Fts3EvalPhraseStats(
  Fts3Cursor *pCsr,               /* FTS cursor handle */
  Fts3Expr *pExpr,                /* Phrase expression */
  u32 *aiOut                      /* Array to write results into (see above) */
){
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  int rc = SQLITE_OK;
  int iCol;

  if( pExpr->bDeferred && pExpr->pParent->eType!=FTSQUERY_NEAR ){
    assert( pCsr->nDoc>0 );
    for(iCol=0; iCol<pTab->nColumn; iCol++){
      aiOut[iCol*3 + 1] = (u32)pCsr->nDoc;
      aiOut[iCol*3 + 2] = (u32)pCsr->nDoc;
    }
  }else{
    rc = fts3EvalGatherStats(pCsr, pExpr);
    if( rc==SQLITE_OK ){
      assert( pExpr->aMI );
      for(iCol=0; iCol<pTab->nColumn; iCol++){
        aiOut[iCol*3 + 1] = pExpr->aMI[iCol*3 + 1];
        aiOut[iCol*3 + 2] = pExpr->aMI[iCol*3 + 2];
      }
    }
  }

  return rc;
}

/*
** The expression pExpr passed as the second argument to this function
** must be of type FTSQUERY_PHRASE.
**
** The returned value is either NULL or a pointer to a buffer containing
** a position-list indicating the occurrences of the phrase in column iCol
** of the current row.
**
** More specifically, the returned buffer contains 1 varint for each
** occurrence of the phrase in the column, stored using the normal (delta+2)
** compression and is terminated by either an 0x01 or 0x00 byte. For example,
** if the requested column contains "a b X c d X X" and the position-list
** for 'X' is requested, the buffer returned may contain:
**
**     0x04 0x05 0x03 0x01   or   0x04 0x05 0x03 0x00
**
** This function works regardless of whether or not the phrase is deferred,
** incremental, or neither.
*/
int sqlite3Fts3EvalPhrasePoslist(
  Fts3Cursor *pCsr,               /* FTS3 cursor object */
  Fts3Expr *pExpr,                /* Phrase to return doclist for */
  int iCol,                       /* Column to return position list for */
  char **ppOut                    /* OUT: Pointer to position list */
){
  Fts3Phrase *pPhrase = pExpr->pPhrase;
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  char *pIter;
  int iThis;
  sqlite3_int64 iDocid;

  /* If this phrase applies specifically to some column other than
  ** column iCol, return a NULL pointer. */
  *ppOut = 0;
  assert( iCol>=0 && iCol<pTab->nColumn );
  if( (pPhrase->iColumn<pTab->nColumn && pPhrase->iColumn!=iCol) ){
    return SQLITE_OK;
  }

  iDocid = pExpr->iDocid;
  pIter = pPhrase->doclist.pList;
  if( iDocid!=pCsr->iPrevId || pExpr->bEof ){
    int rc = SQLITE_OK;
    int bDescDoclist = pTab->bDescIdx;      /* For DOCID_CMP macro */
    int bOr = 0;
    u8 bTreeEof = 0;
    Fts3Expr *p;                  /* Used to iterate from pExpr to root */
    Fts3Expr *pNear;              /* Most senior NEAR ancestor (or pExpr) */
    Fts3Expr *pRun;               /* Closest non-deferred ancestor of pNear */
    int bMatch;

    /* Check if this phrase descends from an OR expression node. If not,
    ** return NULL. Otherwise, the entry that corresponds to docid
    ** pCsr->iPrevId may lie earlier in the doclist buffer. Or, if the
    ** tree that the node is part of has been marked as EOF, but the node
    ** itself is not EOF, then it may point to an earlier entry. */
    pNear = pExpr;
    for(p=pExpr->pParent; p; p=p->pParent){
      if( p->eType==FTSQUERY_OR ) bOr = 1;
      if( p->eType==FTSQUERY_NEAR ) pNear = p;
      if( p->bEof ) bTreeEof = 1;
    }
    if( bOr==0 ) return SQLITE_OK;
    pRun = pNear;
    while( pRun->bDeferred ){
      assert( pRun->pParent );
      pRun = pRun->pParent;
    }

    /* This is the descendant of an OR node. In this case we cannot use
    ** an incremental phrase. Load the entire doclist for the phrase
    ** into memory in this case. */
    if( pPhrase->bIncr ){
      int bEofSave = pRun->bEof;
      fts3EvalRestart(pCsr, pRun, &rc);
      while( rc==SQLITE_OK && !pRun->bEof ){
        fts3EvalNextRow(pCsr, pRun, &rc);
        if( bEofSave==0 && pRun->iDocid==iDocid ) break;
      }
      assert( rc!=SQLITE_OK || pPhrase->bIncr==0 );
      if( rc==SQLITE_OK && pRun->bEof!=bEofSave ){
        rc = FTS_CORRUPT_VTAB;
      }
    }
    if( bTreeEof ){
      while( rc==SQLITE_OK && !pRun->bEof ){
        fts3EvalNextRow(pCsr, pRun, &rc);
      }
    }
    if( rc!=SQLITE_OK ) return rc;

    bMatch = 1;
    for(p=pNear; p; p=p->pLeft){
      u8 bEof = 0;
      Fts3Expr *pTest = p;
      Fts3Phrase *pPh;
      assert( pTest->eType==FTSQUERY_NEAR || pTest->eType==FTSQUERY_PHRASE );
      if( pTest->eType==FTSQUERY_NEAR ) pTest = pTest->pRight;
      assert( pTest->eType==FTSQUERY_PHRASE );
      pPh = pTest->pPhrase;

      pIter = pPh->pOrPoslist;
      iDocid = pPh->iOrDocid;
      if( pCsr->bDesc==bDescDoclist ){
        bEof = !pPh->doclist.nAll ||
            (pIter >= (pPh->doclist.aAll + pPh->doclist.nAll));
        while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){
          sqlite3Fts3DoclistNext(
              bDescDoclist, pPh->doclist.aAll, pPh->doclist.nAll,
              &pIter, &iDocid, &bEof
          );
        }
      }else{
        bEof = !pPh->doclist.nAll || (pIter && pIter<=pPh->doclist.aAll);
        while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){
          int dummy;
          sqlite3Fts3DoclistPrev(
              bDescDoclist, pPh->doclist.aAll, pPh->doclist.nAll,
              &pIter, &iDocid, &dummy, &bEof
          );
        }
      }
      pPh->pOrPoslist = pIter;
      pPh->iOrDocid = iDocid;
      if( bEof || iDocid!=pCsr->iPrevId ) bMatch = 0;
    }

    if( bMatch ){
      pIter = pPhrase->pOrPoslist;
    }else{
      pIter = 0;
    }
  }
  if( pIter==0 ) return SQLITE_OK;

  if( *pIter==0x01 ){
    pIter++;
    pIter += fts3GetVarint32(pIter, &iThis);
  }else{
    iThis = 0;
  }
  while( iThis<iCol ){
    fts3ColumnlistCopy(0, &pIter);
    if( *pIter==0x00 ) return SQLITE_OK;
    pIter++;
    pIter += fts3GetVarint32(pIter, &iThis);
  }
  if( *pIter==0x00 ){
    pIter = 0;
  }

  *ppOut = ((iCol==iThis)?pIter:0);
  return SQLITE_OK;
}
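
/* Illustrative sketch (not part of FTS3): the (delta+2) position-list format
** described above stores, for each occurrence, a varint equal to the gap from
** the previous position plus two, with 0x00 and 0x01 reserved as terminators.
** For the example column "a b X c d X X", the positions of X are 2, 5 and 6,
** so the encoded bytes are 0x04 (2+2), 0x05 (5-2+2) and 0x03 (6-5+2). The
** sketch below decodes such a list, assuming every varint fits in a single
** byte (the real code uses fts3GetVarint32() to handle multi-byte varints).
** decodePoslist is a hypothetical name used only for this sketch.
*/
#if 0
#include <stdio.h>

static int decodePoslist(const unsigned char *a, int *aPos, int nMax){
  int iPrev = 0;                  /* Previous decoded token position */
  int n = 0;                      /* Number of positions decoded */
  while( *a>0x01 && n<nMax ){     /* 0x00 and 0x01 terminate the list */
    iPrev += (int)(*a++) - 2;     /* Undo the (delta+2) encoding */
    aPos[n++] = iPrev;
  }
  return n;
}

int main(void){
  const unsigned char aList[] = { 0x04, 0x05, 0x03, 0x00 };
  int aPos[8];
  int i, n = decodePoslist(aList, aPos, 8);
  for(i=0; i<n; i++) printf("position %d\n", aPos[i]);    /* 2, 5, 6 */
  return 0;
}
#endif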

/*
** Free all components of the Fts3Phrase structure that were allocated by
** the eval module. Specifically, this means to free:
**
**   * the contents of pPhrase->doclist, and
**   * any Fts3MultiSegReader objects held by phrase tokens.
*/
void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){
  if( pPhrase ){
    int i;
    sqlite3_free(pPhrase->doclist.aAll);
    fts3EvalInvalidatePoslist(pPhrase);
    memset(&pPhrase->doclist, 0, sizeof(Fts3Doclist));
    for(i=0; i<pPhrase->nToken; i++){
      fts3SegReaderCursorFree(pPhrase->aToken[i].pSegcsr);
      pPhrase->aToken[i].pSegcsr = 0;
    }
  }
}


/*
** Return SQLITE_CORRUPT_VTAB.
*/
#ifdef SQLITE_DEBUG
int sqlite3Fts3Corrupt(){
  return SQLITE_CORRUPT_VTAB;
}
#endif

#if !SQLITE_CORE
/*
** Initialize API pointer table, if required.
*/
#ifdef _WIN32
__declspec(dllexport)
#endif
int sqlite3_fts3_init(
  sqlite3 *db,
  char **pzErrMsg,
  const sqlite3_api_routines *pApi
){
  SQLITE_EXTENSION_INIT2(pApi)
  return sqlite3Fts3Init(db);
}
#endif

#endif