mirror of
https://github.com/tursodatabase/libsql.git
synced 2025-05-12 19:23:08 +00:00
* Remove unused elements from the json_tree() cursor. FossilOrigin-Name: 914a50117d477b2cd30d58388fb8d1b71ff7ff6842ba025f38efc6e9647d06d0 * Same results as the legacy JsonNode implementation on a small set of test cases. FossilOrigin-Name: c3da4b079a1a15a4c0b1a6e71f876648b1d9eb32eddc67b9946c2475c7b6d085 * Fix corner-case error conditions. FossilOrigin-Name: ec23d34ab75e1d7e9366e59c633e0d30def8759f6d4717583ebeb4c90aeccf0d * All tests passing. FossilOrigin-Name: b5a5660ca22437640c9bf32c44d92c76a7293dafcbaf4fa6a4c171128d64871d * Give the json_valid() function an optional second argument that determines what is meant by "valid". FossilOrigin-Name: a4e19ad43dac81e7655ec03ff69bb99d1d02b0c227034c90fb41415fd4793fe3 * Enhance the (SQLITE_DEBUG-only) json_parse() routine so that it shows a decoding of JSONB when given a BLOB argument. FossilOrigin-Name: af267868562e0799ad691dccad05f17afbc34d609eede8c55f57d209290246ef * In SQLITE_ENABLE_SETLK_TIMEOUT builds, use blocking locks in place of sleep() when opening a read-transaction. FossilOrigin-Name: a51ef39998e25e86bd0600e71d15011b12e05f4319608018293bdaecb09e8c97 * Have SQLITE_ENABLE_SETLK_TIMEOUT builds block when locking a read-lock slot. FossilOrigin-Name: f797baf47cf7859cfd8ce248f4f3087af4551a7040af990333426e5a7c269504 * Add untested (#ifdefed-out) code for the MergePatch algorithm against JSONB. Add (and test) the jsonBlobEdit() routine that is needed by the new MergePatch. FossilOrigin-Name: 4d353387fc10e1038cfdd86e66007bf728c231a928e588897bbee0fbfe76f225 * More aggressive use of jsonBlobEdit(). Improvements to the MergePatch implementation sketch. FossilOrigin-Name: fbca9570fd2e1465739e4d3a8d9bb40fad594fd78ab49b2cb34efa27ebdd8361 * The json_patch() code for JSONB compiles and works sometimes, but there are still issues. Incremental check-in. FossilOrigin-Name: e0099464a0045a04f4ccf29bc2b8325fc8c7f39ccf4847e74818f928c9153588 * All legacy tests are passing. 
FossilOrigin-Name: 2c436806b8d5f57de99c00f6154b038454fb9ae427d00d7b4a46ab9c7c69bcb9 * Handle an SQLITE_BUSY_TIMEOUT error if one occurs while attempting a shared lock on a read-lock slot. FossilOrigin-Name: 5fbf3906d272df3eb981f67455eb35f649ad2774cba9fc3f077b28d9bef3f0cb * The json_remove() function now uses only JSONB, never JsonNodes, internally. FossilOrigin-Name: b69786e746ae2b927b64d9871fd120b7f8f06cc53739fd46a4da51aa16cf8576 * Attempt to get json_extract() working with pure JSONB only, and without the use of JsonNode. Mostly working, but there are some differences from legacy in corner cases. FossilOrigin-Name: 8c324af1eca27e86adc45622af4f3b06a67a3f968596ac58aa7434b1f6f05f3c * Preserve flexibility in the format of the RHS of -> and ->> operators found in legacy. FossilOrigin-Name: 6231ec43adb7436195eb1497de39a6c13c6b4f1c5032e6ea52515d214e61fdbc * Do not set the J subtype when the output is JSONB. FossilOrigin-Name: 4f106b64fe8988435872806bd0a6c223b61f53af0dd1c47c847bb4eec4e03e27 * Convert the json_array_length() function to use JSONB instead of JsonNodes. FossilOrigin-Name: 5ab790736d943e08f097efcee5cfbf0d83c65b0a53f273060330ba719affa5e5 * The assertion change at check-in [7946c79567b0ccd3] is insufficient to fix the problem of a Table object being deleted out from under the OP_VCheck opcode. We need to reference count the Table, which is accomplished here. FossilOrigin-Name: cad269d5e274443c39203a56603b991accc0399135d436996fc039d1d28ec9db * In the recovery extension, if a payload size is unreasonably large, it is probably corrupt, so truncate it. FossilOrigin-Name: 988c3179e978a3a6d42541e9c7a2ab98150383671810926503376ed808f150ff * Fix signed integer overflow in fts5. FossilOrigin-Name: 60e46c7ec68fd8caaed960ca06d98fb06855b2d0bb860dd2fb7b5e89a5e9c7b4 * The json_patch() function now operates exclusively on JSONB. This patch also includes improvements to JSONB debug printing routines. 
FossilOrigin-Name: fee19d0098242110d2c44ec7b9620c1210ef3f87913305f66ec85d277dd96ab6 * Convert the json_error_position() routine to use only JSONB internally. FossilOrigin-Name: e7a8ba35bff6fde55827f978de5b343b6c134c7fa53827f5c63915a9dc2598ad * Convert json_insert(), json_replace(), json_set() to use JSONB internally. Mostly working, but some corner cases are still not quite right. FossilOrigin-Name: 99c8f6bd5c9a31b6d00f92e383bec8a8235ed553916ad59adbb1b7663f6ebff1 * Update some OPFS-related help text in WASM tests. Minor cleanups in speedtest1-worker.js. FossilOrigin-Name: 263f6d3a7784ef7d032dbf7a3265aca8dd70bf50797f28f6b2e8ddb6a301f83a * New test cases for insert/set/replace with paths that indicate substructure that does not yet exist. FossilOrigin-Name: 146c717c51940b2139befc45ac74e7a1c36ef3c32fd3cfe35b334488eebe6298 * New JSON test cases showing insert or set with missing substructure. FossilOrigin-Name: 6802b6459d0d16c961ff41d240a6c88287f197d8f609090f79308707490a49c2 * Simplification of the new JSON insert/set test cases. FossilOrigin-Name: 04c0d5644372446c924a2e31a26edf51ddc563a1990d170b0ed4739e3e8b239b * Enhance json_set() and json_insert() so that they create missing substructure. FossilOrigin-Name: cc7a641ab5ae739d31c24f0ad0caeb15a481a63fa8f13720718ea922c25862ff * Convert json_type() to use JSONB internally. FossilOrigin-Name: 83074835b900ce85cf67059e674ce959801505c37592671af25ca0af7ed483f1 * Add a basic batch-mode SQL runner for the SAH Pool VFS, for use in comparing it against WebSQL. Bring the WebSQL batch runner up to date, noting that it cannot run without addition of an "origin trial" activation key from Google because that's now the only way to enable WebSQL in Chrome (that part is not checked in because that key is private). Minor code-adjacent cleanups. FossilOrigin-Name: 883990e7938c1f63906300a6113f0fadce143913b7c384e8aeb5f886f0be7c62 * Convert json_valid() over to using only JSONB as its internal format. 
FossilOrigin-Name: 7b5756fa6d00b093bf083a8d7a5ef5485f7a09e4eac473785c8380688f861a1b * Remove all trace of JsonNode from the JSON implementation. The JSONB format is used as the internal binary encoding for searching and editing. FossilOrigin-Name: 11ebb5f712cc7a515e2e0f2be8c1d71de20c97fe5b74c4f4d72c84fd21182d35 * First attempt to get the JSON text-to-binary cache working. All test cases pass, but the cache seems not to help much. FossilOrigin-Name: 25ed295f300fea6185104a73721076bccd2b2a6e411c78564266fa6dca4ff70c * Cache is working better, but does not preserve the hasJson5 flag. FossilOrigin-Name: a12add7ab9f5aee5bb2ede0c4d22e599dd28f7a107dce72b2ea48ef92d233e8a * Fix up the JSON cache to work better. FossilOrigin-Name: 1fdbc39521f63aedc6f08ecaafa54ea467b8c6316a692a18ad01eecbf22a0977 * Different approach to querying a tokendata=1 table. Saves cpu and memory. FossilOrigin-Name: c523f40895866e6fc979a26483dbea8206126b4bbdf4b73b77263c09e13c855e * Remove old code for tokendata=1 queries. FossilOrigin-Name: b0a489e8e1bf0290c2117ab32d78b1cc7d67bcb226b55ec044c8367ebde3815b * Performance optimization in the JSON parser. FossilOrigin-Name: 68d191f40e708962ec88e0c245b4496bc4a671300484b1cc0f3fc7e6d199a6e6 * Fix harmless compiler warnings and enhance the performance of the parser. FossilOrigin-Name: 285633da6d188547e52f07779e209c9e5f3dc33ce0668e14858f3337889ef4b8 * Unroll a loop in the parser for a performance increase. FossilOrigin-Name: a6dc29e4d5e13949e0fcd9d5dde575c2670eb10a230ab9df3806fc8c3016c540 * Remove a NEVER that can be true if a virtual table column is declared to have a DEFAULT. See [forum:/forumpost/3d4de8917627d058|forum post 3d4de8917627d058]. FossilOrigin-Name: 8abc2ccaf8106f20243568cd7fa74174386eb85d7ea381201e97e2fd527033e0 * Simplification and optimization of the JSON parser. FossilOrigin-Name: f5ec9485119a2a6cb33eb864c7ca9b41d4a2ed08ab6ad9a6b0dd9358ab253576 * Performance optimization in jsonAppendString().
FossilOrigin-Name: fdf00e96239c73fb67e2acecc5b95f55a1fc51c3deed4512613c0d6070ce5805 * Minor fix to the header comment on jsonXlateTextToBlob(). FossilOrigin-Name: c3677ba410208c07b711f5f526eb5cf039a8eee49f632c7ae04fa55cdfbb9058 * Fix potential unsigned integer underflow in jsonAppendString(). FossilOrigin-Name: d2fba2cbdc3870d34228c1a9446eced884325acc183900d7dd0b96132570fb4a * Do not allow a JsonParse object to be considered "editable" after an OOM. FossilOrigin-Name: c6bacf57bd6fe0fee00c9d41163a270b60997c20659949971bbf5c6c62622bfe * Protect a memcpy() against OOM conditions. FossilOrigin-Name: 26144d1c25ae0435db568009ba05e485d23d146f2b1f29f3a426c87860316aed * Ensure that tokendata=1 queries avoid loading large doclists for queries like "common AND uncommon", just as tokendata=0 queries do. FossilOrigin-Name: 7bda09ab404a110d57449e149a3281fca8dc4cacf7bd9832ea2a1356ad20fe8e * Take extra care to ensure that JSONB values that are in cache are actually owned by the JSON subsystem, and that ownership of such values is not handed back to the bytecode engine. FossilOrigin-Name: 1304534001e9ef66c6b12752b69d790bfa3427cc803f87cc48ca22ae12df0fdf * When tokendata=1 queries require multiple segment-cursors, allow those cursors to share a single array of in-memory tombstone pages. FossilOrigin-Name: e0175d07e4094db5ea4b0378a5ff480dafb6ba9da86a113fa767c4c89c3c866f * Fix harmless compiler warnings. Refactor some identifier names for clearer presentation. FossilOrigin-Name: 7e3941502789c5afaf19b08112f464abf5e3cba7f92fc9290af2a0f96127ad9a * Code and comment cleanup. Everything should work the same. FossilOrigin-Name: c640754df0d3ffdad994745f0d0e10c8f19f424b87f6a6e6e269491a0350b950 * Fix various compiler warnings and other problems with the new code on this branch. FossilOrigin-Name: 3a623cfa173b4035c759cb84985d11d8727053beb383648503987d6ab15c0ef0 * Fix harmless compiler warnings reported by MSVC. 
FossilOrigin-Name: 419652c0c82980bd043584dcd2976f91dfff7b926b216d597698299850b855c0 * Implement strict JSONB checking in the json_valid() function. FossilOrigin-Name: 0f26d38880fcbc207abcc94dbc170a7428bab1b4f0b7731aaf5bee0224000994 * Minor code changes for consistency and to simplify testing. FossilOrigin-Name: df272bd837910ad9e03e222716a1201a601399664365f1dcf73d5932372518ed * Do not let bad hexadecimal digits in malformed JSONB cause an assertion fault. FossilOrigin-Name: 8dec1ba1e5076ff596756e00c1e2ada0245f168a503dd1cadadf848331acfac3 * Enable incorrect JSONB to be rendered into text without hitting an assertion for a bad whitespace escape in a string. FossilOrigin-Name: 4d6a9a217df6792b41766b774fb0c0553b45f9104c26a0955bf4a30862d7d7bf * Ensure that OOM conditions in the generation of the "bad JSON path" error message result in an SQLITE_NOMEM error. FossilOrigin-Name: aa0e02b5c26a2ef3d6216a0ed8bc01382be43173485f898cb63f2a8c559f2e74 * Avoid problems when the path argument to json_tree() contains embedded U+0000 characters. FossilOrigin-Name: 9f055091af01a5dddba1a7e9868ad030c8f206237e1569215cb161e53e54aa71 * Remove dead code. Improved reporting of errors in JSON inputs. FossilOrigin-Name: 2eaa738e6b5c1b67b3e57c868d9c3a30eea38a0b3b8b02482f06d57a45b10921 * Back off on the use of strlen() for situations where sqlite3_value_bytes() will work as well, for performance. FossilOrigin-Name: 79fb54fbb8b9c30f47cdbd437d24a21542716241e822749e5e28c9fbc449bfa8 * Better pre-scan size estimations for objects in the JSON parser resulting in fewer reallocations and memmove operations. FossilOrigin-Name: 526b27f90897f5e35dfff7257daf6c4ce4798d649b09b8aecfb02df0449e3c51 * Repair issues and inefficiencies found during testing. FossilOrigin-Name: ae973cb1515f9d76409c92a2ca2ffd6b71f32b0b490a4886770e7c1b90f12611 * Add tests for using tokendata=1 and contentless_delete=1 together. FossilOrigin-Name: a2506b8c9718054912270055638204753c4156bbc115e55194e6df9d7e76cb10 * Two new NEVER macros. 
FossilOrigin-Name: 52632c92cb06faf0e804654b3490fd6c199521107bd30c8fcbc3a2a5a488098f * Remove reachable ALWAYS and NEVER macros. FossilOrigin-Name: f601de3eeabd85993c1f5ee96b62de6fdabbeae2fe8950e00d08feb48d42c498 * Fix bug in xInstToken() causing the wrong token to be returned. FossilOrigin-Name: da78d07e77cbc783fbc725758911c230fd6a1c1885d9576125de955dcc2bd37f * Continuing simplifications and code cleanup. FossilOrigin-Name: ddf92b5059a9106753fd18b82ba8daa269a62af947561c460790107b83416f0b * Fix a problem with the xInstCount() API and "ORDER BY rank" queries. FossilOrigin-Name: 317a50563d9e8586fda136e513727241b414e7267d50a06571c8ebd0eae710bc * Fix memory leak in new code on this branch. FossilOrigin-Name: ebc160b9a05568df66f86e30804399ee29d34b44a60c57e062f98cb92826353f * Fixes for xInstToken() with tokendata=0 tables. And with prefix queries. FossilOrigin-Name: 78fbb71598b1ca756acc078253880a1d0f7983a5a26b9efc683e6488122505a1 * Fix errors in rendering JSON5 escape sequences embedded in JSONB. FossilOrigin-Name: f1a51ae3863557526a51c6e98e71fcdf4f1ed14a36212b3c90f7408f926345e4 * Do not make the input JSONB editable in json_remove() if there are no PATH argument. FossilOrigin-Name: 66594544f3ba9977475a3e3f74404eb2b2fb845053b28bd24c2b52c7df94e9d7 * Fixes to error handling in json_array_length(). FossilOrigin-Name: aa85df2d26b74c171c55bde19ef17c4f11f40b8af7181bbf7162f87cdea7e88b * Add further tests for xInstToken(). FossilOrigin-Name: 8582707f16133f003a6687f68cbea03d4eb6c2a0e2e07746b7cace0c44e84fa4 * Rename the internal routine jsonMergePatchBlob() to just jsonMergePatch(). FossilOrigin-Name: ebf667b616235bb64b83832008342ba5e7b10b2c170d7cebc431f040fef7ecfb * Fix OOM and corrupt JSONB handling in json_patch(). FossilOrigin-Name: 1910feb0b7d5cc2b810c3322f6cca281d8730182d30d162bd7bb56800979ea91 * Use an assert() to fix a harmless static analyzer warning. 
FossilOrigin-Name: a249ca657e624028bc6b3d2c2bcedd7162d118addb7d62ce519920cecebf1860 * Clean up the JSONB performance test script. FossilOrigin-Name: 905301075a7fc1010ee7e754867b1b698c9b8576d50e98125def32a5dfb7ee9d * Small performance gain by unwinding the string literal delimiter search loop in the JSON parser by one more level. FossilOrigin-Name: 4c587feac153e8ebe526559ec3d254f545f81e8d1ed3126f91a5ff25ec4aa72e * Use strspn() to accelerate whitespace bypass in the JSON parser. FossilOrigin-Name: 843197df08352bdff4b87be91d160e574572aded0d0c66142fd960000c0b4701 * Miscellaneous comment cleanup and typo fixes. FossilOrigin-Name: 59446dc0bd0091572122a3c8b4653d7a2dc867d16c4a5919f79b81bc3a673ce3 * Further tests for the new code on this branch. FossilOrigin-Name: 59d008b6c23ab900377bc696ee19381feb7614bac80546eae361e401c3620c4e * Use extra assert() statement to silence harmless static analyzer warnings. FossilOrigin-Name: 174c2b2eef5fecd96a5fc89b81032fe81f7801f12097cea10e7e7f0a02114813 * README.md typo fix reported in the forum and update all links from http: to https:. FossilOrigin-Name: 5c48acdbb44185b352b54911a57a6986d6c7e624bdeba2af48b985d29f0292bf * Increased rigor in comparisons between object labels in JSON. FossilOrigin-Name: 2bc86d145fccc07107b7753cb1a69122676d4096fe59c454497bd81a6142d45e * The rule for the RHS of the ->> and -> operators when the RHS does not begin with $ is that it must be (1) all digits, or (2) all alphanumerics, or (3) contained within [..] or else it will become a quoted label. FossilOrigin-Name: 0e059a546ec11fa5c6d007bd65c249ee2422f1facbdb2792c53e0bc0ccc97e14 * Test cases for object label matching with escape sequences. FossilOrigin-Name: c6f2aa38e95b7888650cfa7bb773b18a28e01d883033ac77be6d504ffe417d18 * In CLI, move -interactive flag handling back to arg-loop pass 2. 
FossilOrigin-Name: 63cb05a862532d2d56e9e81fe32ced09bf58f03146587a118f11c2a84e195e69 * Fix the routine that determines the json_tree.path value for the first row so that it correctly takes into account escape sequences in the path argument. FossilOrigin-Name: b9243ee8a37c62eb8848e765bd4af83bc1b3d3eb24fb4268a1357ad1f8b2e1fb * Correctly handle 8-byte sizes in the JSONB format. [forum:/forumpost/283daf08e91183fc|Forum post 283daf08e91183fc]. FossilOrigin-Name: 73d390f39c0bbbc017e01544e4d43c76761f2599bd57f900131c706270dfd202 * Update documentation comments in fts5.h. FossilOrigin-Name: 38c50e22c98607e6c1fd78d7615cda534773b6d4fd85c712b54749fcd7af0c83 * Work around LLVM's newfound hatred of function pointer casts. [forum:/forumpost/1a7d257346636292|Forum post 1a7d257346636292]. FossilOrigin-Name: ec0ae4030968c782af48d1c776351c14b2ada21d40aeb97915f33df30706e18f * Fix compiler warning about shadowed variable in fts5_index.c. FossilOrigin-Name: ee70e4c1c9c41617850228e48d8df44f105cf2fbbe789340ceca6f27ad6ce5eb * Improved detection of corrupt JSONB in the jsonReturnFromBlob() function. FossilOrigin-Name: b014736c1f80ccc46fb4b24ac04310a6ce5cb5b6653665efff366cb3bc742257 * Add ALWAYS() on branches added in [ec0ae4030968c782] that are always true. FossilOrigin-Name: 451cef8609e96dd9244818adc5c6f240544694bcb4ae620e88f90e403e59d70f * Rework the jsonEachPathLength() routine in json_tree() so that it is less susceptible to problems due to goofy object labels. FossilOrigin-Name: 858b76a00e8ff55215f7a2e6a4cd77fc4d4f98dea7224cd90488744f5ce246a4 * Different fix for the fts5 COMMIT-following-OOM problem first fixed by [fba3129d]. This one does not cause problems if an fts5 table is renamed and then dropped within the same transaction. FossilOrigin-Name: d8c6b246944934a7a6e027b3f5b986fd64a19dd5c5c5175f4ea8586da59a6764 * Fix a problem with handling OOM and other errors in fts5 when querying tokendata=1 tables. 
FossilOrigin-Name: bc911ab5953532956510c199be72b1d3c556f2d0ddbd7fc0ae6f5f917b337b48 * Fix a null-pointer dereference in fts5 tokendata=1 code. FossilOrigin-Name: d69fa8f0504887f968d9a190ecb889ddb40bb1b56d0d4479f9819c106aec719b * Avoid an assert() failure when querying an fts5vocab table that accesses a tokendata=1 fts5 table with corrupt %_data records. FossilOrigin-Name: 386ba9e20423fb2f623d6adc9d3c310fb1b135f54a1dad15ef3b593d97886926 * Ensure an fts5vocab table never uses a special tokendata=1 merge cursor. FossilOrigin-Name: 1e26510e83b40c9bd2e8bfa2a0e81f2cb915e78fed773204ef537683e48b61dc * Avoid dropping an error code in new fts5 tokendata=1 code. FossilOrigin-Name: a66596e33dc9aa4bab2ec3ff45546e1321d0a11bdc764f8381b315292ca92423 * Fix a harmless compiler warning about "confusing indentation". FossilOrigin-Name: 34f9e9a8c4bea13f60f43062e25cd7d9422f2e7f5b371ed0ddadc9abeb3ca256 * Fix a potential problem with RCStr access on a JsonString object that is not really an RCStr. Fuzzer/UBSAN find. FossilOrigin-Name: d2f2174ce2cc89606034e158149a2d05fc3627ec4d5cdb772add7a2250f29d78 * Fix a harmless UBSAN warning. FossilOrigin-Name: 1503cba6d17e9bade7a5c103ddd23241ff4741f9a2e3032ffe2987af243dae65 * Fix a potential use of uninitialized value in json_valid() with 2nd argument of 8. FossilOrigin-Name: fa102036fe46eeb71b7df3e265be1935ae5c78e0b939b08841bcfb8abadbc77a * Work toward enhanced functionality for json_valid() with deep checking of the JSONB (second argument has bit 0x08). FossilOrigin-Name: c370d573198b151767f04e91bf8baa4ae0076751ae468c5709742a0b0ed16770 * Add SQLITE_TESTCTRL_VALIDATE_JSONB, which if enabled under SQLITE_DEBUG causes cross-checking of generated JSONB. FossilOrigin-Name: b410a4db74a650003539ffaaea18519d5159b504daac47db6a4874b730f40ac8 * Rename the new test-control to SQLITE_TESTCTRL_JSON_SELFCHECK. Make it so that the current value of the setting can be interrogated.
FossilOrigin-Name: 7aff1d9a4cb17ecd5abab21ab032f35a78741dd641ddd8cbcc85fc4a81a0707d * Activate JSON_SELFCHECK within fuzzcheck. FossilOrigin-Name: 4d14e733bb521aed65e98533969d2303738232ae87dab70fdf7962e6513195f5 * json_valid(*,8) allows minus-signs on hexadecimal literals. FossilOrigin-Name: c0d7f4520d839a268b3fd2474d0897a9832aa608bd6238b3e287fabecf07a350 * json_error_position() now uses jsonValidityCheck() to find the approximate position of an error in a JSONB blob. FossilOrigin-Name: c3d60cf7028a333b825d5b89516945a73e0c158ac81d8bcc117d21bfd98602c8 * The json_error_position() function now reports an approximate byte offset to the problem in a JSONB if there is a problem. FossilOrigin-Name: 80d5d94dff6a2d2557039be3d7d47c1a6003c4b98defe0bd411acfeb963ad5dd * Validity checking of text nodes in JSONB. FossilOrigin-Name: fa5160687c2f970d407e8af73c246f7cd806bb4ce35f29a79ac534a8646a6c8e * Improvements to JSONB validation - catch more cases where the input does not conform to spec. FossilOrigin-Name: be1864eac4eb75cc30bf98f73092c8608467f4bd956240df6a0cbea9f1e09e85 * Add NEVER to two unreachable branches in JSON. FossilOrigin-Name: c96ebb086feb89341565cc52b970ae7799ce1327fe1ad4fc790f1b0dcaa6e229 * Worker1 Promiser API: when multiple db connections are active then use the requested connection instead of always the first-opened connection. Bug reported in [forum:894c330e7f23b177|forum post 894c330e7f23b177]. FossilOrigin-Name: 194276e18e0268829061c09317e7f9f527a703eb45f1755ff1dd30bd99dc1b68 * Fix the JSON object label comparison object so that it works correctly even if the label ends with escaped whitespace. FossilOrigin-Name: 4d5353cadd7b7c5f105bc197f3ec739e2d041472d6b3e939654c9f9cfc2749ae * Improvements to UTF8 handling, and especially the handling of invalid UTF8, in the JSON routines. FossilOrigin-Name: 1b229c1101d6c384a30f343c5e47b471ab084b2d8e81170eb8f642afc1c67e3b * Bug fix in the JSONB validator. 
dbsqlfuzz ac6fa521a08609a642198e7decf64180e750b3c4 FossilOrigin-Name: 3e940a6a08b0a0434650cd3d8dd286e09ad8ab805b0a4d515e57bba5d3608577 * Avoid invoking sqlite3ExprColUsage() on an unresolved column reference. dbsqlfuzz fc34aa62df4de103705d11b807074687ffafbda5. FossilOrigin-Name: ac9314c0e335694b48c613145f5397247bb88c51806cd0dc3ed4ec306db4bbad * In CLI, fix .read inability to open 2GB+ files on WIN32. FossilOrigin-Name: 56c80a62d2e033d64ba5d545ae9cbe3ed7c9d046c0a3fafb6cfa2f0b562d1ef0 * Pass subtype information through the aggregate ORDER BY sorter for aggregate functions that use subtype information. FossilOrigin-Name: 3536f4030eab6d650b7ed729d2f71eb6cc3b5fbe16b4e96b99008d66522aaccb * Improve the error message returned by an fts5 'rebuild' command on an external content table if there is a problem with the content table or view. FossilOrigin-Name: 0fbf4b8a58fde1c187908934da6f59999b146f32e07ac255cc531c5c4d7007fd * Fix harmless compiler warnings in JSON and FTS5. FossilOrigin-Name: 90135efccfeb1046f002bfcbd8dfec9a1a3b40cbe1b5c714ae065b06368e354f * Add assert()s to FTS5 to fix static analyzer warnings. FossilOrigin-Name: 27d4a89a5ff96b7b7fc5dc9650e1269f7c7edf91de9b9aafce40be9ecc8b95e9 * Use SQLITE_STRICT_SUBTYPE=1 by default for the JNI and WASM builds unless they're explicitly built with SQLITE_STRICT_SUBTYPE=0. FossilOrigin-Name: 990211357badf0ab08bd34cf6d25b58849d0fd8503e289c1839fc837a74e1909 * Correct --enable-sab flag in ext/wasm/GNUmakefile to fix a silent alhttpd args-parsing error. FossilOrigin-Name: 7b9b757d872a31395b0f6454e2309a6a4664b8bdd8749f6a15371cbe72c05b60 * Avoid running the "no_mutex_try" tests with SQLITE_ENABLE_SETLK_TIMEOUT builds as part of the release test. FossilOrigin-Name: 6b4e1344a28c213cbe8fb97f7f3f6688de93fb73ed96bf460ff74c959da1a712 * Do not run test script fts5origintest4.test with either "memsubsys1" or "mmap" permutations.
FossilOrigin-Name: 05a63d9603ef42cbee6dadff72d97583a9c78e549f70e9a808534d5c1ae7c28a * Fix a new JSON test case so that it works even if SQLITE_OMIT_VIRTUALTABLE is defined. FossilOrigin-Name: b995aae510888a9746b46545d176a0885d4738e1f1bc0b7ad7937ed023efd7d6 * Add mention of --buildonly and --dryrun to the testrunner.tcl usage screen. FossilOrigin-Name: 23b92d915c12ee768857e2c3c961832f390cad9b53b8bcfc2b97664baab25bb7 * Avoid expiring prepared statements in the middle of an integrity-check. FossilOrigin-Name: 88beb48472da4667c0727c8ebabe046ea526450ff837fe789d041ed3f1ff105e * In the count-of-view optimization, deferring freeing obsolete parts of the parse tree, on the off-chance that some other part of the code might be holding a pointer to those parts. FossilOrigin-Name: da442578856c87137eb1677d9b13b7c1cf15828cc41d4756572b278060f69bae * New test case based on Chromium bug report 1511689. FossilOrigin-Name: 2c7ef4b4d215f99f8d6787adb64e2037ae96e5dd6cb49c8b81634249f5e1b328 * Enable SQLITE_STRICT_SUBTYPE for default builds of the shell, fuzzcheck, and testfixture. FossilOrigin-Name: 5a0c517ed7e46c0f8a3db752cf5b9f8010c60f35084606abe9e7c1c4f993b4a7 * Enhancements to the "randomjson.c" extension. Automatically load that extension into fuzzcheck. FossilOrigin-Name: 70620405ab01d6a5d38bafa9ae175fd6e4eabaf2efb7854734278dafd7b05c99 * Enhancements to ext/misc/randomjson.c. FossilOrigin-Name: a4e6d1f86f3a502e4170f5a90031e269e48363e95114a66b84d373e3ce0b2704 * Bug fix in the randomjson.c extension. FossilOrigin-Name: 1f3a33df530dbe330ea8b14a69369b807b413b25a167d1a3938f8f0faf97cc91 * Ensure that all object labels for individual objects generated by randomjson.c are unique. FossilOrigin-Name: 29c46aca231b3f1e997ef306a5a651408185bf3ad09ab9fc1fe21ed18caa4d02 * Add randomjson.c to testfixture. Use it for a new set of invariant tests against JSON functions. 
FossilOrigin-Name: f1c040606bfe784804134d8f3ca130908fad5212b47e3c32792baab977470943 * Ensure that the insert/delete size delta on JSONB objects in the JSON cache are always set to zero. FossilOrigin-Name: 4b4581668a908473dbf1322a3e98bc7cca122998c44518ea183af7f0d1ba9f95 * Fix JSON to JSONB translation so that it deals correctly with Infinity and NaN. FossilOrigin-Name: 178cb84f36bdb45ba17511900d6d8ea8dfa14912fc5bf7094a20348174a36c95 * Add NEVER() to an unfalsifiable branch. FossilOrigin-Name: 9a0c67db366d38a0b0741f6a1ae333cf27cfe6f6b7c6eed94bdec9686f9f9f8a * New JSON invariant test cases. FossilOrigin-Name: a6a1367b0bf364b1a2e20e153c5f4a578624b8846f9ec0b7c9c3cba0ea2ec346 * Remove a stray comment in the JSON code. FossilOrigin-Name: 6618bdf0679405b43911ea8cd94050b12a5dc469f3dfe4759ee3ff850a55229e * Extra ALWAYS() macros to verify state in the sqlite3ExprCanBeNull() routine. FossilOrigin-Name: be19b84c9f3fe127165809908add148dbe9a827a55608b0490de7e69b7f7f191 * Always make the sqlite_dbdata virtual table available in the CLI. FossilOrigin-Name: e5fd3b32ad87586a7413570e568c9c1859a37a4f836cca074126471b125fb682 * When unable to resolve an identifier, change the Expr node into TK_NULL rather than TK_COLUMN, to prevent any downstream misuse of the non-existent column. dbsqlfuzz 71869261db80a95e4733afa10ff5724bf3c78592. FossilOrigin-Name: d2e6117e4f97ab98b01deb5fcad5520f8181d00bed8d904d34963c01d73df857 * Test case for the previous check-in. FossilOrigin-Name: df5a07e1a5122e08c2fa6076ac08adb2820f997ee11dd88b84863666899dfb57 * Ignore COLLATE operators when determining whether the result of a subexpression should be shallow-copied or deep-copied. FossilOrigin-Name: 34ae36a45e814bed7c8340412c7ef3fc849b82357656d0eb5f0f805e59d846d0 * Add ALWAYS() and NEVER() on branches made unreachable by recent changes. FossilOrigin-Name: c50e6c2ace49d0928b05cbfd877c621e9a0f77dc4e056ccb1dbe5cf118a00d00 * More precise computation of the size of data structures in the query planner. 
Response to [forum:/forumpost/7d8685d49d|Forum post 7d8685d49d]. FossilOrigin-Name: 0c8d88e41167ea92341dd1129be01b596a73f46bdcd5b0dd931441a979c013d0 * Fix harmless compiler warning in the randomjson.c extension. FossilOrigin-Name: debe7060b16669ada7304ffb9bf7616c8fa30bd286d8be871ed17fd6d64a3d4c * On second thought, we don't really need sqlite_dbdata accessible to the CLI. FossilOrigin-Name: 36fe6a61ef8fb393281a5e15119d716521219c7b971fbfd63bdea07d27a78ac9 * Remove redundant conditional from sqlite3ExprCanBeNull(). FossilOrigin-Name: 257f96a2d22c605885fa66220c28cf7dc5941c330bccee3f132b9e7b70d89d30 * In JSON - minor code cleanup and refactoring with a small size reduction and performance increase. FossilOrigin-Name: 215fabda38daecdbd38b1eca5a6aafbc61b6a36a8303f1d7164d5a1138e63134 * Avoid harmless integer overflow in pager status statistics gathering. Response to [forum:/forumpost/7f4cdf23f9|forum post 7f4cdf23f9]. FossilOrigin-Name: 206d8c650d937bc700946c40a82a62ea6bc4a80e5f3fb42d0ae2968de25f0644 * Fix SQLITE_ENABLE_SETLK_TIMEOUT assert() statements in os_unix.c to avoid reading past the end of the unixShmNode.aMutex[] array. FossilOrigin-Name: 029a05cd2928d43d81e4549cce5388c432e2c9e75e3fa0b2fe6e91021b2fb9ac * Add internal core-developer-only documentation of the JSONB format. FossilOrigin-Name: 4d30478863b2a60512010de9ec6e3099bfaf75d4afee20acec536713fe94334d * Add a new comment to debugging output routine sqlite3WhereLoopPrint() to remind us of what the various fields of the debug output mean. No changes to code. FossilOrigin-Name: da5f34fd4052432b1ae27bb12e56b358cdc5c1282653d60ed0f0fe62f727e4ee * Fix a usan complaint about signed integer overflow. FossilOrigin-Name: e65907e0279f4814ec957f0790777d8b94a86926cd27c52442b311b27efc0185 * Update #ifdef checks in pager.c and util.c to account for [0462a2612d1fc1d0] to resolve the build problem reported in [forum:9819032aac|forum post 9819032aac]. 
FossilOrigin-Name: 0f22d809a1c6c80e381f6bcd931fe4ec36dca0e28d07ab4f4f7f83c813424f60 * Add the -fno-sanitize-recover=undefined to the sanitizer builds used for sdevtest and release testing. To ensure that any test that provokes undefined behaviour fails. FossilOrigin-Name: 89563311adb0ab7c7a3eadb11c2e27fbca50c56fce8ca616628facbc00d72b88 * Change parameters on a debugging function to include "const". FossilOrigin-Name: 94c3e1110c6590261bd30ba317fba4dd94023d69b81a94f4b216cce748fe7489 * Add debugging output routines sqlite3ShowWhereLoop(X) and sqlite3ShowWhereLoopList(X) that can be invoked from a debugger to show a summary of the content of a single WhereLoop object or a list of WhereLoop objects. No change in release builds. FossilOrigin-Name: 5db30bcc338aac1cf081de2deec7e60749ae012e2b6f95ccf745623adb4a31dc * Improvements to the query planner to address the inefficiency described by [forum/forumpost/2568d1f6e6|forum post 2568d1f6e6]. FossilOrigin-Name: 72fcc12cda910a0e3f7875eb3d117b2a5608705c97703985427a02960f1ab5c5 * Avoid signed integer overflow during integrity_check of FTS5. FossilOrigin-Name: 5937df3b25799eceaadfb04d7226c9995d44c8d8edb5ac3ad02af9d7e3570726 * Fix harmless compiler warnings associated with [5db30bcc338aac1c] FossilOrigin-Name: e55d1c2333f35fc20615aa83a7843d08cae7945710a2156d44eee0cc37d90ade * Remove an ALWAYS() added in [c50e6c2ace49d092] because it is sometimes false. dbsqlfuzz c393a4f783d42efd9552772110aff7e5d937f15e. FossilOrigin-Name: b9daf37e57cde12c4de271a2b1995e8e91b6411f8c2e8882e536241929609b3a * Improved handling of malformed unicode within JSON strings. FossilOrigin-Name: e252bdf5f5de26ba8e2bcc6b0ad94121ed6fc4d86c02fe4a2a058ada93747beb * Ensure that the xColumnText(), xQueryPhrase() and xPhraseFirstColumn() APIs all return SQLITE_RANGE if they are passed a bad column or phrase number. 
FossilOrigin-Name: 1a8a9b1c89519d265869251e8b6d3c5db733f0d3a7dea6c7962811a8f1157dff * Fix a problem in the shell tool (not library) causing an out-of-bounds write if an ".open" command failed, then the user pressed ctrl-c to interrupt a query running on the substitute in-memory database. FossilOrigin-Name: 026618b9e321576f616a32e41329066ba629814170c6cfeef35430343f5003f3 * Enhance the (undocumented, debug-only) json_parse() SQL function so that it returns the text rendering of the JSONB parse of the input, rather than printing the rendering on stdout. FossilOrigin-Name: 056de8d551dcbdf1d162e2db15ed418fa9c786f900cd3972ef8a1dea3f4f3aa1 * Fix harmless compiler warnings in FTS5. FossilOrigin-Name: 3cd5ef44e40570c357f913a9483fa1cd72e7f2827a5ed5826bff99febae213b1 * Performance improvement by unwinding a loop in jsonAppendString(). FossilOrigin-Name: 190ab3c08431a0ba24d76392eab251f5c1792add05e4ec780998b299208eca95 * Update fts5origintext4.test to work with SQLITE_DIRECT_OVERFLOW_READ. FossilOrigin-Name: 15ed002aed12556aeb9bbe537c4ba839f0c95bac65a69d03401b37cc3fd11b92 * Enable SQLITE_DIRECT_OVERFLOW_READ unless it is specifically disabled using the -DSQLITE_DIRECT_OVERFLOW_READ=0 compile-time option. FossilOrigin-Name: 630604a4e604bfb36c31602917bfa8d42c10c82966d0819932bf8f827b9158b8 * Minor doc touchup in the JS bits. FossilOrigin-Name: 8d2120c35425081e2158d6a8a6b083c4adf8d694046b2d98f5fd235520920432 * Use SQLITE_ENABLE_STAT4 in both the WASM and JNI builds. FossilOrigin-Name: 99d11e6d0ae687ff6bac5119027f7b04d5e7185214e79cf8c56289cfa809b0f9 * WASM: various build cleanups and add initial infrastructure for a build which elides the oo1 API and its dependents (worker1 and promiser). Sidebar: an attempt was made to move generation of the build rules to an external script, but the mixed-mode make/script was even less legible than the $(eval) indirection going on in the makefile. 
FossilOrigin-Name: 563d313163c02b398ae85b7c2ed231019a14e006726f09a7c1f294a58bf4363f * JNI: move the ByteBuffer-using APIs from public to package visibility for the time being because they have UB-inducing possibilities which need to be worked out. Update test code to account for a change in custom FTS5 columntext() impls. FossilOrigin-Name: dc501275fcfab3ad9b6ebbadf7588b225a9dd07a0abac5be83d96f15bfba99e9 * Extra steps taken to avoid using low-quality indexes in a query plan. This branch accomplishes the same end as the nearby enhanced-stat1 branch, but with much less change and hence less risk. FossilOrigin-Name: c030e646262fee43a59b45fdc1630d972f8bf88ac3c142b6bdaf4cbb36695a4f * Remove some unnecessary computations from ANALYZE so that ANALYZE runs with fewer CPU cycles. These changes were spotted while working on the nearby enhanced-stat1 branch. So even if enhanced-stat1 is abandoned, that effort put into it will not have been in vain. FossilOrigin-Name: 5527e8c4abb904b1a438ec1c353d4a960bf82faaf3a2c742af1df7c613850441 * Back out [99d11e6d0ae6] (enabling of STAT4 in WASM/JNI), per /chat discussion. FossilOrigin-Name: cd7929ee2e2c305475fa5a4dff2edaccf90067126ef04a1c2714cf464925453f * Update and clean up the in-makefile docs for ext/wasm. FossilOrigin-Name: 7a7b295e6d7e95ee4a46cc42761895d11700ab295870c5a4380072bb4a5b7099 * Elaborate on the various build flavors used by ext/wasm/. Doc changes only. FossilOrigin-Name: d489232aa492618d4c8e5817addb2323d0ca067742d7140216914239a66fb221 * Increase the default "max_page_count" to its theoretical maximum of 4294967294. FossilOrigin-Name: ffb35f1784a4305b979a850485f57f56938104a3a03f4a7aececde92864c4879 * Fix a problem in fts5 caused by a COMMIT involving fts5 data that immediately follows a ROLLBACK TO that does not. 
FossilOrigin-Name: 55c61f6a8d6a1bc79497b05669beac5c5397b06382bf24b6bec54845962d219b * Adjust the sqlite3PagerDirectReadOk() routine (part of the SQLITE_DIRECT_OVERFLOW_READ optimization) to use less code and to be more easily testable. FossilOrigin-Name: eed670ea2a9424f7df4eeb01c152fc38f7190a5e39aa891651b28dc91fcdc019 * Back out [b517a52fa36df0a0] which is no longer reachable due to early error detection enhancements in [166e82dd20efbfd3]. FossilOrigin-Name: 704943e96f2620b99260667ac9922c2f72bc3e92e2dfe1d9c2a91c7b704564d9 * Update the sqldiff.exe utility program so that it uses the sqlite3_str string interface, and so that it does console output using the ext/consio extension. FossilOrigin-Name: 4443b7e592da97d1cb1b3b79ed0559452d8057a33aba4d184c2fffbf200e05f5 * Enhance sqlite3_analyzer.exe so that it uses the ext/consio extension. FossilOrigin-Name: 769de0b98e136e4a0945b80216d0c9583c1ccd9de69cb0494875c2300e172646 * Change a constant from decimal to hex to avoid a compiler warning on Mac. FossilOrigin-Name: e3acb8a43ad544fd5b5341058276bd3b61b6bdb6b719790476a90e0de4320f90 * Convert the JSON functions to use lookaside memory allocation whenever feasible, to avoid hitting the global memory allocator mutex. FossilOrigin-Name: a79a244954f728596da3c0e28fa3b887258d1bd831f53881970f418f3fba84c7 * Fix a #ifdef in sqlite3_test_control() that was preventing builds with SQLITE_OMIT_WSD. FossilOrigin-Name: d546a9c94caf7408cc6e4530ec190d3a13fae09dc15b71b03d6369e02ee62abd * Restructure some code to fix what appears to be a false-positive UBSAN warning. FossilOrigin-Name: fe952c12903ea2150880c8bb57cda2efc00ce9fa801568a68c619e0745f30567 * Avoid errors with SQLITE_OMIT_VIRTUALTABLE builds in json106.test and unionall.test. FossilOrigin-Name: 90e8a233549a2d31e6959ce3fec927693b772ab3c0abce65e81d7350d2ca5cc6 * Update extension ext/misc/totext.c to avoid both ubsan warnings and dubious real->integer conversions. 
FossilOrigin-Name: c626aa108a7a30cef54af8d93ac9e45749568ed38e4e06623a6bad6b4bf6e8ec * Update JSON performance testing procedures for clarity and to describe how to do performance testing of JSONB. FossilOrigin-Name: b115b4f75bc7c4e6d9bab5edf13297f27a36f30083c80d2c502b01208da5dfc0 * Ensure that SQLITE_PROTOCOL is not returned too early when a SQLITE_ENABLE_SETLK_TIMEOUT build fails to open a transaction on a wal mode database in cases where blocking locks are not being used. FossilOrigin-Name: b934a33671d8a0190082ad7e5e68c78fe0c558d102404eafc1de26e4e7d65b92 * Updates to RTREE to facility testing. FossilOrigin-Name: 7a5b42ff74882c58493dc8b710fde73d4ff251f5d42271d84be73ceaabc01698 * Remove an ALWAYS() from RTREE. Dbsqlfuzz found a way to make it false. FossilOrigin-Name: 40f0a29e6dd90fcb969d7c0e49728ba0ee8f31d9e8f502b9a21469620a8ad283 * Minor change to os_unix.c to facilitate 100% MC/DC testing. FossilOrigin-Name: 0dfa7b4da134db281c3c4eddb4569c53a450f955f0af2f410e13db801aff4ea2 * Automatically turn off DEFENSIVE mode in the shell tool when executing scripts generated by the ".dump" command against an empty database. Add a warning to the top of generated ".dump" scripts that populate virtual tables. FossilOrigin-Name: 6e9e96b7e7afb9420110f4b93d10b945c9eadfde5e9c81e59ae9ee8167e75707 * Fix date on new file shell9.test. FossilOrigin-Name: c82da712113d5dcd63b764dbc68842026989627abc840acb4a33f3a4972b832a * Improved resolution of unqualified names in the REINDEX command. [forum:/info/74cd0ceabd|Forum thread 74cd0ceabd]. FossilOrigin-Name: 97709ce2a1f5ae05495e412ca27108048e5b8a63a1e3bca4be13933f7527da7b * Put an SQLITE_ENABLE_SETLK_TIMEOUT branch inside the appropriate ifdef with an assert on the else since the condition is always false if SETLK_TIMEOUT is not available. FossilOrigin-Name: d81e7a036ac5d70b6a6ee6ab7d81e041c1f5fc04b70bcee47e203d521caf7e93 * In fts5, flush the contents of the in-memory hash table whenever the secure-delete option is toggled. 
This prevents spurious corruption reports under some circumstances. FossilOrigin-Name: ccf552319a62bfb329820a3bc1f490bacbaa6e90694a257fc65a568a605542c3 * Fix a comment in sessions. No functional changes. [forum:/forumpost/8c20dc935b|Forum post 8c20dc935b]. FossilOrigin-Name: b0eb6d3628c1f70399a22d9fd3b79a796bc343adfeba50515440db609565961a * Have the shell tool automatically enable SQLITE_CONFIG_DQS_DDL when executing a ".dump" script against an empty db. FossilOrigin-Name: f47a5f4e0ce078e6cc1183e6cbb3c4013af379b496efae94863a42e5c39928ed * Version 3.45.0 FossilOrigin-Name: 1066602b2b1976fe58b5150777cced894af17c803e068f5918390d6915b46e1d * wasm build: reformulate an awk invocation to account for awks which do not support the -e flag. Problem reported on the forum via a docker-hosted build. FossilOrigin-Name: 90dd51153fd0a6197e2ee49b5492ad120f0bfc324b60651f3d4f47c286887b46 * When backing out a character in a constructed string in JSON, first make sure the string has not been reset by on OOM. FossilOrigin-Name: 950bf9fe7829864e0abe6d71ca0495f346feb5d7943d76c95e55a6b86ea855da * Ensure that the xIntegrity methods of fts3 and fts5 work on read-only databases. FossilOrigin-Name: e79b97369fa740f62f695057d4a2cf8dae48a683982ec879f04a19039c9cb418 * When a JSON input is a blob, but it looks like valid JSON when cast to text, then accept it as valid JSON. This replicates a long-standing bug in the behavior of JSON routines, and thus avoids breaking legacy apps. FossilOrigin-Name: 4c2c1b97dce46a279846380c937ac6de5c367927c6843516641eead7ea6db472 * Bump the version number to 3.45.1 FossilOrigin-Name: 54d34edb89430b266221b7e6eea0afbd2c9dafbe774344469473abc8ad1e13fd * Fix harmless "unused parameter" compiler warning in the new fts3IntegrityMethod implementation. 
FossilOrigin-Name: 9d459f6b50fb6f995e6284a0815c5e211cacac44aad0b96bf01ba68af97f51fc * In os_unix.c and os_win.c, do not allow xFetch() to return a pointer to a page buffer that is right at the end of the mapped region - if the database is corrupted in a specific way such a page buffer might be overread by several bytes. FossilOrigin-Name: d131cab652ac11795322af13d0b330e7e44ab91587a1a3e73fe7b9a14b2dd531 * Slight adjustment to test results for Windows in mmap1.test due to the previous check-in. FossilOrigin-Name: a8043eaed899285b5cf4aab0c23c3dabb8975910c353cb579fd1f1655db390f6 * Apply the same fix found in [99057383acc8f920] to descending scans. FossilOrigin-Name: 593d6a1c2e9256d797f160e867278414e882a3d04d7fea269bea86965eaa7576 * Automatically disable the DISTINCT optimization during query planning if the ORDER BY clause exceeds 63 terms. FossilOrigin-Name: 6edbdcc02d18727f68f0236e15dde4ecfc77e6f452b522eb4e1e895929b1fb63 * When rendering JSONB back into text JSON, report an error if a zero-length integer or floating-point node is encountered. Otherwise, if the node occurs at the very end of the JSONB, the rendering logic might read one byte past the end of the initialized part of the BLOB byte array. OSSFuzz 66284. FossilOrigin-Name: 3ab08ac75d97ffd9920f5c924362a4819560b40faa8a4f9100068057f5fa420a * Avoid a potential buffer overread when handling corrupt json blobs. FossilOrigin-Name: ac402cc551b2cbe3f8fbbc9c711a04942eab5eeb9d2f4a394e9370d2380427b5 * Detect malformed nested JSONB earlier and stop rendering to avoid long delays. FossilOrigin-Name: ab40e282465c989bf249453d7c6f60072a38b691f579411cdf9aad234b20f0f7 * Version 3.45.1 FossilOrigin-Name: e876e51a0ed5c5b3126f52e532044363a014bc594cfefa87ffb5b82257cc467a --------- Co-authored-by: drh <> Co-authored-by: dan <Dan Kennedy> Co-authored-by: stephan <stephan@noemail.net> Co-authored-by: larrybr <larrybr@noemail.net>
9067 lines
277 KiB
C
9067 lines
277 KiB
C
/*
|
|
** 2014 May 31
|
|
**
|
|
** The author disclaims copyright to this source code. In place of
|
|
** a legal notice, here is a blessing:
|
|
**
|
|
** May you do good and not evil.
|
|
** May you find forgiveness for yourself and forgive others.
|
|
** May you share freely, never taking more than you give.
|
|
**
|
|
******************************************************************************
|
|
**
|
|
** Low level access to the FTS index stored in the database file. The
|
|
** routines in this file implement all read and write access to the
|
|
** %_data table. Other parts of the system access this functionality via
|
|
** the interface defined in fts5Int.h.
|
|
*/
|
|
|
|
|
|
#include "fts5Int.h"
|
|
|
|
/*
|
|
** Overview:
|
|
**
|
|
** The %_data table contains all the FTS indexes for an FTS5 virtual table.
|
|
** As well as the main term index, there may be up to 31 prefix indexes.
|
|
** The format is similar to FTS3/4, except that:
|
|
**
|
|
** * all segment b-tree leaf data is stored in fixed size page records
|
|
** (e.g. 1000 bytes). A single doclist may span multiple pages. Care is
|
|
** taken to ensure it is possible to iterate in either direction through
|
|
** the entries in a doclist, or to seek to a specific entry within a
|
|
** doclist, without loading it into memory.
|
|
**
|
|
** * large doclists that span many pages have associated "doclist index"
|
|
** records that contain a copy of the first rowid on each page spanned by
|
|
** the doclist. This is used to speed up seek operations, and merges of
|
|
** large doclists with very small doclists.
|
|
**
|
|
** * extra fields in the "structure record" record the state of ongoing
|
|
** incremental merge operations.
|
|
**
|
|
*/
|
|
|
|
|
|
/*
** Tuning constants controlling how much work is done per merge/optimize
** step, and when doclist indexes are created.
*/
#define FTS5_OPT_WORK_UNIT  1000    /* Number of leaf pages per optimize step */
#define FTS5_WORK_UNIT      64      /* Number of leaf pages in unit of work */

#define FTS5_MIN_DLIDX_SIZE 4       /* Add dlidx if this many empty pages */

/* Prefix character for keys belonging to the main term index. */
#define FTS5_MAIN_PREFIX '0'

/* As documented in the Overview comment above, at most 31 prefix
** indexes are supported. */
#if FTS5_MAX_PREFIX_INDEXES > 31
# error "FTS5_MAX_PREFIX_INDEXES is too large"
#endif

/* Maximum number of levels permitted in an index structure record
** (see the structure-record format notes below). */
#define FTS5_MAX_LEVEL 64
|
|
|
|
/*
** There are two versions of the format used for the structure record:
**
**   1. the legacy format, that may be read by all fts5 versions, and
**
**   2. the V2 format, which is used by contentless_delete=1 databases.
**
** Both begin with a 4-byte "configuration cookie" value. Then, a legacy
** format structure record contains a varint - the number of levels in
** the structure. Whereas a V2 structure record contains the constant
** 4 bytes [0xff 0x00 0x00 0x01]. This is unambiguous as the value of a
** varint has to be at least 16256 to begin with "0xFF". And the default
** maximum number of levels is 64 (FTS5_MAX_LEVEL).
**
** See below for more on structure record formats.
*/
#define FTS5_STRUCTURE_V2 "\xFF\x00\x00\x01"  /* V2 format marker bytes */
|
|
|
|
/*
|
|
** Details:
|
|
**
|
|
** The %_data table managed by this module,
|
|
**
|
|
** CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB);
|
|
**
|
|
** , contains the following 6 types of records. See the comments surrounding
|
|
** the FTS5_*_ROWID macros below for a description of how %_data rowids are
|
|
** assigned to each of them.
|
|
**
|
|
** 1. Structure Records:
|
|
**
|
|
** The set of segments that make up an index - the index structure - are
|
|
** recorded in a single record within the %_data table. The record consists
|
|
** of a single 32-bit configuration cookie value followed by a list of
|
|
** SQLite varints.
|
|
**
|
|
** If the structure record is a V2 record, the configuration cookie is
|
|
** followed by the following 4 bytes: [0xFF 0x00 0x00 0x01].
|
|
**
|
|
** Next, the record continues with three varints:
|
|
**
|
|
** + number of levels,
|
|
** + total number of segments on all levels,
|
|
** + value of write counter.
|
|
**
|
|
** Then, for each level from 0 to nMax:
|
|
**
|
|
** + number of input segments in ongoing merge.
|
|
** + total number of segments in level.
|
|
** + for each segment from oldest to newest:
|
|
** + segment id (always > 0)
|
|
** + first leaf page number (often 1, always greater than 0)
|
|
** + final leaf page number
|
|
**
|
|
** Then, for V2 structures only:
|
|
**
|
|
** + lower origin counter value,
|
|
** + upper origin counter value,
|
|
** + the number of tombstone hash pages.
|
|
**
|
|
** 2. The Averages Record:
|
|
**
|
|
** A single record within the %_data table. The data is a list of varints.
|
|
** The first value is the number of rows in the index. Then, for each column
|
|
** from left to right, the total number of tokens in the column for all
|
|
** rows of the table.
|
|
**
|
|
** 3. Segment leaves:
|
|
**
|
|
** TERM/DOCLIST FORMAT:
|
|
**
|
|
** Most of each segment leaf is taken up by term/doclist data. The
|
|
** general format of term/doclist, starting with the first term
|
|
** on the leaf page, is:
|
|
**
|
|
** varint : size of first term
|
|
** blob: first term data
|
|
** doclist: first doclist
|
|
** zero-or-more {
|
|
** varint: number of bytes in common with previous term
|
|
** varint: number of bytes of new term data (nNew)
|
|
** blob: nNew bytes of new term data
|
|
** doclist: next doclist
|
|
** }
|
|
**
|
|
** doclist format:
|
|
**
|
|
** varint: first rowid
|
|
** poslist: first poslist
|
|
** zero-or-more {
|
|
** varint: rowid delta (always > 0)
|
|
** poslist: next poslist
|
|
** }
|
|
**
|
|
** poslist format:
|
|
**
|
|
** varint: size of poslist in bytes multiplied by 2, not including
|
|
** this field. Plus 1 if this entry carries the "delete" flag.
|
|
** collist: collist for column 0
|
|
** zero-or-more {
|
|
** 0x01 byte
|
|
** varint: column number (I)
|
|
** collist: collist for column I
|
|
** }
|
|
**
|
|
** collist format:
|
|
**
|
|
** varint: first offset + 2
|
|
** zero-or-more {
|
|
** varint: offset delta + 2
|
|
** }
|
|
**
|
|
** PAGE FORMAT
|
|
**
|
|
** Each leaf page begins with a 4-byte header containing 2 16-bit
|
|
** unsigned integer fields in big-endian format. They are:
|
|
**
|
|
** * The byte offset of the first rowid on the page, if it exists
|
|
** and occurs before the first term (otherwise 0).
|
|
**
|
|
** * The byte offset of the start of the page footer. If the page
|
|
** footer is 0 bytes in size, then this field is the same as the
|
|
** size of the leaf page in bytes.
|
|
**
|
|
** The page footer consists of a single varint for each term located
|
|
** on the page. Each varint is the byte offset of the current term
|
|
** within the page, delta-compressed against the previous value. In
|
|
** other words, the first varint in the footer is the byte offset of
|
|
** the first term, the second is the byte offset of the second less that
|
|
** of the first, and so on.
|
|
**
|
|
** The term/doclist format described above is accurate if the entire
|
|
** term/doclist data fits on a single leaf page. If this is not the case,
|
|
** the format is changed in two ways:
|
|
**
|
|
** + if the first rowid on a page occurs before the first term, it
|
|
** is stored as a literal value:
|
|
**
|
|
** varint: first rowid
|
|
**
|
|
** + the first term on each page is stored in the same way as the
|
|
** very first term of the segment:
|
|
**
|
|
** varint : size of first term
|
|
** blob: first term data
|
|
**
|
|
** 5. Segment doclist indexes:
|
|
**
|
|
** Doclist indexes are themselves b-trees, however they usually consist of
|
|
** a single leaf record only. The format of each doclist index leaf page
|
|
** is:
|
|
**
|
|
** * Flags byte. Bits are:
|
|
** 0x01: Clear if leaf is also the root page, otherwise set.
|
|
**
|
|
** * Page number of fts index leaf page. As a varint.
|
|
**
|
|
** * First rowid on page indicated by previous field. As a varint.
|
|
**
|
|
** * A list of varints, one for each subsequent termless page. A
|
|
** positive delta if the termless page contains at least one rowid,
|
|
** or an 0x00 byte otherwise.
|
|
**
|
|
** Internal doclist index nodes are:
|
|
**
|
|
** * Flags byte. Bits are:
|
|
** 0x01: Clear for root page, otherwise set.
|
|
**
|
|
** * Page number of first child page. As a varint.
|
|
**
|
|
** * Copy of first rowid on page indicated by previous field. As a varint.
|
|
**
|
|
** * A list of delta-encoded varints - the first rowid on each subsequent
|
|
** child page.
|
|
**
|
|
** 6. Tombstone Hash Page
|
|
**
|
|
** These records are only ever present in contentless_delete=1 tables.
|
|
** There are zero or more of these associated with each segment. They
|
|
** are used to store the tombstone rowids for rows contained in the
|
|
** associated segments.
|
|
**
|
|
** The set of nHashPg tombstone hash pages associated with a single
|
|
** segment together form a single hash table containing tombstone rowids.
|
|
** To find the page of the hash on which a key might be stored:
|
|
**
|
|
** iPg = (rowid % nHashPg)
|
|
**
|
|
** Then, within page iPg, which has nSlot slots:
|
|
**
|
|
** iSlot = (rowid / nHashPg) % nSlot
|
|
**
|
|
** Each tombstone hash page begins with an 8 byte header:
|
|
**
|
|
** 1-byte: Key-size (the size in bytes of each slot). Either 4 or 8.
|
|
** 1-byte: rowid-0-tombstone flag. This flag is only valid on the
|
|
** first tombstone hash page for each segment (iPg=0). If set,
|
|
** the hash table contains rowid 0. If clear, it does not.
|
|
** Rowid 0 is handled specially.
|
|
** 2-bytes: unused.
|
|
** 4-bytes: Big-endian integer containing number of entries on page.
|
|
**
|
|
** Following this are nSlot 4 or 8 byte slots (depending on the key-size
|
|
** in the first byte of the page header). The number of slots may be
|
|
** determined based on the size of the page record and the key-size:
|
|
**
|
|
** nSlot = (nByte - 8) / key-size
|
|
*/
|
|
|
|
/*
** Rowids for the averages and structure records in the %_data table.
** These are small fixed values; segment leaves, dlidx pages and tombstone
** pages use the large positive rowids built by the FTS5_*_ROWID macros
** defined below.
*/
#define FTS5_AVERAGES_ROWID     1    /* Rowid used for the averages record */
#define FTS5_STRUCTURE_ROWID    10   /* The structure record */
|
|
|
|
/*
** Macros determining the rowids used by segment leaves and dlidx leaves
** and nodes. All nodes and leaves are stored in the %_data table with large
** positive rowids.
**
** Each segment has a unique non-zero 16-bit id.
**
** The rowid for each segment leaf is found by passing the segment id and
** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered
** sequentially starting from 1.
*/
#define FTS5_DATA_ID_B     16     /* Max seg id number 65535 */
#define FTS5_DATA_DLI_B     1     /* Doclist-index flag (1 bit) */
#define FTS5_DATA_HEIGHT_B  5     /* Max dlidx tree height of 32 */
#define FTS5_DATA_PAGE_B   31     /* Max page number of 2147483648 */

/* Pack (segid, dlidx-flag, height, pgno) into a single 64-bit rowid.
** Bit layout, most- to least-significant (per the shift amounts below):
**
**     16 bits segid | 1 bit dlidx flag | 5 bits height | 31 bits pgno
*/
#define fts5_dri(segid, dlidx, height, pgno) (                                \
 ((i64)(segid) << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) +    \
 ((i64)(dlidx) << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) +                  \
 ((i64)(height) << (FTS5_DATA_PAGE_B)) +                                      \
 ((i64)(pgno))                                                                \
)

/* Rowid of leaf page pgno of segment segid (dlidx flag and height zero). */
#define FTS5_SEGMENT_ROWID(segid, pgno)       fts5_dri(segid, 0, 0, pgno)

/* Rowid of a doclist-index page for segment segid at the given height. */
#define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)

/* Tombstone hash pages are stored under (segid + 65536), keeping them in
** a rowid range distinct from the segment's regular leaf pages. */
#define FTS5_TOMBSTONE_ROWID(segid,ipg)       fts5_dri(segid+(1<<16), 0, 0, ipg)
|
|
|
|
#ifdef SQLITE_DEBUG
/*
** Debug builds only: return the SQLITE_CORRUPT_VTAB error code.
**
** Having corruption errors funnel through a real function (rather than
** using the constant directly) allows a debugger breakpoint to be set
** here to catch the first point at which corruption is detected.
** NOTE(review): presumably invoked via a macro in fts5Int.h in
** SQLITE_DEBUG builds - confirm against that header.
*/
int sqlite3Fts5Corrupt(void){ return SQLITE_CORRUPT_VTAB; }
#endif
|
|
|
|
|
|
/*
** Each time a blob is read from the %_data table, it is padded with this
** many zero bytes. This makes it easier to decode the various record formats
** without overreading if the records are corrupt.
*/
#define FTS5_DATA_ZERO_PADDING 8

/* Extra bytes of slack allowed for when sizing records/buffers.
** NOTE(review): the exact use is not visible in this part of the file -
** confirm against the buffer-allocation code before relying on this. */
#define FTS5_DATA_PADDING  20
|
|
|
|
/* Forward typedefs for the structure types used within this file. */
typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5DlidxLvl Fts5DlidxLvl;
typedef struct Fts5DlidxWriter Fts5DlidxWriter;
typedef struct Fts5Iter Fts5Iter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;
typedef struct Fts5TokenDataIter Fts5TokenDataIter;
typedef struct Fts5TokenDataMap Fts5TokenDataMap;
typedef struct Fts5TombstoneArray Fts5TombstoneArray;
|
|
|
|
/*
** A single record (blob) read from the %_data table.
**
** For segment leaf pages, szLeaf is the size of the leaf content without
** the trailing page-index footer; see ASSERT_SZLEAF_OK below for the
** invariant relating szLeaf and nn.
*/
struct Fts5Data {
  u8 *p;                          /* Pointer to buffer containing record */
  int nn;                         /* Size of record in bytes */
  int szLeaf;                     /* Size of leaf without page-index */
};
|
|
|
|
/*
** One object per %_data table.
**
** nContentlessDelete:
**   The number of contentless delete operations since the most recent
**   call to fts5IndexFlush() or fts5IndexDiscardData(). This is tracked
**   so that extra auto-merge work can be done by fts5IndexFlush() to
**   account for the delete operations.
*/
struct Fts5Index {
  Fts5Config *pConfig;            /* Virtual table configuration */
  char *zDataTbl;                 /* Name of %_data table */
  int nWorkUnit;                  /* Leaf pages in a "unit" of work */

  /*
  ** Variables related to the accumulation of tokens and doclists within the
  ** in-memory hash tables before they are flushed to disk.
  */
  Fts5Hash *pHash;                /* Hash table for in-memory data */
  int nPendingData;               /* Current bytes of pending data */
  i64 iWriteRowid;                /* Rowid for current doc being written */
  int bDelete;                    /* Current write is a delete */
  int nContentlessDelete;         /* Number of contentless delete ops */
  int nPendingRow;                /* Number of INSERT in hash table */

  /* Error state. */
  int rc;                         /* Current error code */
  int flushRc;                    /* NOTE(review): presumably an error code
                                  ** saved from a failed flush - confirm */

  /* State used by the fts5DataXXX() functions. */
  sqlite3_blob *pReader;          /* RO incr-blob open on %_data table */
  sqlite3_stmt *pWriter;          /* "INSERT ... %_data VALUES(?,?)" */
  sqlite3_stmt *pDeleter;         /* "DELETE FROM %_data ... id>=? AND id<=?" */
  sqlite3_stmt *pIdxWriter;       /* "INSERT ... %_idx VALUES(?,?,?,?)" */
  sqlite3_stmt *pIdxDeleter;      /* "DELETE FROM %_idx WHERE segid=?" */
  sqlite3_stmt *pIdxSelect;       /* Lookup on %_idx (SQL not visible here) */
  sqlite3_stmt *pIdxNextSelect;   /* Next-entry lookup on %_idx (presumed) */
  int nRead;                      /* Total number of blocks read */

  sqlite3_stmt *pDeleteFromIdx;   /* Deletion from %_idx (presumed; confirm) */

  sqlite3_stmt *pDataVersion;     /* Presumably "PRAGMA data_version" (see
                                  ** iStructVersion below) - confirm */
  i64 iStructVersion;             /* data_version when pStruct read */
  Fts5Structure *pStruct;         /* Current db structure (or NULL) */
};
|
|
|
|
/*
** Iterator used to step through a single doclist (a series of rowid and
** position-list pairs - see the "doclist format" notes above).
*/
struct Fts5DoclistIter {
  u8 *aEof;                       /* Pointer to 1 byte past end of doclist */

  /* Output variables. aPoslist==0 at EOF */
  i64 iRowid;                     /* Current rowid */
  u8 *aPoslist;                   /* Current position list (NULL at EOF) */
  int nPoslist;                   /* Size of aPoslist in bytes (presumed) */
  int nSize;                      /* NOTE(review): presumably the size of the
                                  ** poslist-size field itself - confirm */
};
|
|
|
|
/*
** The contents of the "structure" record for each index are represented
** using an Fts5Structure record in memory. Which uses instances of the
** other Fts5StructureXXX types as components.
**
** nOriginCntr:
**   This value is set to non-zero for structure records created for
**   contentlessdelete=1 tables only. In that case it represents the
**   origin value to apply to the next top-level segment created.
*/
struct Fts5StructureSegment {
  int iSegid;                     /* Segment id */
  int pgnoFirst;                  /* First leaf page number in segment */
  int pgnoLast;                   /* Last leaf page number in segment */

  /* contentlessdelete=1 tables only: */
  u64 iOrigin1;                   /* Lower origin counter value */
  u64 iOrigin2;                   /* Upper origin counter value */
  int nPgTombstone;               /* Number of tombstone hash table pages */
  u64 nEntryTombstone;            /* Number of tombstone entries that "count" */
  u64 nEntry;                     /* Number of rows in this segment */
};
|
|
/* One level in the segment hierarchy of an index structure. */
struct Fts5StructureLevel {
  int nMerge;                     /* Number of segments in incr-merge */
  int nSeg;                       /* Total number of segments on level */
  Fts5StructureSegment *aSeg;     /* Array of segments. aSeg[0] is oldest. */
};
|
|
/*
** In-memory representation of an index structure record. Reference
** counted via nRef.
*/
struct Fts5Structure {
  int nRef;                       /* Object reference count */
  u64 nWriteCounter;              /* Total leaves written to level 0 */
  u64 nOriginCntr;                /* Origin value for next top-level segment */
  int nSegment;                   /* Total segments in this structure */
  int nLevel;                     /* Number of levels in this index */
  Fts5StructureLevel aLevel[1];   /* Array of nLevel level objects */
};
|
|
|
|
/*
** An object of type Fts5SegWriter is used to write to segments.
** Fts5PageWriter accumulates the contents of a single leaf page as it
** is being written.
*/
struct Fts5PageWriter {
  int pgno;                       /* Page number for this page */
  int iPrevPgidx;                 /* Previous value written into pgidx */
  Fts5Buffer buf;                 /* Buffer containing leaf data */
  Fts5Buffer pgidx;               /* Buffer containing page-index */
  Fts5Buffer term;                /* Buffer containing previous term on page */
};
|
|
/* Writer state for one level of a doclist-index b-tree. */
struct Fts5DlidxWriter {
  int pgno;                       /* Page number for this page */
  int bPrevValid;                 /* True if iPrev is valid */
  i64 iPrev;                      /* Previous rowid value written to page */
  Fts5Buffer buf;                 /* Buffer containing page data */
};
|
|
/* State used while writing out a single segment. */
struct Fts5SegWriter {
  int iSegid;                     /* Segid to write to */
  Fts5PageWriter writer;          /* PageWriter object */
  i64 iPrevRowid;                 /* Previous rowid written to current leaf */
  u8 bFirstRowidInDoclist;        /* True if next rowid is first in doclist */
  u8 bFirstRowidInPage;           /* True if next rowid is first in page */
  /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
  u8 bFirstTermInPage;            /* True if next term will be first in leaf */
  int nLeafWritten;               /* Number of leaf pages written */
  int nEmpty;                     /* Number of contiguous term-less nodes */

  int nDlidx;                     /* Allocated size of aDlidx[] array */
  Fts5DlidxWriter *aDlidx;        /* Array of Fts5DlidxWriter objects */

  /* Values to insert into the %_idx table */
  Fts5Buffer btterm;              /* Next term to insert into %_idx table */
  int iBtPage;                    /* Page number corresponding to btterm */
};
|
|
|
|
/*
** Result of comparing a pair of segment iterators during a merge
** (stored in the Fts5Iter.aFirst[] array described below).
*/
typedef struct Fts5CResult Fts5CResult;
struct Fts5CResult {
  u16 iFirst;                     /* aSeg[] index of iterator with smaller key */
  u8 bTermEq;                     /* True if the terms are equal */
};
|
|
|
|
/*
** Object for iterating through a single segment, visiting each term/rowid
** pair in the segment.
**
** pSeg:
**   The segment to iterate through.
**
** iLeafPgno:
**   Current leaf page number within segment.
**
** iLeafOffset:
**   Byte offset within the current leaf that is the first byte of the
**   position list data (one byte past the position-list size field).
**
** pLeaf:
**   Buffer containing current leaf page data. Set to NULL at EOF.
**
** iTermLeafPgno, iTermLeafOffset:
**   Leaf page number containing the last term read from the segment. And
**   the offset immediately following the term data.
**
** flags:
**   Mask of FTS5_SEGITER_XXX values. Interpreted as follows:
**
**   FTS5_SEGITER_ONETERM:
**     If set, set the iterator to point to EOF after the current doclist
**     has been exhausted. Do not proceed to the next term in the segment.
**
**   FTS5_SEGITER_REVERSE:
**     This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If
**     it is set, iterate through rowid in descending order instead of the
**     default ascending order.
**
** iRowidOffset/nRowidOffset/aRowidOffset:
**   These are used if the FTS5_SEGITER_REVERSE flag is set.
**
**   For each rowid on the page corresponding to the current term, the
**   corresponding aRowidOffset[] entry is set to the byte offset of the
**   start of the "position-list-size" field within the page.
**
** pTombArray:
**   Used for contentless_delete=1 tables only. Points to the reference
**   counted array of tombstone hash pages for the segment (see
**   Fts5TombstoneArray below).
*/
struct Fts5SegIter {
  Fts5StructureSegment *pSeg;     /* Segment to iterate through */
  int flags;                      /* Mask of configuration flags */
  int iLeafPgno;                  /* Current leaf page number */
  Fts5Data *pLeaf;                /* Current leaf data */
  Fts5Data *pNextLeaf;            /* Leaf page (iLeafPgno+1) */
  i64 iLeafOffset;                /* Byte offset within current leaf */
  Fts5TombstoneArray *pTombArray; /* Array of tombstone pages */

  /* Next method */
  void (*xNext)(Fts5Index*, Fts5SegIter*, int*);

  /* The page and offset from which the current term was read. The offset
  ** is the offset of the first rowid in the current doclist. */
  int iTermLeafPgno;
  int iTermLeafOffset;

  int iPgidxOff;                  /* Next offset in pgidx */
  int iEndofDoclist;              /* NOTE(review): presumably the leaf offset
                                  ** of the end of current doclist - confirm */

  /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
  int iRowidOffset;               /* Current entry in aRowidOffset[] */
  int nRowidOffset;               /* Allocated size of aRowidOffset[] array */
  int *aRowidOffset;              /* Array of offset to rowid fields */

  Fts5DlidxIter *pDlidx;          /* If there is a doclist-index */

  /* Variables populated based on current entry. */
  Fts5Buffer term;                /* Current term */
  i64 iRowid;                     /* Current rowid */
  int nPos;                       /* Number of bytes in current position list */
  u8 bDel;                        /* True if the delete flag is set */
};
|
|
|
|
/*
** Array of tombstone pages. Reference counted.
*/
struct Fts5TombstoneArray {
  int nRef;                       /* Number of pointers to this object */
  int nTombstone;                 /* Number of entries in apTombstone[] */
  Fts5Data *apTombstone[1];       /* Array of tombstone pages */
};
|
|
|
|
/*
** Argument is a pointer to an Fts5Data structure that contains a
** leaf page. The assert fails unless szLeaf is either the whole record
** size or the 16-bit footer offset stored at byte 2 of the page header.
*/
#define ASSERT_SZLEAF_OK(x) assert( \
    (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
)

#define FTS5_SEGITER_ONETERM 0x01    /* See Fts5SegIter.flags above */
#define FTS5_SEGITER_REVERSE 0x02    /* See Fts5SegIter.flags above */

/*
** Argument is a pointer to an Fts5Data structure that contains a leaf
** page. This macro evaluates to true if the leaf contains no terms, or
** false if it contains at least one term.
*/
#define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)

/* Big-endian u16 read from entry i of the region starting at the page
** footer. NOTE(review): the page-format notes above describe footer
** entries as delta-compressed varints - confirm how the two relate. */
#define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))

/* First-rowid offset from the first 2 bytes of the leaf page header
** (0 if no rowid occurs before the first term - see PAGE FORMAT above). */
#define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))
|
|
|
|
/*
** Object for iterating through the merged results of one or more segments,
** visiting each term/rowid pair in the merged data.
**
** nSeg is always a power of two greater than or equal to the number of
** segments that this object is merging data from. Both the aSeg[] and
** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded
** with zeroed objects - these are handled as if they were iterators opened
** on empty segments.
**
** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an
** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the
** comparison in this context is the index of the iterator that currently
** points to the smaller term/rowid combination. Iterators at EOF are
** considered to be greater than all other iterators.
**
** aFirst[1] contains the index in aSeg[] of the iterator that points to
** the smallest key overall. aFirst[0] is unused.
**
** poslist:
**   Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
**   There is no way to tell if this is populated or not.
**
** pColset:
**   If not NULL, points to an object containing a set of column indices.
**   Only matches that occur in one of these columns will be returned.
**   The Fts5Iter does not own the Fts5Colset object, and so it is not
**   freed when the iterator is closed - it is owned by the upper layer.
*/
struct Fts5Iter {
  Fts5IndexIter base;             /* Base class containing output vars */
  Fts5TokenDataIter *pTokenDataIter;  /* NOTE(review): appears related to
                                      ** tokendata=1 mode - confirm */

  Fts5Index *pIndex;              /* Index that owns this iterator */
  Fts5Buffer poslist;             /* Buffer containing current poslist */
  Fts5Colset *pColset;            /* Restrict matches to these columns */

  /* Invoked to set output variables. */
  void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*);

  int nSeg;                       /* Size of aSeg[] array */
  int bRev;                       /* True to iterate in reverse order */
  u8 bSkipEmpty;                  /* True to skip deleted entries */

  i64 iSwitchRowid;               /* Firstest rowid of other than aFirst[1] */
  Fts5CResult *aFirst;            /* Current merge state (see above) */
  Fts5SegIter aSeg[1];            /* Array of segment iterators */
};
|
|
|
|
/*
** An instance of the following type is used to iterate through the contents
** of a doclist-index record.
**
** pData:
**   Record containing the doclist-index data.
**
** bEof:
**   Set to true once iterator has reached EOF.
**
** iOff:
**   Set to the current offset within record pData.
*/
struct Fts5DlidxLvl {
  Fts5Data *pData;              /* Data for current page of this level */
  int iOff;                     /* Current offset into pData */
  int bEof;                     /* At EOF already */
  int iFirstOff;                /* Used by reverse iterators */

  /* Output variables */
  int iLeafPgno;                /* Page number of current leaf page */
  i64 iRowid;                   /* First rowid on leaf iLeafPgno */
};
|
|
/*
** Iterator for a complete doclist-index. One Fts5DlidxLvl object per
** level of the doclist-index b-tree, with aLvl[0] being the leaf level.
*/
struct Fts5DlidxIter {
  int nLvl;                     /* Number of entries in aLvl[] */
  int iSegid;                   /* Segment id of doclist-index being read */
  Fts5DlidxLvl aLvl[1];         /* One entry per b-tree level */
};
|
|
|
|
/*
** Serialize iVal as a 16-bit big-endian integer into the 2-byte buffer
** indicated by aOut.
*/
static void fts5PutU16(u8 *aOut, u16 iVal){
  aOut[0] = (u8)(iVal >> 8);
  aOut[1] = (u8)(iVal & 0xFF);
}
|
|
|
|
static u16 fts5GetU16(const u8 *aIn){
|
|
return ((u16)aIn[0] << 8) + aIn[1];
|
|
}
|
|
|
|
/*
|
|
** The only argument points to a buffer at least 8 bytes in size. This
|
|
** function interprets the first 8 bytes of the buffer as a 64-bit big-endian
|
|
** unsigned integer and returns the result.
|
|
*/
|
|
static u64 fts5GetU64(u8 *a){
|
|
return ((u64)a[0] << 56)
|
|
+ ((u64)a[1] << 48)
|
|
+ ((u64)a[2] << 40)
|
|
+ ((u64)a[3] << 32)
|
|
+ ((u64)a[4] << 24)
|
|
+ ((u64)a[5] << 16)
|
|
+ ((u64)a[6] << 8)
|
|
+ ((u64)a[7] << 0);
|
|
}
|
|
|
|
/*
|
|
** The only argument points to a buffer at least 4 bytes in size. This
|
|
** function interprets the first 4 bytes of the buffer as a 32-bit big-endian
|
|
** unsigned integer and returns the result.
|
|
*/
|
|
static u32 fts5GetU32(const u8 *a){
|
|
return ((u32)a[0] << 24)
|
|
+ ((u32)a[1] << 16)
|
|
+ ((u32)a[2] << 8)
|
|
+ ((u32)a[3] << 0);
|
|
}
|
|
|
|
/*
|
|
** Write iVal, formated as a 64-bit big-endian unsigned integer, to the
|
|
** buffer indicated by the first argument.
|
|
*/
|
|
static void fts5PutU64(u8 *a, u64 iVal){
|
|
a[0] = ((iVal >> 56) & 0xFF);
|
|
a[1] = ((iVal >> 48) & 0xFF);
|
|
a[2] = ((iVal >> 40) & 0xFF);
|
|
a[3] = ((iVal >> 32) & 0xFF);
|
|
a[4] = ((iVal >> 24) & 0xFF);
|
|
a[5] = ((iVal >> 16) & 0xFF);
|
|
a[6] = ((iVal >> 8) & 0xFF);
|
|
a[7] = ((iVal >> 0) & 0xFF);
|
|
}
|
|
|
|
/*
|
|
** Write iVal, formated as a 32-bit big-endian unsigned integer, to the
|
|
** buffer indicated by the first argument.
|
|
*/
|
|
static void fts5PutU32(u8 *a, u32 iVal){
|
|
a[0] = ((iVal >> 24) & 0xFF);
|
|
a[1] = ((iVal >> 16) & 0xFF);
|
|
a[2] = ((iVal >> 8) & 0xFF);
|
|
a[3] = ((iVal >> 0) & 0xFF);
|
|
}
|
|
|
|
/*
** Allocate and return a buffer at least nByte bytes in size.
**
** If an OOM error is encountered, return NULL and set the error code in
** the Fts5Index handle passed as the first argument.
*/
static void *fts5IdxMalloc(Fts5Index *p, sqlite3_int64 nByte){
  /* Thin wrapper: sqlite3Fts5MallocZero() records any OOM in p->rc */
  return sqlite3Fts5MallocZero(&p->rc, nByte);
}
|
|
|
|
/*
** Compare the contents of the pLeft buffer with the pRight/nRight blob.
**
** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
** +ve if pRight is smaller than pLeft. In other words:
**
**     res = *pLeft - *pRight
**
** If one operand is a prefix of the other, the shorter one compares
** smaller. Only used in assert() statements (SQLITE_DEBUG builds).
*/
#ifdef SQLITE_DEBUG
static int fts5BufferCompareBlob(
  Fts5Buffer *pLeft,              /* Left hand side of comparison */
  const u8 *pRight, int nRight    /* Right hand side of comparison */
){
  int nCmp = MIN(pLeft->n, nRight);
  int res = memcmp(pLeft->p, pRight, nCmp);
  /* Equal prefixes: fall back to comparing lengths */
  return (res==0 ? (pLeft->n - nRight) : res);
}
#endif
|
|
|
|
/*
|
|
** Compare the contents of the two buffers using memcmp(). If one buffer
|
|
** is a prefix of the other, it is considered the lesser.
|
|
**
|
|
** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
|
|
** +ve if pRight is smaller than pLeft. In other words:
|
|
**
|
|
** res = *pLeft - *pRight
|
|
*/
|
|
static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){
|
|
int nCmp, res;
|
|
nCmp = MIN(pLeft->n, pRight->n);
|
|
assert( nCmp<=0 || pLeft->p!=0 );
|
|
assert( nCmp<=0 || pRight->p!=0 );
|
|
res = fts5Memcmp(pLeft->p, pRight->p, nCmp);
|
|
return (res==0 ? (pLeft->n - pRight->n) : res);
|
|
}
|
|
|
|
/*
** Return the offset of the first term stored on leaf page pLeaf. The
** value is read as a varint from the start of the pgidx area, which
** begins at byte offset pLeaf->szLeaf.
*/
static int fts5LeafFirstTermOff(Fts5Data *pLeaf){
  int ret;
  fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret);
  return ret;
}
|
|
|
|
/*
** Close the read-only blob handle, if it is open.
*/
void sqlite3Fts5IndexCloseReader(Fts5Index *p){
  if( p->pReader ){
    sqlite3_blob *pReader = p->pReader;
    /* Clear p->pReader before calling sqlite3_blob_close(), so that the
    ** handle is never visible in a half-closed state */
    p->pReader = 0;
    sqlite3_blob_close(pReader);
  }
}
|
|
|
|
/*
** Retrieve a record from the %_data table.
**
** The returned buffer is over-allocated by FTS5_DATA_PADDING bytes, and
** the first two padding bytes are zeroed. Fts5Data.szLeaf is populated
** from bytes 2-3 of the record (meaningful for leaf pages only).
**
** If an error occurs, NULL is returned and an error left in the
** Fts5Index object.
*/
static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
  Fts5Data *pRet = 0;
  if( p->rc==SQLITE_OK ){
    int rc = SQLITE_OK;

    if( p->pReader ){
      /* This call may return SQLITE_ABORT if there has been a savepoint
      ** rollback since it was last used. In this case a new blob handle
      ** is required. */
      sqlite3_blob *pBlob = p->pReader;
      p->pReader = 0;
      rc = sqlite3_blob_reopen(pBlob, iRowid);
      assert( p->pReader==0 );
      p->pReader = pBlob;
      if( rc!=SQLITE_OK ){
        sqlite3Fts5IndexCloseReader(p);
      }
      /* SQLITE_ABORT is recoverable - fall through and open a new handle */
      if( rc==SQLITE_ABORT ) rc = SQLITE_OK;
    }

    /* If the blob handle is not open at this point, open it and seek
    ** to the requested entry. */
    if( p->pReader==0 && rc==SQLITE_OK ){
      Fts5Config *pConfig = p->pConfig;
      rc = sqlite3_blob_open(pConfig->db,
          pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader
      );
    }

    /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls
    ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead.
    ** All the reasons those functions might return SQLITE_ERROR - missing
    ** table, missing row, non-blob/text in block column - indicate
    ** backing store corruption. */
    if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT;

    if( rc==SQLITE_OK ){
      u8 *aOut = 0;               /* Read blob data into this buffer */
      int nByte = sqlite3_blob_bytes(p->pReader);
      /* Allocate the header, the payload and the padding in one block */
      sqlite3_int64 nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING;
      pRet = (Fts5Data*)sqlite3_malloc64(nAlloc);
      if( pRet ){
        pRet->nn = nByte;
        aOut = pRet->p = (u8*)&pRet[1];
      }else{
        rc = SQLITE_NOMEM;
      }

      if( rc==SQLITE_OK ){
        rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0);
      }
      if( rc!=SQLITE_OK ){
        sqlite3_free(pRet);
        pRet = 0;
      }else{
        /* TODO1: Fix this */
        pRet->p[nByte] = 0x00;
        pRet->p[nByte+1] = 0x00;
        pRet->szLeaf = fts5GetU16(&pRet->p[2]);
      }
    }
    p->rc = rc;
    p->nRead++;
  }

  assert( (pRet==0)==(p->rc!=SQLITE_OK) );
  return pRet;
}
|
|
|
|
|
|
/*
** Release a reference to data record returned by an earlier call to
** fts5DataRead(). A NULL argument is a harmless no-op.
*/
static void fts5DataRelease(Fts5Data *pData){
  sqlite3_free(pData);
}
|
|
|
|
/*
** Like fts5DataRead(), but additionally validate that the record is a
** plausible leaf page: at least 4 bytes (header) and with a szLeaf value
** that does not exceed the record size. On failure, set p->rc to
** FTS5_CORRUPT and return NULL.
*/
static Fts5Data *fts5LeafRead(Fts5Index *p, i64 iRowid){
  Fts5Data *pRet = fts5DataRead(p, iRowid);
  if( pRet ){
    if( pRet->nn<4 || pRet->szLeaf>pRet->nn ){
      p->rc = FTS5_CORRUPT;
      fts5DataRelease(pRet);
      pRet = 0;
    }
  }
  return pRet;
}
|
|
|
|
/*
** Prepare SQL statement zSql against the database handle, storing the
** result in *ppStmt. The statement is flagged as persistent and is
** prohibited from reading virtual tables (prevents recursion back into
** FTS5).
**
** This function takes ownership of zSql and frees it before returning
** (zSql is expected to come from sqlite3_mprintf(), so it may be NULL
** after an OOM - that case is reported as SQLITE_NOMEM). The error code
** is stored in p->rc and also returned.
*/
static int fts5IndexPrepareStmt(
  Fts5Index *p,
  sqlite3_stmt **ppStmt,
  char *zSql
){
  if( p->rc==SQLITE_OK ){
    if( zSql ){
      p->rc = sqlite3_prepare_v3(p->pConfig->db, zSql, -1,
          SQLITE_PREPARE_PERSISTENT|SQLITE_PREPARE_NO_VTAB,
          ppStmt, 0);
    }else{
      p->rc = SQLITE_NOMEM;
    }
  }
  sqlite3_free(zSql);
  return p->rc;
}
|
|
|
|
|
|
/*
** INSERT OR REPLACE a record into the %_data table.
**
** The writer statement is prepared lazily on first use and cached in
** p->pWriter. No-op if the Fts5Index handle already holds an error.
*/
static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){
  if( p->rc!=SQLITE_OK ) return;

  if( p->pWriter==0 ){
    Fts5Config *pConfig = p->pConfig;
    fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintf(
          "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)",
          pConfig->zDb, pConfig->zName
    ));
    if( p->rc ) return;
  }

  sqlite3_bind_int64(p->pWriter, 1, iRowid);
  sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC);
  sqlite3_step(p->pWriter);
  p->rc = sqlite3_reset(p->pWriter);
  /* The blob was bound SQLITE_STATIC (no copy). Clear the binding so the
  ** cached statement does not retain a pointer into the caller's buffer. */
  sqlite3_bind_null(p->pWriter, 2);
}
|
|
|
|
/*
** Execute the following SQL:
**
**     DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast
**
** The deleter statement is prepared lazily on first use and cached in
** p->pDeleter. No-op if the Fts5Index handle already holds an error.
*/
static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){
  if( p->rc!=SQLITE_OK ) return;

  if( p->pDeleter==0 ){
    Fts5Config *pConfig = p->pConfig;
    char *zSql = sqlite3_mprintf(
        "DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?",
          pConfig->zDb, pConfig->zName
    );
    if( fts5IndexPrepareStmt(p, &p->pDeleter, zSql) ) return;
  }

  sqlite3_bind_int64(p->pDeleter, 1, iFirst);
  sqlite3_bind_int64(p->pDeleter, 2, iLast);
  sqlite3_step(p->pDeleter);
  p->rc = sqlite3_reset(p->pDeleter);
}
|
|
|
|
/*
** Remove all records associated with segment iSegid: the segment's range
** of %_data rows, any tombstone pages, and its entries in the %_idx
** table.
*/
static void fts5DataRemoveSegment(Fts5Index *p, Fts5StructureSegment *pSeg){
  int iSegid = pSeg->iSegid;
  /* Delete all %_data rows in the rowid range reserved for this segment */
  i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0);
  i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1;
  fts5DataDelete(p, iFirst, iLast);

  /* Delete any tombstone pages belonging to the segment */
  if( pSeg->nPgTombstone ){
    i64 iTomb1 = FTS5_TOMBSTONE_ROWID(iSegid, 0);
    i64 iTomb2 = FTS5_TOMBSTONE_ROWID(iSegid, pSeg->nPgTombstone-1);
    fts5DataDelete(p, iTomb1, iTomb2);
  }

  /* Remove the segment's entries from the %_idx table. The statement is
  ** prepared lazily and cached in p->pIdxDeleter. */
  if( p->pIdxDeleter==0 ){
    Fts5Config *pConfig = p->pConfig;
    fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf(
          "DELETE FROM '%q'.'%q_idx' WHERE segid=?",
          pConfig->zDb, pConfig->zName
    ));
  }
  if( p->rc==SQLITE_OK ){
    sqlite3_bind_int(p->pIdxDeleter, 1, iSegid);
    sqlite3_step(p->pIdxDeleter);
    p->rc = sqlite3_reset(p->pIdxDeleter);
  }
}
|
|
|
|
/*
** Release a reference to an Fts5Structure object returned by an earlier
** call to fts5StructureRead() or fts5StructureDecode(). Frees the object
** (including each level's aSeg[] array) when the reference count drops
** to zero. A NULL argument is a no-op.
*/
static void fts5StructureRelease(Fts5Structure *pStruct){
  if( pStruct && 0>=(--pStruct->nRef) ){
    int i;
    assert( pStruct->nRef==0 );
    for(i=0; i<pStruct->nLevel; i++){
      sqlite3_free(pStruct->aLevel[i].aSeg);
    }
    sqlite3_free(pStruct);
  }
}
|
|
|
|
/*
** Increment the reference count of the Fts5Structure object passed as
** the only argument.
*/
static void fts5StructureRef(Fts5Structure *pStruct){
  pStruct->nRef++;
}
|
|
|
|
/*
** Return an opaque reference to the current cached structure object,
** incrementing its reference count. The caller must eventually pass the
** returned pointer to sqlite3Fts5StructureRelease().
*/
void *sqlite3Fts5StructureRef(Fts5Index *p){
  fts5StructureRef(p->pStruct);
  return (void*)p->pStruct;
}
|
|
/*
** Release a reference obtained via sqlite3Fts5StructureRef(). A NULL
** argument is a no-op.
*/
void sqlite3Fts5StructureRelease(void *p){
  if( p ){
    fts5StructureRelease((Fts5Structure*)p);
  }
}
|
|
/*
** Return SQLITE_OK if pStruct is still the structure object currently
** cached by the Fts5Index handle, or SQLITE_ABORT if the cached
** structure has changed since pStruct was obtained.
*/
int sqlite3Fts5StructureTest(Fts5Index *p, void *pStruct){
  return (p->pStruct==(Fts5Structure*)pStruct) ? SQLITE_OK : SQLITE_ABORT;
}
|
|
|
|
/*
** Ensure that structure object (*pp) is writable.
**
** If the object is shared (nRef>1), replace *pp with a private deep copy
** (the top-level object plus each level's aSeg[] array) that has nRef==1,
** and drop one reference from the original. If the object is already
** exclusively owned, this is a no-op.
**
** This function is a no-op if (*pRc) is not SQLITE_OK when it is called. If
** an error occurs, (*pRc) is set to an SQLite error code before returning.
*/
static void fts5StructureMakeWritable(int *pRc, Fts5Structure **pp){
  Fts5Structure *p = *pp;
  if( *pRc==SQLITE_OK && p->nRef>1 ){
    i64 nByte = sizeof(Fts5Structure)+(p->nLevel-1)*sizeof(Fts5StructureLevel);
    Fts5Structure *pNew;
    pNew = (Fts5Structure*)sqlite3Fts5MallocZero(pRc, nByte);
    if( pNew ){
      int i;
      /* Copy the top-level object, then replace each borrowed aSeg[]
      ** pointer with a freshly allocated copy */
      memcpy(pNew, p, nByte);
      for(i=0; i<p->nLevel; i++) pNew->aLevel[i].aSeg = 0;
      for(i=0; i<p->nLevel; i++){
        Fts5StructureLevel *pLvl = &pNew->aLevel[i];
        nByte = sizeof(Fts5StructureSegment) * pNew->aLevel[i].nSeg;
        pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(pRc, nByte);
        if( pLvl->aSeg==0 ){
          /* OOM: free everything allocated so far (unallocated slots are
          ** NULL, so sqlite3_free() on them is harmless) and leave *pp
          ** pointing at the original shared object. */
          for(i=0; i<p->nLevel; i++){
            sqlite3_free(pNew->aLevel[i].aSeg);
          }
          sqlite3_free(pNew);
          return;
        }
        memcpy(pLvl->aSeg, p->aLevel[i].aSeg, nByte);
      }
      p->nRef--;
      pNew->nRef = 1;
    }
    *pp = pNew;
  }
}
|
|
|
|
/*
** Deserialize and return the structure record currently stored in serialized
** form within buffer pData/nData.
**
** The serialized format is: a 32-bit cookie, an optional FTS5_STRUCTURE_V2
** marker, then varints for nLevel, nSegment and the write counter,
** followed by one (nMerge, nSeg, segments...) group per level. V2 records
** carry five extra per-segment varints (origins, tombstone and entry
** counts).
**
** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
** are over-allocated by one slot. This allows the structure contents
** to be more easily edited.
**
** If an error occurs, *ppOut is set to NULL and an SQLite error code
** returned. Otherwise, *ppOut is set to point to the new object and
** SQLITE_OK returned.
*/
static int fts5StructureDecode(
  const u8 *pData,                /* Buffer containing serialized structure */
  int nData,                      /* Size of buffer pData in bytes */
  int *piCookie,                  /* Configuration cookie value */
  Fts5Structure **ppOut           /* OUT: Deserialized object */
){
  int rc = SQLITE_OK;
  int i = 0;                      /* Read offset within pData[] */
  int iLvl;
  int nLevel = 0;
  int nSegment = 0;
  sqlite3_int64 nByte;            /* Bytes of space to allocate at pRet */
  Fts5Structure *pRet = 0;        /* Structure object to return */
  int bStructureV2 = 0;           /* True for FTS5_STRUCTURE_V2 */
  u64 nOriginCntr = 0;            /* Largest origin value seen so far */

  /* Grab the cookie value */
  if( piCookie ) *piCookie = sqlite3Fts5Get32(pData);
  i = 4;

  /* Check if this is a V2 structure record. Set bStructureV2 if it is. */
  if( 0==memcmp(&pData[i], FTS5_STRUCTURE_V2, 4) ){
    i += 4;
    bStructureV2 = 1;
  }

  /* Read the total number of levels and segments from the start of the
  ** structure record. */
  i += fts5GetVarint32(&pData[i], nLevel);
  i += fts5GetVarint32(&pData[i], nSegment);
  if( nLevel>FTS5_MAX_SEGMENT || nLevel<0
   || nSegment>FTS5_MAX_SEGMENT || nSegment<0
  ){
    return FTS5_CORRUPT;
  }
  nByte = (
      sizeof(Fts5Structure) +                    /* Main structure */
      sizeof(Fts5StructureLevel) * (nLevel-1)    /* aLevel[] array */
  );
  pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte);

  if( pRet ){
    pRet->nRef = 1;
    pRet->nLevel = nLevel;
    pRet->nSegment = nSegment;
    i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter);

    for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){
      Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl];
      int nTotal = 0;             /* Total segments on this level */
      int iSeg;

      if( i>=nData ){
        rc = FTS5_CORRUPT;
      }else{
        i += fts5GetVarint32(&pData[i], pLvl->nMerge);
        i += fts5GetVarint32(&pData[i], nTotal);
        /* A level can never have more segments being merged than exist */
        if( nTotal<pLvl->nMerge ) rc = FTS5_CORRUPT;
        pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc,
            nTotal * sizeof(Fts5StructureSegment)
        );
        nSegment -= nTotal;
      }

      if( rc==SQLITE_OK ){
        pLvl->nSeg = nTotal;
        for(iSeg=0; iSeg<nTotal; iSeg++){
          Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
          if( i>=nData ){
            rc = FTS5_CORRUPT;
            break;
          }
          assert( pSeg!=0 );
          i += fts5GetVarint32(&pData[i], pSeg->iSegid);
          i += fts5GetVarint32(&pData[i], pSeg->pgnoFirst);
          i += fts5GetVarint32(&pData[i], pSeg->pgnoLast);
          if( bStructureV2 ){
            i += fts5GetVarint(&pData[i], &pSeg->iOrigin1);
            i += fts5GetVarint(&pData[i], &pSeg->iOrigin2);
            i += fts5GetVarint32(&pData[i], pSeg->nPgTombstone);
            i += fts5GetVarint(&pData[i], &pSeg->nEntryTombstone);
            i += fts5GetVarint(&pData[i], &pSeg->nEntry);
            nOriginCntr = MAX(nOriginCntr, pSeg->iOrigin2);
          }
          if( pSeg->pgnoLast<pSeg->pgnoFirst ){
            rc = FTS5_CORRUPT;
            break;
          }
        }
        /* Sanity checks: a merge on the previous level implies at least one
        ** segment here; the last level can not have an ongoing merge. */
        if( iLvl>0 && pLvl[-1].nMerge && nTotal==0 ) rc = FTS5_CORRUPT;
        if( iLvl==nLevel-1 && pLvl->nMerge ) rc = FTS5_CORRUPT;
      }
    }
    /* All declared segments must have been consumed by the levels */
    if( nSegment!=0 && rc==SQLITE_OK ) rc = FTS5_CORRUPT;
    if( bStructureV2 ){
      pRet->nOriginCntr = nOriginCntr+1;
    }

    if( rc!=SQLITE_OK ){
      fts5StructureRelease(pRet);
      pRet = 0;
    }
  }

  *ppOut = pRet;
  return rc;
}
|
|
|
|
/*
** Add a level to the Fts5Structure.aLevel[] array of structure object
** (*ppStruct). The object is first made writable (un-shared); the new
** level is zero-initialized. On OOM, (*pRc) is set to SQLITE_NOMEM and
** *ppStruct is left unchanged (realloc preserves the original on
** failure).
*/
static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){
  fts5StructureMakeWritable(pRc, ppStruct);
  assert( (ppStruct!=0 && (*ppStruct)!=0) || (*pRc)!=SQLITE_OK );
  if( *pRc==SQLITE_OK ){
    Fts5Structure *pStruct = *ppStruct;
    int nLevel = pStruct->nLevel;
    sqlite3_int64 nByte = (
        sizeof(Fts5Structure) +                  /* Main structure */
        sizeof(Fts5StructureLevel) * (nLevel+1)  /* aLevel[] array */
    );

    pStruct = sqlite3_realloc64(pStruct, nByte);
    if( pStruct ){
      memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel));
      pStruct->nLevel++;
      *ppStruct = pStruct;
    }else{
      *pRc = SQLITE_NOMEM;
    }
  }
}
|
|
|
|
/*
** Extend level iLvl so that there is room for at least nExtra more
** segments.
**
** If bInsert is false, the nExtra new (zeroed) slots are appended at the
** end of the aSeg[] array. If bInsert is true, existing entries are
** shifted up and the new slots are created at the start of the array.
** On OOM, (*pRc) is set to SQLITE_NOMEM and the level is unchanged.
*/
static void fts5StructureExtendLevel(
  int *pRc,
  Fts5Structure *pStruct,
  int iLvl,
  int nExtra,
  int bInsert
){
  if( *pRc==SQLITE_OK ){
    Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
    Fts5StructureSegment *aNew;
    sqlite3_int64 nByte;

    nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment);
    aNew = sqlite3_realloc64(pLvl->aSeg, nByte);
    if( aNew ){
      if( bInsert==0 ){
        memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra);
      }else{
        int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment);
        memmove(&aNew[nExtra], aNew, nMove);
        memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra);
      }
      pLvl->aSeg = aNew;
    }else{
      *pRc = SQLITE_NOMEM;
    }
  }
}
|
|
|
|
/*
** Read the structure record from the %_data table and deserialize it,
** bypassing the cached copy in Fts5Index.pStruct. If the configuration
** cookie embedded in the record differs from the loaded configuration,
** the configuration is reloaded.
**
** Returns NULL and leaves an error code in p->rc on failure.
*/
static Fts5Structure *fts5StructureReadUncached(Fts5Index *p){
  Fts5Structure *pRet = 0;
  Fts5Config *pConfig = p->pConfig;
  int iCookie;                    /* Configuration cookie */
  Fts5Data *pData;

  pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
  if( p->rc==SQLITE_OK ){
    /* TODO: Do we need this if the leaf-index is appended? Probably... */
    memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING);
    p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet);
    if( p->rc==SQLITE_OK && (pConfig->pgsz==0 || pConfig->iCookie!=iCookie) ){
      p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
    }
    fts5DataRelease(pData);
    if( p->rc!=SQLITE_OK ){
      fts5StructureRelease(pRet);
      pRet = 0;
    }
  }

  return pRet;
}
|
|
|
|
/*
** Return the current value of "PRAGMA <db>.data_version" for the database
** containing the FTS5 index. The statement is prepared lazily and cached
** in p->pDataVersion. Returns 0 if an error occurs or has already
** occurred (error code left in p->rc).
*/
static i64 fts5IndexDataVersion(Fts5Index *p){
  i64 iVersion = 0;

  if( p->rc==SQLITE_OK ){
    if( p->pDataVersion==0 ){
      p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion,
          sqlite3_mprintf("PRAGMA %Q.data_version", p->pConfig->zDb)
      );
      if( p->rc ) return 0;
    }

    if( SQLITE_ROW==sqlite3_step(p->pDataVersion) ){
      iVersion = sqlite3_column_int64(p->pDataVersion, 0);
    }
    p->rc = sqlite3_reset(p->pDataVersion);
  }

  return iVersion;
}
|
|
|
|
/*
** Read, deserialize and return the structure record.
**
** The result is cached in Fts5Index.pStruct; a cached copy is returned
** (with its reference count incremented) on subsequent calls. The
** database data_version is snapshotted alongside the cache so that
** staleness can be detected elsewhere.
**
** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
** are over-allocated as described for function fts5StructureDecode()
** above.
**
** If an error occurs, NULL is returned and an error code left in the
** Fts5Index handle. If an error has already occurred when this function
** is called, it is a no-op.
*/
static Fts5Structure *fts5StructureRead(Fts5Index *p){

  if( p->pStruct==0 ){
    p->iStructVersion = fts5IndexDataVersion(p);
    if( p->rc==SQLITE_OK ){
      p->pStruct = fts5StructureReadUncached(p);
    }
  }

#if 0
  /* Disabled debugging audit: re-read the structure from disk and verify
  ** that it matches the cached copy. */
  else{
    Fts5Structure *pTest = fts5StructureReadUncached(p);
    if( pTest ){
      int i, j;
      assert_nc( p->pStruct->nSegment==pTest->nSegment );
      assert_nc( p->pStruct->nLevel==pTest->nLevel );
      for(i=0; i<pTest->nLevel; i++){
        assert_nc( p->pStruct->aLevel[i].nMerge==pTest->aLevel[i].nMerge );
        assert_nc( p->pStruct->aLevel[i].nSeg==pTest->aLevel[i].nSeg );
        for(j=0; j<pTest->aLevel[i].nSeg; j++){
          Fts5StructureSegment *p1 = &pTest->aLevel[i].aSeg[j];
          Fts5StructureSegment *p2 = &p->pStruct->aLevel[i].aSeg[j];
          assert_nc( p1->iSegid==p2->iSegid );
          assert_nc( p1->pgnoFirst==p2->pgnoFirst );
          assert_nc( p1->pgnoLast==p2->pgnoLast );
        }
      }
      fts5StructureRelease(pTest);
    }
  }
#endif

  if( p->rc!=SQLITE_OK ) return 0;
  assert( p->iStructVersion!=0 );
  assert( p->pStruct!=0 );
  fts5StructureRef(p->pStruct);
  return p->pStruct;
}
|
|
|
|
/*
** Discard the cached structure object, if any. The next call to
** fts5StructureRead() will re-read the record from the %_data table.
*/
static void fts5StructureInvalidate(Fts5Index *p){
  if( p->pStruct ){
    fts5StructureRelease(p->pStruct);
    p->pStruct = 0;
  }
}
|
|
|
|
/*
** Return the total number of segments in index structure pStruct. This
** function is only ever used as part of assert() conditions.
*/
#ifdef SQLITE_DEBUG
static int fts5StructureCountSegments(Fts5Structure *pStruct){
  int nSegment = 0;               /* Total number of segments */
  if( pStruct ){
    int iLvl;                     /* Used to iterate through levels */
    for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
      nSegment += pStruct->aLevel[iLvl].nSeg;
    }
  }

  return nSegment;
}
#endif
|
|
|
|
/* Append nBlob bytes from pBlob to buffer pBuf. "Safe" means the caller
** guarantees the buffer already has enough allocated space - the macro
** never reallocates (asserted). */
#define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) {     \
  assert( (pBuf)->nSpace>=((pBuf)->n+nBlob) );             \
  memcpy(&(pBuf)->p[(pBuf)->n], pBlob, nBlob);             \
  (pBuf)->n += nBlob;                                      \
}

/* Append iVal as a varint to buffer pBuf. As above, the caller must have
** pre-allocated space (up to 9 bytes per varint). */
#define fts5BufferSafeAppendVarint(pBuf, iVal) {                \
  (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal));  \
  assert( (pBuf)->nSpace>=(pBuf)->n );                          \
}
|
|
|
|
|
|
/*
** Serialize and store the "structure" record.
**
** The record is written in V2 format (with the FTS5_STRUCTURE_V2 marker
** and the extra per-segment fields) iff pStruct->nOriginCntr is non-zero.
**
** If an error occurs, leave an error code in the Fts5Index object. If an
** error has already occurred, this function is a no-op.
*/
static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){
  if( p->rc==SQLITE_OK ){
    Fts5Buffer buf;               /* Buffer to serialize record into */
    int iLvl;                     /* Used to iterate through levels */
    int iCookie;                  /* Cookie value to store */
    /* Upper bound on header size: cookie + optional V2 marker + up to
    ** three 9-byte varints */
    int nHdr = (pStruct->nOriginCntr>0 ? (4+4+9+9+9) : (4+9+9));

    assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
    memset(&buf, 0, sizeof(Fts5Buffer));

    /* Append the current configuration cookie */
    iCookie = p->pConfig->iCookie;
    if( iCookie<0 ) iCookie = 0;

    if( 0==sqlite3Fts5BufferSize(&p->rc, &buf, nHdr) ){
      sqlite3Fts5Put32(buf.p, iCookie);
      buf.n = 4;
      if( pStruct->nOriginCntr>0 ){
        fts5BufferSafeAppendBlob(&buf, FTS5_STRUCTURE_V2, 4);
      }
      fts5BufferSafeAppendVarint(&buf, pStruct->nLevel);
      fts5BufferSafeAppendVarint(&buf, pStruct->nSegment);
      fts5BufferSafeAppendVarint(&buf, (i64)pStruct->nWriteCounter);
    }

    for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
      int iSeg;                   /* Used to iterate through segments */
      Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
      fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge);
      fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg);
      assert( pLvl->nMerge<=pLvl->nSeg );

      for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
        Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
        fts5BufferAppendVarint(&p->rc, &buf, pSeg->iSegid);
        fts5BufferAppendVarint(&p->rc, &buf, pSeg->pgnoFirst);
        fts5BufferAppendVarint(&p->rc, &buf, pSeg->pgnoLast);
        if( pStruct->nOriginCntr>0 ){
          fts5BufferAppendVarint(&p->rc, &buf, pSeg->iOrigin1);
          fts5BufferAppendVarint(&p->rc, &buf, pSeg->iOrigin2);
          fts5BufferAppendVarint(&p->rc, &buf, pSeg->nPgTombstone);
          fts5BufferAppendVarint(&p->rc, &buf, pSeg->nEntryTombstone);
          fts5BufferAppendVarint(&p->rc, &buf, pSeg->nEntry);
        }
      }
    }

    fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n);
    fts5BufferFree(&buf);
  }
}
|
|
|
|
/* Debugging aid, normally compiled out: dump a human-readable description
** of an index structure to stdout. Enable by changing "#if 0" to
** "#if 1". */
#if 0
static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*);
static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){
  int rc = SQLITE_OK;
  Fts5Buffer buf;
  memset(&buf, 0, sizeof(buf));
  fts5DebugStructure(&rc, &buf, pStruct);
  fprintf(stdout, "%s: %s\n", zCaption, buf.p);
  fflush(stdout);
  fts5BufferFree(&buf);
}
#else
# define fts5PrintStructure(x,y)
#endif
|
|
|
|
/*
** Return the size of segment pSeg in pages (both first and last page
** are included in the count).
*/
static int fts5SegmentSize(Fts5StructureSegment *pSeg){
  return pSeg->pgnoLast - pSeg->pgnoFirst + 1;
}
|
|
|
|
/*
** Return a copy of index structure pStruct. Except, promote as many
** segments as possible to level iPromote. If an OOM occurs, NULL is
** returned.
**
** Segments are moved from higher-numbered levels onto level iPromote,
** newest first, stopping at the first segment larger than szPromote
** pages or at a level with an ongoing merge. No-op if level iPromote
** itself has a merge in progress.
*/
static void fts5StructurePromoteTo(
  Fts5Index *p,
  int iPromote,
  int szPromote,
  Fts5Structure *pStruct
){
  int il, is;
  Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote];

  if( pOut->nMerge==0 ){
    for(il=iPromote+1; il<pStruct->nLevel; il++){
      Fts5StructureLevel *pLvl = &pStruct->aLevel[il];
      if( pLvl->nMerge ) return;
      /* Walk segments newest-first; stop at the first one too large */
      for(is=pLvl->nSeg-1; is>=0; is--){
        int sz = fts5SegmentSize(&pLvl->aSeg[is]);
        if( sz>szPromote ) return;
        /* Insert a slot at the start of level iPromote and move the
        ** segment into it */
        fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1);
        if( p->rc ) return;
        memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment));
        pOut->nSeg++;
        pLvl->nSeg--;
      }
    }
  }
}
|
|
|
|
/*
** A new segment has just been written to level iLvl of index structure
** pStruct. This function determines if any segments should be promoted
** as a result. Segments are promoted in two scenarios:
**
**   a) If the segment just written is smaller than one or more segments
**      within the previous populated level, it is promoted to the previous
**      populated level.
**
**   b) If the segment just written is larger than the newest segment on
**      the next populated level, then that segment, and any other adjacent
**      segments that are also smaller than the one just written, are
**      promoted.
**
** If one or more segments are promoted, the structure object is updated
** to reflect this.
*/
static void fts5StructurePromote(
  Fts5Index *p,                   /* FTS5 backend object */
  int iLvl,                       /* Index level just updated */
  Fts5Structure *pStruct          /* Index structure */
){
  if( p->rc==SQLITE_OK ){
    int iTst;
    int iPromote = -1;            /* Target level, or -1 if (a) not met */
    int szPromote = 0;            /* Promote anything this size or smaller */
    Fts5StructureSegment *pSeg;   /* Segment just written */
    int szSeg;                    /* Size of segment just written */
    int nSeg = pStruct->aLevel[iLvl].nSeg;

    if( nSeg==0 ) return;
    pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1];
    szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst);

    /* Check for condition (a): find the previous populated level and see
    ** whether its largest segment is at least as big as the new one */
    for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--);
    if( iTst>=0 ){
      int i;
      int szMax = 0;
      Fts5StructureLevel *pTst = &pStruct->aLevel[iTst];
      assert( pTst->nMerge==0 );
      for(i=0; i<pTst->nSeg; i++){
        int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1;
        if( sz>szMax ) szMax = sz;
      }
      if( szMax>=szSeg ){
        /* Condition (a) is true. Promote the newest segment on level
        ** iLvl to level iTst.  */
        iPromote = iTst;
        szPromote = szMax;
      }
    }

    /* If condition (a) is not met, assume (b) is true. StructurePromoteTo()
    ** is a no-op if it is not.  */
    if( iPromote<0 ){
      iPromote = iLvl;
      szPromote = szSeg;
    }
    fts5StructurePromoteTo(p, iPromote, szPromote, pStruct);
  }
}
|
|
|
|
|
|
/*
** Advance the iterator passed as the only argument. If the end of the
** doclist-index page is reached, return non-zero.
**
** The first entry on a page carries an absolute page number and rowid;
** subsequent entries are encoded as a run of 0x00 bytes (each advancing
** iLeafPgno by one) followed by a varint rowid delta.
*/
static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
  Fts5Data *pData = pLvl->pData;

  if( pLvl->iOff==0 ){
    /* First call: decode the initial (page number, rowid) pair */
    assert( pLvl->bEof==0 );
    pLvl->iOff = 1;
    pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno);
    pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid);
    pLvl->iFirstOff = pLvl->iOff;
  }else{
    int iOff;
    /* Skip the run of 0x00 bytes (leaf pages with no rowids) */
    for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){
      if( pData->p[iOff] ) break;
    }

    if( iOff<pData->nn ){
      u64 iVal;
      pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1;
      iOff += fts5GetVarint(&pData->p[iOff], &iVal);
      pLvl->iRowid += iVal;
      pLvl->iOff = iOff;
    }else{
      pLvl->bEof = 1;
    }
  }

  return pLvl->bEof;
}
|
|
|
|
/*
** Advance the iterator passed as the first argument at level iLvl. If the
** current page of level iLvl is exhausted, recursively advance the parent
** level and load the next level-iLvl page from it. Returns non-zero if
** the whole doclist-index is at EOF (i.e. level 0 is at EOF).
*/
static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
  Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];

  assert( iLvl<pIter->nLvl );
  if( fts5DlidxLvlNext(pLvl) ){
    if( (iLvl+1) < pIter->nLvl ){
      /* This level is exhausted - advance the parent and, if the parent
      ** is not at EOF, load the next page for this level */
      fts5DlidxIterNextR(p, pIter, iLvl+1);
      if( pLvl[1].bEof==0 ){
        fts5DataRelease(pLvl->pData);
        memset(pLvl, 0, sizeof(Fts5DlidxLvl));
        pLvl->pData = fts5DataRead(p,
            FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
        );
        if( pLvl->pData ) fts5DlidxLvlNext(pLvl);
      }
    }
  }

  return pIter->aLvl[0].bEof;
}
|
|
/*
** Advance doclist-index iterator pIter to the next entry. Returns
** non-zero if the iterator is at EOF.
*/
static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){
  return fts5DlidxIterNextR(p, pIter, 0);
}
|
|
|
|
/*
** The iterator passed as the first argument has the following fields set
** as follows. This function sets up the rest of the iterator so that it
** points to the first rowid in the doclist-index.
**
**   pData:
**     pointer to doclist-index record,
**
** When this function is called pIter->iLeafPgno is the page number the
** doclist is associated with (the one featuring the term).
**
** Returns non-zero if the doclist-index is empty (level 0 at EOF).
*/
static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
  int i;
  /* Take the first step on every level, top-down */
  for(i=0; i<pIter->nLvl; i++){
    fts5DlidxLvlNext(&pIter->aLvl[i]);
  }
  return pIter->aLvl[0].bEof;
}
|
|
|
|
|
|
/*
** Return true if iterator pIter is at EOF, or if an error has occurred
** on the Fts5Index handle.
*/
static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){
  return p->rc!=SQLITE_OK || pIter->aLvl[0].bEof;
}
|
|
|
|
/*
** Position doclist-index iterator pIter at the last entry it contains.
** Used when iterating a doclist in descending (ORDER BY rowid DESC) order.
*/
static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){
  int i;

  /* Advance each level to the last entry on the last page */
  for(i=pIter->nLvl-1; p->rc==SQLITE_OK && i>=0; i--){
    Fts5DlidxLvl *pLvl = &pIter->aLvl[i];
    /* Step forward until EOF, then clear the EOF flag - pLvl is now
    ** positioned on its final entry.  */
    while( fts5DlidxLvlNext(pLvl)==0 );
    pLvl->bEof = 0;

    if( i>0 ){
      /* Load the child level's page - the one indicated by the final
      ** entry of this level. The next loop iteration positions the
      ** child at its own final entry.  */
      Fts5DlidxLvl *pChild = &pLvl[-1];
      fts5DataRelease(pChild->pData);
      memset(pChild, 0, sizeof(Fts5DlidxLvl));
      pChild->pData = fts5DataRead(p,
          FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno)
      );
    }
  }
}
|
|
|
|
/*
|
|
** Move the iterator passed as the only argument to the previous entry.
|
|
*/
|
|
static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){
  int iOff = pLvl->iOff;          /* Offset of the current (about-to-be-left) entry */

  assert( pLvl->bEof==0 );
  if( iOff<=pLvl->iFirstOff ){
    /* Already on the first entry - moving backwards hits EOF. */
    pLvl->bEof = 1;
  }else{
    u8 *a = pLvl->pData->p;

    /* Entries are delta-encoded, so there is no way to step backwards
    ** directly. Instead, rewind to the first entry and scan forward
    ** until reaching the entry immediately before offset iOff.  */
    pLvl->iOff = 0;
    fts5DlidxLvlNext(pLvl);
    while( 1 ){
      int nZero = 0;              /* Count of 0x00 "no rowid" page markers */
      int ii = pLvl->iOff;
      u64 delta = 0;

      /* Skip leading zero bytes - each represents a page with no entry. */
      while( a[ii]==0 ){
        nZero++;
        ii++;
      }
      ii += sqlite3Fts5GetVarint(&a[ii], &delta);

      /* Stop before consuming the entry at (or past) the original offset. */
      if( ii>=iOff ) break;
      pLvl->iLeafPgno += nZero+1;
      pLvl->iRowid += delta;
      pLvl->iOff = ii;
    }
  }

  return pLvl->bEof;
}
|
|
|
|
/*
** Recursive worker for fts5DlidxIterPrev(). Moves level iLvl back one
** entry, reloading it from the parent level if it underflows.
*/
static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
  Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];

  assert( iLvl<pIter->nLvl );
  if( fts5DlidxLvlPrev(pLvl) ){
    /* This level underflowed. Move the parent level back one entry and,
    ** if the parent is not at EOF, reload this level from the page the
    ** parent now indicates.  */
    if( (iLvl+1) < pIter->nLvl ){
      fts5DlidxIterPrevR(p, pIter, iLvl+1);
      if( pLvl[1].bEof==0 ){
        fts5DataRelease(pLvl->pData);
        memset(pLvl, 0, sizeof(Fts5DlidxLvl));
        pLvl->pData = fts5DataRead(p,
            FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
        );
        if( pLvl->pData ){
          /* Position the freshly loaded level on its final entry (the one
          ** logically preceding the entry just left). */
          while( fts5DlidxLvlNext(pLvl)==0 );
          pLvl->bEof = 0;
        }
      }
    }
  }

  return pIter->aLvl[0].bEof;
}
|
|
/* Move the doclist-index iterator to the previous entry. Non-zero at EOF. */
static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){
  /* Start the recursive retreat at level 0 (the leaf-most level). */
  return fts5DlidxIterPrevR(p, pIter, 0);
}
|
|
|
|
/*
|
|
** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
|
|
*/
|
|
/*
** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
** A NULL argument is a harmless no-op.
*/
static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
  if( pIter==0 ) return;
  {
    int iLvl;
    /* Release the data record held by each level before freeing the
    ** iterator structure itself. */
    for(iLvl=0; iLvl<pIter->nLvl; iLvl++){
      fts5DataRelease(pIter->aLvl[iLvl].pData);
    }
  }
  sqlite3_free(pIter);
}
|
|
|
|
/*
** Allocate and initialize a doclist-index iterator for the doclist-index
** rooted at leaf page iLeafPg of segment iSegid. Returns NULL and leaves
** an error code in the Fts5Index object if an error occurs.
*/
static Fts5DlidxIter *fts5DlidxIterInit(
  Fts5Index *p,                   /* Fts5 Backend to iterate within */
  int bRev,                       /* True for ORDER BY ASC */
  int iSegid,                     /* Segment id */
  int iLeafPg                     /* Leaf page number to load dlidx for */
){
  Fts5DlidxIter *pIter = 0;
  int i;
  int bDone = 0;                  /* Set once the root level has been read */

  /* Grow the iterator one level at a time, reading successively higher
  ** dlidx levels, until a page is found whose first byte has the 0x01
  ** "not the root" flag clear.  */
  for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
    sqlite3_int64 nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl);
    Fts5DlidxIter *pNew;

    pNew = (Fts5DlidxIter*)sqlite3_realloc64(pIter, nByte);
    if( pNew==0 ){
      p->rc = SQLITE_NOMEM;
    }else{
      i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg);
      Fts5DlidxLvl *pLvl = &pNew->aLvl[i];
      pIter = pNew;
      memset(pLvl, 0, sizeof(Fts5DlidxLvl));
      pLvl->pData = fts5DataRead(p, iRowid);
      if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){
        bDone = 1;
      }
      pIter->nLvl = i+1;
    }
  }

  /* Position the iterator at its first or last entry, as requested. */
  if( p->rc==SQLITE_OK ){
    pIter->iSegid = iSegid;
    if( bRev==0 ){
      fts5DlidxIterFirst(pIter);
    }else{
      fts5DlidxIterLast(p, pIter);
    }
  }

  /* On error, release the partially constructed iterator. */
  if( p->rc!=SQLITE_OK ){
    fts5DlidxIterFree(pIter);
    pIter = 0;
  }

  return pIter;
}
|
|
|
|
/* Return the rowid of the doclist-index entry the iterator points at. */
static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){
  Fts5DlidxLvl *pLvl0 = &pIter->aLvl[0];
  return pLvl0->iRowid;
}
|
|
/* Return the leaf page number of the current doclist-index entry. */
static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){
  Fts5DlidxLvl *pLvl0 = &pIter->aLvl[0];
  return pLvl0->iLeafPgno;
}
|
|
|
|
/*
|
|
** Load the next leaf page into the segment iterator.
|
|
*/
|
|
static void fts5SegIterNextPage(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter              /* Iterator to advance to next page */
){
  Fts5Data *pLeaf;
  Fts5StructureSegment *pSeg = pIter->pSeg;
  fts5DataRelease(pIter->pLeaf);
  pIter->iLeafPgno++;
  if( pIter->pNextLeaf ){
    /* A pre-fetched page is available - take ownership of it instead of
    ** reading from disk. */
    pIter->pLeaf = pIter->pNextLeaf;
    pIter->pNextLeaf = 0;
  }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
    pIter->pLeaf = fts5LeafRead(p,
        FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno)
    );
  }else{
    /* Past the last page of the segment - iterator is at EOF. */
    pIter->pLeaf = 0;
  }
  pLeaf = pIter->pLeaf;

  if( pLeaf ){
    /* The page-index (pgidx) begins immediately after the leaf content. */
    pIter->iPgidxOff = pLeaf->szLeaf;
    if( fts5LeafIsTermless(pLeaf) ){
      /* No terms on this page: mark end-of-doclist as past the page end. */
      pIter->iEndofDoclist = pLeaf->nn+1;
    }else{
      /* First pgidx varint is the offset of the first term on the page,
      ** which is also where the current doclist ends. */
      pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff],
          pIter->iEndofDoclist
      );
    }
  }
}
|
|
|
|
/*
|
|
** Argument p points to a buffer containing a varint to be interpreted as a
|
|
** position list size field. Read the varint and return the number of bytes
|
|
** read. Before returning, set *pnSz to the number of bytes in the position
|
|
** list, and *pbDel to true if the delete flag is set, or false otherwise.
|
|
*/
|
|
/*
** Decode the position-list size field stored at p. The low bit of the
** decoded value is the "delete" flag; the remaining bits are twice the
** size of the position list in bytes. Writes the size to *pnSz and the
** flag to *pbDel, and returns the number of bytes of varint consumed.
*/
static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
  int nField;                     /* Raw decoded size field */
  int nRead = 0;                  /* Bytes of varint consumed */

  fts5FastGetVarint32(p, nRead, nField);
  assert_nc( nField>=0 );
  *pbDel = nField & 0x0001;
  *pnSz = nField/2;
  return nRead;
}
|
|
|
|
/*
|
|
** Fts5SegIter.iLeafOffset currently points to the first byte of a
|
|
** position-list size field. Read the value of the field and store it
|
|
** in the following variables:
|
|
**
|
|
** Fts5SegIter.nPos
|
|
** Fts5SegIter.bDel
|
|
**
|
|
** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the
|
|
** position list content (if any).
|
|
*/
|
|
static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
  if( p->rc==SQLITE_OK ){
    int iOff = pIter->iLeafOffset;  /* Offset to read at */
    ASSERT_SZLEAF_OK(pIter->pLeaf);
    if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
      /* detail=none: there is no size field. An entry is either nothing
      ** (nPos=1, not deleted), a single 0x00 (delete marker), or two
      ** 0x00 bytes (delete marker followed by an empty entry).  */
      int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf);
      pIter->bDel = 0;
      pIter->nPos = 1;
      if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
        pIter->bDel = 1;
        iOff++;
        if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
          pIter->nPos = 1;
          iOff++;
        }else{
          pIter->nPos = 0;
        }
      }
    }else{
      /* Normal case: decode the size field. Low bit is the delete flag,
      ** remaining bits are the position-list size in bytes.  */
      int nSz;
      fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz);
      pIter->bDel = (nSz & 0x0001);
      pIter->nPos = nSz>>1;
      assert_nc( pIter->nPos>=0 );
    }
    /* Leave iLeafOffset pointing at the position-list content. */
    pIter->iLeafOffset = iOff;
  }
}
|
|
|
|
/*
** Read the rowid varint at Fts5SegIter.iLeafOffset into Fts5SegIter.iRowid,
** advancing to the next leaf page first if the offset lies at or past the
** end of the current page's content area.
*/
static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
  u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
  i64 iOff = pIter->iLeafOffset;

  ASSERT_SZLEAF_OK(pIter->pLeaf);
  while( iOff>=pIter->pLeaf->szLeaf ){
    /* The rowid is on a following page. */
    fts5SegIterNextPage(p, pIter);
    if( pIter->pLeaf==0 ){
      /* Ran off the end of the segment - the stored offset was bogus. */
      if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
      return;
    }
    /* Skip the 4-byte leaf page header. */
    iOff = 4;
    a = pIter->pLeaf->p;
  }
  iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
  pIter->iLeafOffset = iOff;
}
|
|
|
|
/*
|
|
** Fts5SegIter.iLeafOffset currently points to the first byte of the
|
|
** "nSuffix" field of a term. Function parameter nKeep contains the value
|
|
** of the "nPrefix" field (if there was one - it is passed 0 if this is
|
|
** the first term in the segment).
|
|
**
|
|
** This function populates:
|
|
**
|
|
** Fts5SegIter.term
|
|
** Fts5SegIter.rowid
|
|
**
|
|
** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of
|
|
** the first position list. The position list belonging to document
|
|
** (Fts5SegIter.iRowid).
|
|
*/
|
|
static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
  u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
  i64 iOff = pIter->iLeafOffset;  /* Offset to read at */
  int nNew;                       /* Bytes of new data */

  iOff += fts5GetVarint32(&a[iOff], nNew);
  /* Sanity checks against corrupt data: the suffix must fit on the page,
  ** the prefix length cannot exceed the previous term, and a term must
  ** contribute at least one new byte.  */
  if( iOff+nNew>pIter->pLeaf->szLeaf || nKeep>pIter->term.n || nNew==0 ){
    p->rc = FTS5_CORRUPT;
    return;
  }
  /* Keep the first nKeep bytes of the previous term and append the new
  ** suffix (prefix compression).  */
  pIter->term.n = nKeep;
  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
  assert( pIter->term.n<=pIter->term.nSpace );
  iOff += nNew;
  /* Record where this term's doclist begins. */
  pIter->iTermLeafOffset = iOff;
  pIter->iTermLeafPgno = pIter->iLeafPgno;
  pIter->iLeafOffset = iOff;

  if( pIter->iPgidxOff>=pIter->pLeaf->nn ){
    /* No more pgidx entries: the doclist runs to the end of the page. */
    pIter->iEndofDoclist = pIter->pLeaf->nn+1;
  }else{
    /* Next pgidx delta gives the offset of the following term, which is
    ** where this doclist ends.  */
    int nExtra;
    pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra);
    pIter->iEndofDoclist += nExtra;
  }

  fts5SegIterLoadRowid(p, pIter);
}
|
|
|
|
static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*);
|
|
static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*);
|
|
static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*);
|
|
|
|
/*
** Install the appropriate xNext callback on segment iterator pIter,
** based on its direction flag and the detail= mode of the index.
*/
static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){
  void (*xNext)(Fts5Index*, Fts5SegIter*, int*);
  if( pIter->flags & FTS5_SEGITER_REVERSE ){
    xNext = fts5SegIterNext_Reverse;
  }else if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
    xNext = fts5SegIterNext_None;
  }else{
    xNext = fts5SegIterNext;
  }
  pIter->xNext = xNext;
}
|
|
|
|
/*
|
|
** Allocate a tombstone hash page array object (pIter->pTombArray) for
|
|
** the iterator passed as the second argument. If an OOM error occurs,
|
|
** leave an error in the Fts5Index object.
|
|
*/
|
|
/*
** Allocate a tombstone hash page array object (pIter->pTombArray) for
** the iterator passed as the second argument. If an OOM error occurs,
** leave an error in the Fts5Index object. A no-op if the segment has
** no tombstone pages.
*/
static void fts5SegIterAllocTombstone(Fts5Index *p, Fts5SegIter *pIter){
  const int nTomb = pIter->pSeg->nPgTombstone;
  Fts5TombstoneArray *pArray;
  int nByte;

  if( nTomb<=0 ) return;

  /* Header struct plus one Fts5Data pointer per tombstone page. */
  nByte = nTomb * sizeof(Fts5Data*) + sizeof(Fts5TombstoneArray);
  pArray = (Fts5TombstoneArray*)sqlite3Fts5MallocZero(&p->rc, nByte);
  if( pArray ){
    pArray->nTombstone = nTomb;
    pArray->nRef = 1;
    pIter->pTombArray = pArray;
  }
}
|
|
|
|
/*
|
|
** Initialize the iterator object pIter to iterate through the entries in
|
|
** segment pSeg. The iterator is left pointing to the first entry when
|
|
** this function returns.
|
|
**
|
|
** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
|
|
** an error has already occurred when this function is called, it is a no-op.
|
|
*/
|
|
static void fts5SegIterInit(
  Fts5Index *p,                   /* FTS index object */
  Fts5StructureSegment *pSeg,     /* Description of segment */
  Fts5SegIter *pIter              /* Object to populate */
){
  if( pSeg->pgnoFirst==0 ){
    /* This happens if the segment is being used as an input to an incremental
    ** merge and all data has already been "trimmed". See function
    ** fts5TrimSegments() for details. In this case leave the iterator empty.
    ** The caller will see the (pIter->pLeaf==0) and assume the iterator is
    ** at EOF already. */
    assert( pIter->pLeaf==0 );
    return;
  }

  if( p->rc==SQLITE_OK ){
    memset(pIter, 0, sizeof(*pIter));
    fts5SegIterSetNext(p, pIter);
    pIter->pSeg = pSeg;
    pIter->iLeafPgno = pSeg->pgnoFirst-1;
    /* Skip any leading pages that contain only the 4-byte header
    ** (i.e. no content). */
    do {
      fts5SegIterNextPage(p, pIter);
    }while( p->rc==SQLITE_OK && pIter->pLeaf && pIter->pLeaf->nn==4 );
  }

  if( p->rc==SQLITE_OK && pIter->pLeaf ){
    /* First term starts immediately after the 4-byte page header. */
    pIter->iLeafOffset = 4;
    assert( pIter->pLeaf!=0 );
    assert_nc( pIter->pLeaf->nn>4 );
    assert_nc( fts5LeafFirstTermOff(pIter->pLeaf)==4 );
    pIter->iPgidxOff = pIter->pLeaf->szLeaf+1;
    fts5SegIterLoadTerm(p, pIter, 0);
    fts5SegIterLoadNPos(p, pIter);
    fts5SegIterAllocTombstone(p, pIter);
  }
}
|
|
|
|
/*
|
|
** This function is only ever called on iterators created by calls to
|
|
** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set.
|
|
**
|
|
** The iterator is in an unusual state when this function is called: the
|
|
** Fts5SegIter.iLeafOffset variable is set to the offset of the start of
|
|
** the position-list size field for the first relevant rowid on the page.
|
|
** Fts5SegIter.rowid is set, but nPos and bDel are not.
|
|
**
|
|
** This function advances the iterator so that it points to the last
|
|
** relevant rowid on the page and, if necessary, initializes the
|
|
** aRowidOffset[] and iRowidOffset variables. At this point the iterator
|
|
** is in its regular state - Fts5SegIter.iLeafOffset points to the first
|
|
** byte of the position list content associated with said rowid.
|
|
*/
|
|
static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
  int eDetail = p->pConfig->eDetail;
  int n = pIter->pLeaf->szLeaf;   /* Scan limit (clamped to doclist end below) */
  int i = pIter->iLeafOffset;
  u8 *a = pIter->pLeaf->p;
  int iRowidOffset = 0;           /* Number of entries pushed to aRowidOffset[] */

  if( n>pIter->iEndofDoclist ){
    n = pIter->iEndofDoclist;
  }

  ASSERT_SZLEAF_OK(pIter->pLeaf);
  /* Walk forward through the doclist, recording the offset of each rowid's
  ** size field in aRowidOffset[] so that fts5SegIterNext_Reverse() can later
  ** step backwards through them.  */
  while( 1 ){
    u64 iDelta = 0;

    if( eDetail==FTS5_DETAIL_NONE ){
      /* todo */
      if( i<n && a[i]==0 ){
        i++;
        if( i<n && a[i]==0 ) i++;
      }
    }else{
      int nPos;
      int bDummy;
      /* Skip over the size field and the position list itself. */
      i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
      i += nPos;
    }
    if( i>=n ) break;
    i += fts5GetVarint(&a[i], &iDelta);
    pIter->iRowid += iDelta;

    /* If necessary, grow the pIter->aRowidOffset[] array. */
    if( iRowidOffset>=pIter->nRowidOffset ){
      int nNew = pIter->nRowidOffset + 8;
      int *aNew = (int*)sqlite3_realloc64(pIter->aRowidOffset,nNew*sizeof(int));
      if( aNew==0 ){
        p->rc = SQLITE_NOMEM;
        break;
      }
      pIter->aRowidOffset = aNew;
      pIter->nRowidOffset = nNew;
    }

    pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset;
    pIter->iLeafOffset = i;
  }
  pIter->iRowidOffset = iRowidOffset;
  /* The iterator now points at the last rowid on the page; load its
  ** position-list size and delete flag. */
  fts5SegIterLoadNPos(p, pIter);
}
|
|
|
|
/*
** The reverse (ORDER BY rowid DESC) one-term iterator pIter has consumed
** all rowids on its current page. Step backwards to the nearest preceding
** page (but no earlier than the page the term itself appears on) that
** contains at least one rowid for this term, and position the iterator
** on that page's final rowid. If no such page exists, leave pIter->pLeaf
** set to NULL to indicate EOF.
*/
static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){
  assert( pIter->flags & FTS5_SEGITER_REVERSE );
  assert( pIter->flags & FTS5_SEGITER_ONETERM );

  fts5DataRelease(pIter->pLeaf);
  pIter->pLeaf = 0;
  while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
    Fts5Data *pNew;
    pIter->iLeafPgno--;
    pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
          pIter->pSeg->iSegid, pIter->iLeafPgno
    ));
    if( pNew ){
      /* iTermLeafOffset may be equal to szLeaf if the term is the last
      ** thing on the page - i.e. the first rowid is on the following page.
      ** In this case leave pIter->pLeaf==0, this iterator is at EOF. */
      if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
        assert( pIter->pLeaf==0 );
        if( pIter->iTermLeafOffset<pNew->szLeaf ){
          pIter->pLeaf = pNew;
          pIter->iLeafOffset = pIter->iTermLeafOffset;
        }
      }else{
        /* On pages after the term's own page, the page header records the
        ** offset of the first rowid (0 means no rowid on this page). */
        int iRowidOff;
        iRowidOff = fts5LeafFirstRowidOff(pNew);
        if( iRowidOff ){
          if( iRowidOff>=pNew->szLeaf ){
            p->rc = FTS5_CORRUPT;
          }else{
            pIter->pLeaf = pNew;
            pIter->iLeafOffset = iRowidOff;
          }
        }
      }

      if( pIter->pLeaf ){
        /* Found a usable page: decode the first rowid and stop scanning. */
        u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset];
        pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid);
        break;
      }else{
        fts5DataRelease(pNew);
      }
    }
  }

  if( pIter->pLeaf ){
    /* Build the aRowidOffset[] array for reverse stepping on this page. */
    pIter->iEndofDoclist = pIter->pLeaf->nn+1;
    fts5SegIterReverseInitPage(p, pIter);
  }
}
|
|
|
|
/*
|
|
** Return true if the iterator passed as the second argument currently
|
|
** points to a delete marker. A delete marker is an entry with a 0 byte
|
|
** position-list.
|
|
*/
|
|
/*
** Return true if the iterator passed as the second argument currently
** points to a delete marker - an entry with a zero-byte position list.
*/
static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){
  Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
  if( p->rc!=SQLITE_OK ) return 0;
  if( pSeg->pLeaf==0 ) return 0;
  return pSeg->nPos==0;
}
|
|
|
|
/*
|
|
** Advance iterator pIter to the next entry.
|
|
**
|
|
** This version of fts5SegIterNext() is only used by reverse iterators.
|
|
*/
|
|
static void fts5SegIterNext_Reverse(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  int *pbUnused                   /* Unused */
){
  assert( pIter->flags & FTS5_SEGITER_REVERSE );
  assert( pIter->pNextLeaf==0 );
  UNUSED_PARAM(pbUnused);

  if( pIter->iRowidOffset>0 ){
    /* Step back to the previous entry on this page using the offsets
    ** recorded by fts5SegIterReverseInitPage().  */
    u8 *a = pIter->pLeaf->p;
    int iOff;
    u64 iDelta;

    pIter->iRowidOffset--;
    pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset];
    fts5SegIterLoadNPos(p, pIter);
    iOff = pIter->iLeafOffset;
    if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){
      /* Skip past the position list to reach the next rowid delta. */
      iOff += pIter->nPos;
    }
    /* Rowids are delta-encoded forwards, so moving backwards means
    ** subtracting the delta stored after the current entry.  */
    fts5GetVarint(&a[iOff], &iDelta);
    pIter->iRowid -= iDelta;
  }else{
    /* Page exhausted - move to the preceding page of the doclist. */
    fts5SegIterReverseNewPage(p, pIter);
  }
}
|
|
|
|
/*
|
|
** Advance iterator pIter to the next entry.
|
|
**
|
|
** This version of fts5SegIterNext() is only used if detail=none and the
|
|
** iterator is not a reverse direction iterator.
|
|
*/
|
|
static void fts5SegIterNext_None(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  int *pbNewTerm                  /* OUT: Set for new term */
){
  int iOff;

  assert( p->rc==SQLITE_OK );
  assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 );
  assert( p->pConfig->eDetail==FTS5_DETAIL_NONE );

  ASSERT_SZLEAF_OK(pIter->pLeaf);
  iOff = pIter->iLeafOffset;

  /* Next entry is on the next page */
  while( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){
    fts5SegIterNextPage(p, pIter);
    if( p->rc || pIter->pLeaf==0 ) return;
    /* Rowid deltas restart on each page. Skip the 4-byte header. */
    pIter->iRowid = 0;
    iOff = 4;
  }

  if( iOff<pIter->iEndofDoclist ){
    /* Next entry is on the current page */
    i64 iDelta;
    iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], (u64*)&iDelta);
    pIter->iLeafOffset = iOff;
    pIter->iRowid += iDelta;
  }else if( (pIter->flags & FTS5_SEGITER_ONETERM)==0 ){
    if( pIter->pSeg ){
      /* Doclist finished - decode the next term on this page. The first
      ** term at the start of a page stores no nPrefix field.  */
      int nKeep = 0;
      if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){
        iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep);
      }
      pIter->iLeafOffset = iOff;
      fts5SegIterLoadTerm(p, pIter, nKeep);
    }else{
      /* In-memory hash iterator (no segment): move to the next hash entry
      ** and point the fake leaf page at its doclist.  */
      const u8 *pList = 0;
      const char *zTerm = 0;
      int nTerm = 0;
      int nList;
      sqlite3Fts5HashScanNext(p->pHash);
      sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &nTerm, &pList, &nList);
      if( pList==0 ) goto next_none_eof;
      pIter->pLeaf->p = (u8*)pList;
      pIter->pLeaf->nn = nList;
      pIter->pLeaf->szLeaf = nList;
      pIter->iEndofDoclist = nList;
      sqlite3Fts5BufferSet(&p->rc,&pIter->term, nTerm, (u8*)zTerm);
      pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
    }

    if( pbNewTerm ) *pbNewTerm = 1;
  }else{
    /* One-term iterator and the doclist is finished: EOF. */
    goto next_none_eof;
  }

  fts5SegIterLoadNPos(p, pIter);

  return;
 next_none_eof:
  fts5DataRelease(pIter->pLeaf);
  pIter->pLeaf = 0;
}
|
|
|
|
|
|
/*
|
|
** Advance iterator pIter to the next entry.
|
|
**
|
|
** If an error occurs, Fts5Index.rc is set to an appropriate error code. It
|
|
** is not considered an error if the iterator reaches EOF. If an error has
|
|
** already occurred when this function is called, it is a no-op.
|
|
*/
|
|
static void fts5SegIterNext(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  int *pbNewTerm                  /* OUT: Set for new term */
){
  Fts5Data *pLeaf = pIter->pLeaf;
  int iOff;
  int bNewTerm = 0;               /* True if a new term starts at iOff */
  int nKeep = 0;                  /* nPrefix field of the next term, if any */
  u8 *a;
  int n;

  assert( pbNewTerm==0 || *pbNewTerm==0 );
  assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );

  /* Search for the end of the position list within the current page. */
  a = pLeaf->p;
  n = pLeaf->szLeaf;

  ASSERT_SZLEAF_OK(pLeaf);
  iOff = pIter->iLeafOffset + pIter->nPos;

  if( iOff<n ){
    /* The next entry is on the current page. */
    assert_nc( iOff<=pIter->iEndofDoclist );
    if( iOff>=pIter->iEndofDoclist ){
      /* Doclist finished - a new term begins here. The first term at the
      ** start of a page has no nPrefix field.  */
      bNewTerm = 1;
      if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
        iOff += fts5GetVarint32(&a[iOff], nKeep);
      }
    }else{
      /* Same doclist - decode the next rowid delta. */
      u64 iDelta;
      iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
      pIter->iRowid += iDelta;
      assert_nc( iDelta>0 );
    }
    pIter->iLeafOffset = iOff;

  }else if( pIter->pSeg==0 ){
    /* In-memory hash iterator: advance to the next hash entry and point
    ** the fake leaf page at its doclist.  */
    const u8 *pList = 0;
    const char *zTerm = 0;
    int nTerm = 0;
    int nList = 0;
    assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm );
    if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){
      sqlite3Fts5HashScanNext(p->pHash);
      sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &nTerm, &pList, &nList);
    }
    if( pList==0 ){
      fts5DataRelease(pIter->pLeaf);
      pIter->pLeaf = 0;
    }else{
      pIter->pLeaf->p = (u8*)pList;
      pIter->pLeaf->nn = nList;
      pIter->pLeaf->szLeaf = nList;
      pIter->iEndofDoclist = nList+1;
      sqlite3Fts5BufferSet(&p->rc, &pIter->term, nTerm, (u8*)zTerm);
      pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
      *pbNewTerm = 1;
    }
  }else{
    iOff = 0;
    /* Next entry is not on the current page */
    while( iOff==0 ){
      fts5SegIterNextPage(p, pIter);
      pLeaf = pIter->pLeaf;
      if( pLeaf==0 ) break;
      ASSERT_SZLEAF_OK(pLeaf);
      if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
        /* The doclist continues on this page: decode the first rowid. */
        iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
        pIter->iLeafOffset = iOff;

        if( pLeaf->nn>pLeaf->szLeaf ){
          pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
              &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
          );
        }
      }
      else if( pLeaf->nn>pLeaf->szLeaf ){
        /* No rowid on this page but there is a term: it begins at the
        ** offset given by the first pgidx entry.  */
        pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
            &pLeaf->p[pLeaf->szLeaf], iOff
        );
        pIter->iLeafOffset = iOff;
        pIter->iEndofDoclist = iOff;
        bNewTerm = 1;
      }
      assert_nc( iOff<pLeaf->szLeaf );
      if( iOff>pLeaf->szLeaf ){
        p->rc = FTS5_CORRUPT;
        return;
      }
    }
  }

  /* Check if the iterator is now at EOF. If so, return early. */
  if( pIter->pLeaf ){
    if( bNewTerm ){
      if( pIter->flags & FTS5_SEGITER_ONETERM ){
        /* One-term iterators stop at the end of their doclist. */
        fts5DataRelease(pIter->pLeaf);
        pIter->pLeaf = 0;
      }else{
        fts5SegIterLoadTerm(p, pIter, nKeep);
        fts5SegIterLoadNPos(p, pIter);
        if( pbNewTerm ) *pbNewTerm = 1;
      }
    }else{
      /* The following could be done by calling fts5SegIterLoadNPos(). But
      ** this block is particularly performance critical, so equivalent
      ** code is inlined. */
      int nSz;
      assert_nc( pIter->iLeafOffset<=pIter->pLeaf->nn );
      fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz);
      pIter->bDel = (nSz & 0x0001);
      pIter->nPos = nSz>>1;
      assert_nc( pIter->nPos>=0 );
    }
  }
}
|
|
|
|
#define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; }
|
|
|
|
#define fts5IndexSkipVarint(a, iOff) { \
|
|
int iEnd = iOff+9; \
|
|
while( (a[iOff++] & 0x80) && iOff<iEnd ); \
|
|
}
|
|
|
|
/*
|
|
** Iterator pIter currently points to the first rowid in a doclist. This
|
|
** function sets the iterator up so that iterates in reverse order through
|
|
** the doclist.
|
|
*/
|
|
static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
  Fts5DlidxIter *pDlidx = pIter->pDlidx;
  Fts5Data *pLast = 0;            /* Page containing the last rowid, if not current */
  int pgnoLast = 0;               /* Page number of pLast */

  if( pDlidx && p->pConfig->iVersion==FTS5_CURRENT_VERSION ){
    /* A doclist-index is available: it points straight at the page
    ** holding the largest rowid.  */
    int iSegid = pIter->pSeg->iSegid;
    pgnoLast = fts5DlidxIterPgno(pDlidx);
    pLast = fts5LeafRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast));
  }else{
    Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */

    /* Currently, Fts5SegIter.iLeafOffset points to the first byte of
    ** position-list content for the current rowid. Back it up so that it
    ** points to the start of the position-list size field. */
    int iPoslist;
    if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
      iPoslist = pIter->iTermLeafOffset;
    }else{
      iPoslist = 4;
    }
    fts5IndexSkipVarint(pLeaf->p, iPoslist);
    pIter->iLeafOffset = iPoslist;

    /* If this condition is true then the largest rowid for the current
    ** term may not be stored on the current page. So search forward to
    ** see where said rowid really is. */
    if( pIter->iEndofDoclist>=pLeaf->szLeaf ){
      int pgno;
      Fts5StructureSegment *pSeg = pIter->pSeg;

      /* The last rowid in the doclist may not be on the current page. Search
      ** forward to find the page containing the last rowid. */
      for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){
        i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
        Fts5Data *pNew = fts5LeafRead(p, iAbs);
        if( pNew ){
          int iRowid, bTermless;
          iRowid = fts5LeafFirstRowidOff(pNew);
          bTermless = fts5LeafIsTermless(pNew);
          if( iRowid ){
            /* Page contains a rowid - it becomes the latest candidate. */
            SWAPVAL(Fts5Data*, pNew, pLast);
            pgnoLast = pgno;
          }
          fts5DataRelease(pNew);
          /* A page with a term on it ends the doclist - stop scanning. */
          if( bTermless==0 ) break;
        }
      }
    }
  }

  /* If pLast is NULL at this point, then the last rowid for this doclist
  ** lies on the page currently indicated by the iterator. In this case
  ** pIter->iLeafOffset is already set to point to the position-list size
  ** field associated with the first relevant rowid on the page.
  **
  ** Or, if pLast is non-NULL, then it is the page that contains the last
  ** rowid. In this case configure the iterator so that it points to the
  ** first rowid on this page.
  */
  if( pLast ){
    int iOff;
    fts5DataRelease(pIter->pLeaf);
    pIter->pLeaf = pLast;
    pIter->iLeafPgno = pgnoLast;
    iOff = fts5LeafFirstRowidOff(pLast);
    if( iOff>pLast->szLeaf ){
      p->rc = FTS5_CORRUPT;
      return;
    }
    iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
    pIter->iLeafOffset = iOff;

    if( fts5LeafIsTermless(pLast) ){
      pIter->iEndofDoclist = pLast->nn+1;
    }else{
      pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast);
    }
  }

  /* Record the rowid offsets on the final page so the iterator can step
  ** backwards through them.  */
  fts5SegIterReverseInitPage(p, pIter);
}
|
|
|
|
/*
|
|
** Iterator pIter currently points to the first rowid of a doclist.
|
|
** There is a doclist-index associated with the final term on the current
|
|
** page. If the current term is the last term on the page, load the
|
|
** doclist-index from disk and initialize an iterator at (pIter->pDlidx).
|
|
*/
|
|
static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){
  int iSeg = pIter->pSeg->iSegid;
  int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
  Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */

  assert( pIter->flags & FTS5_SEGITER_ONETERM );
  assert( pIter->pDlidx==0 );

  /* Check if the current doclist ends on this page. If it does, return
  ** early without loading the doclist-index (as it belongs to a different
  ** term. */
  if( pIter->iTermLeafPgno==pIter->iLeafPgno
   && pIter->iEndofDoclist<pLeaf->szLeaf
  ){
    return;
  }

  /* The doclist spills onto later pages - load its doclist-index, rooted
  ** at the page on which the term appears.  */
  pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
}
|
|
|
|
/*
|
|
** The iterator object passed as the second argument currently contains
|
|
** no valid values except for the Fts5SegIter.pLeaf member variable. This
|
|
** function searches the leaf page for a term matching (pTerm/nTerm).
|
|
**
|
|
** If the specified term is found on the page, then the iterator is left
|
|
** pointing to it. If argument bGe is zero and the term is not found,
|
|
** the iterator is left pointing at EOF.
|
|
**
|
|
** If bGe is non-zero and the specified term is not found, then the
|
|
** iterator is left pointing to the smallest term in the segment that
|
|
** is larger than the specified term, even if this term is not on the
|
|
** current page.
|
|
*/
|
|
static void fts5LeafSeek(
  Fts5Index *p,                   /* Leave any error code here */
  int bGe,                        /* True for a >= search */
  Fts5SegIter *pIter,             /* Iterator to seek */
  const u8 *pTerm, int nTerm      /* Term to search for */
){
  u32 iOff;                       /* Offset of current term's suffix bytes */
  const u8 *a = pIter->pLeaf->p;
  u32 n = (u32)pIter->pLeaf->nn;

  u32 nMatch = 0;                 /* Bytes of pTerm already known to match */
  u32 nKeep = 0;                  /* nPrefix field of current term */
  u32 nNew = 0;                   /* nSuffix field of current term */
  u32 iTermOff;                   /* Offset at which current term begins */
  u32 iPgidx;                     /* Current offset in pgidx */
  int bEndOfPage = 0;             /* True if the scan ran off the page */

  assert( p->rc==SQLITE_OK );

  /* The pgidx array (stored after the leaf content) gives the offset of
  ** each term on the page as a series of deltas. Start at the first term. */
  iPgidx = (u32)pIter->pLeaf->szLeaf;
  iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff);
  iOff = iTermOff;
  if( iOff>n ){
    p->rc = FTS5_CORRUPT;
    return;
  }

  /* Terms are prefix-compressed and stored in sorted order, so nMatch
  ** (the length of the prefix of pTerm matched so far) only ever grows
  ** while scanning forward.  */
  while( 1 ){

    /* Figure out how many new bytes are in this term */
    fts5FastGetVarint32(a, iOff, nNew);
    if( nKeep<nMatch ){
      /* This term shares fewer bytes with its predecessor than we have
      ** already matched - it (and all following terms) sort after pTerm. */
      goto search_failed;
    }

    assert( nKeep>=nMatch );
    if( nKeep==nMatch ){
      u32 nCmp;
      u32 i;
      nCmp = (u32)MIN(nNew, nTerm-nMatch);
      for(i=0; i<nCmp; i++){
        if( a[iOff+i]!=pTerm[nMatch+i] ) break;
      }
      nMatch += i;

      if( (u32)nTerm==nMatch ){
        if( i==nNew ){
          /* Exact match. */
          goto search_success;
        }else{
          /* Current term is pTerm plus extra bytes - it sorts after. */
          goto search_failed;
        }
      }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
        /* Current term sorts after pTerm. */
        goto search_failed;
      }
    }

    if( iPgidx>=n ){
      bEndOfPage = 1;
      break;
    }

    /* Advance to the next term on the page via the pgidx delta. */
    iPgidx += fts5GetVarint32(&a[iPgidx], nKeep);
    iTermOff += nKeep;
    iOff = iTermOff;

    if( iOff>=n ){
      p->rc = FTS5_CORRUPT;
      return;
    }

    /* Read the nKeep field of the next term. */
    fts5FastGetVarint32(a, iOff, nKeep);
  }

 search_failed:
  if( bGe==0 ){
    /* Exact search and no match: iterator is at EOF. */
    fts5DataRelease(pIter->pLeaf);
    pIter->pLeaf = 0;
    return;
  }else if( bEndOfPage ){
    /* >= search: advance pages until one carrying a term is found, then
    ** land on its first term.  */
    do {
      fts5SegIterNextPage(p, pIter);
      if( pIter->pLeaf==0 ) return;
      a = pIter->pLeaf->p;
      if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
        iPgidx = (u32)pIter->pLeaf->szLeaf;
        iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff);
        if( iOff<4 || (i64)iOff>=pIter->pLeaf->szLeaf ){
          p->rc = FTS5_CORRUPT;
          return;
        }else{
          nKeep = 0;
          iTermOff = iOff;
          n = (u32)pIter->pLeaf->nn;
          iOff += fts5GetVarint32(&a[iOff], nNew);
          break;
        }
      }
    }while( 1 );
  }

 search_success:
  if( (i64)iOff+nNew>n || nNew<1 ){
    p->rc = FTS5_CORRUPT;
    return;
  }
  pIter->iLeafOffset = iOff + nNew;
  pIter->iTermLeafOffset = pIter->iLeafOffset;
  pIter->iTermLeafPgno = pIter->iLeafPgno;

  /* Reassemble the full term: matched prefix plus the new suffix bytes. */
  fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);

  if( iPgidx>=n ){
    pIter->iEndofDoclist = pIter->pLeaf->nn+1;
  }else{
    int nExtra;
    iPgidx += fts5GetVarint32(&a[iPgidx], nExtra);
    pIter->iEndofDoclist = iTermOff + nExtra;
  }
  pIter->iPgidxOff = iPgidx;

  fts5SegIterLoadRowid(p, pIter);
  fts5SegIterLoadNPos(p, pIter);
}
|
|
|
|
/*
** Return the prepared statement used to query the %_idx table for the
** leaf page that may contain a given term, preparing and caching it on
** first use. Returns NULL and leaves an error code in the Fts5Index
** object if preparation fails.
*/
static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){
  if( p->pIdxSelect==0 ){
    Fts5Config *pConfig = p->pConfig;
    fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf(
          "SELECT pgno FROM '%q'.'%q_idx' WHERE "
          "segid=? AND term<=? ORDER BY term DESC LIMIT 1",
          pConfig->zDb, pConfig->zName
    ));
  }
  return p->pIdxSelect;
}
|
|
|
|
/*
** Initialize the object pIter to point to term pTerm/nTerm within segment
** pSeg. If there is no such term in the index, the iterator is set to EOF.
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
** an error has already occurred when this function is called, it is a no-op.
*/
static void fts5SegIterSeekInit(
  Fts5Index *p,                   /* FTS5 backend */
  const u8 *pTerm, int nTerm,     /* Term to seek to */
  int flags,                      /* Mask of FTS5INDEX_XXX flags */
  Fts5StructureSegment *pSeg,     /* Description of segment */
  Fts5SegIter *pIter              /* Object to populate */
){
  int iPg = 1;
  int bGe = (flags & FTS5INDEX_QUERY_SCAN);
  int bDlidx = 0;                 /* True if there is a doclist-index */
  sqlite3_stmt *pIdxSelect = 0;

  assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 );
  assert( pTerm && nTerm );
  memset(pIter, 0, sizeof(*pIter));
  pIter->pSeg = pSeg;

  /* This block sets stack variable iPg to the leaf page number that may
  ** contain term (pTerm/nTerm), if it is present in the segment. */
  pIdxSelect = fts5IdxSelectStmt(p);
  if( p->rc ) return;
  sqlite3_bind_int(pIdxSelect, 1, pSeg->iSegid);
  sqlite3_bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC);
  if( SQLITE_ROW==sqlite3_step(pIdxSelect) ){
    /* The %_idx value encodes the page number in the upper bits; bit 0
    ** is set if the term has a doclist-index. */
    i64 val = sqlite3_column_int(pIdxSelect, 0);
    iPg = (int)(val>>1);
    bDlidx = (val & 0x0001);
  }
  p->rc = sqlite3_reset(pIdxSelect);
  sqlite3_bind_null(pIdxSelect, 2);

  if( iPg<pSeg->pgnoFirst ){
    iPg = pSeg->pgnoFirst;
    bDlidx = 0;
  }

  pIter->iLeafPgno = iPg - 1;
  fts5SegIterNextPage(p, pIter);

  if( pIter->pLeaf ){
    fts5LeafSeek(p, bGe, pIter, pTerm, nTerm);
  }

  if( p->rc==SQLITE_OK && (bGe==0 || (flags & FTS5INDEX_QUERY_SCANONETERM)) ){
    pIter->flags |= FTS5_SEGITER_ONETERM;
    if( pIter->pLeaf ){
      if( flags & FTS5INDEX_QUERY_DESC ){
        pIter->flags |= FTS5_SEGITER_REVERSE;
      }
      if( bDlidx ){
        fts5SegIterLoadDlidx(p, pIter);
      }
      if( flags & FTS5INDEX_QUERY_DESC ){
        fts5SegIterReverse(p, pIter);
      }
    }
  }

  fts5SegIterSetNext(p, pIter);
  if( 0==(flags & FTS5INDEX_QUERY_SCANONETERM) ){
    fts5SegIterAllocTombstone(p, pIter);
  }

  /* Either:
  **
  **   1) an error has occurred, or
  **   2) the iterator points to EOF, or
  **   3) the iterator points to an entry with term (pTerm/nTerm), or
  **   4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points
  **      to an entry with a term greater than or equal to (pTerm/nTerm).
  */
  assert_nc( p->rc!=SQLITE_OK                                       /* 1 */
   || pIter->pLeaf==0                                               /* 2 */
   || fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)==0          /* 3 */
   || (bGe && fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0)  /* 4 */
  );
}
|
|
|
|
|
|
/*
** SQL used by fts5SegIterNextInit() to find the page to open. The
** statement is prepared on first use and cached in
** Fts5Index.pIdxNextSelect thereafter.
*/
static sqlite3_stmt *fts5IdxNextStmt(Fts5Index *p){
  if( p->pIdxNextSelect==0 ){
    Fts5Config *pConfig = p->pConfig;
    fts5IndexPrepareStmt(p, &p->pIdxNextSelect, sqlite3_mprintf(
          "SELECT pgno FROM '%q'.'%q_idx' WHERE "
          "segid=? AND term>? ORDER BY term ASC LIMIT 1",
          pConfig->zDb, pConfig->zName
    ));
  }
  return p->pIdxNextSelect;
}
|
|
|
|
/*
** This is similar to fts5SegIterSeekInit(), except that it initializes
** the segment iterator to point to the first term following the page
** with pToken/nToken on it.
*/
static void fts5SegIterNextInit(
  Fts5Index *p,
  const char *pTerm, int nTerm,
  Fts5StructureSegment *pSeg,     /* Description of segment */
  Fts5SegIter *pIter              /* Object to populate */
){
  int iPg = -1;                   /* Page of segment to open */
  int bDlidx = 0;                 /* True if the term has a doclist-index */
  sqlite3_stmt *pSel = 0;         /* SELECT to find iPg */

  pSel = fts5IdxNextStmt(p);
  if( pSel ){
    assert( p->rc==SQLITE_OK );
    sqlite3_bind_int(pSel, 1, pSeg->iSegid);
    sqlite3_bind_blob(pSel, 2, pTerm, nTerm, SQLITE_STATIC);

    if( sqlite3_step(pSel)==SQLITE_ROW ){
      /* Page number in the upper bits, doclist-index flag in bit 0. */
      i64 val = sqlite3_column_int64(pSel, 0);
      iPg = (int)(val>>1);
      bDlidx = (val & 0x0001);
    }
    p->rc = sqlite3_reset(pSel);
    sqlite3_bind_null(pSel, 2);
    if( p->rc ) return;
  }

  memset(pIter, 0, sizeof(*pIter));
  pIter->pSeg = pSeg;
  pIter->flags |= FTS5_SEGITER_ONETERM;
  if( iPg>=0 ){
    pIter->iLeafPgno = iPg - 1;
    fts5SegIterNextPage(p, pIter);
    fts5SegIterSetNext(p, pIter);
  }
  if( pIter->pLeaf ){
    const u8 *a = pIter->pLeaf->p;
    int iTermOff = 0;

    /* Position the iterator on the first term of the page (the first
    ** entry in the page-index). */
    pIter->iPgidxOff = pIter->pLeaf->szLeaf;
    pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], iTermOff);
    pIter->iLeafOffset = iTermOff;
    fts5SegIterLoadTerm(p, pIter, 0);
    fts5SegIterLoadNPos(p, pIter);
    if( bDlidx ) fts5SegIterLoadDlidx(p, pIter);

    assert( p->rc!=SQLITE_OK ||
        fts5BufferCompareBlob(&pIter->term, (const u8*)pTerm, nTerm)>0
    );
  }
}
|
|
|
|
/*
** Initialize the object pIter to point to term pTerm/nTerm within the
** in-memory hash table. If there is no such term in the hash-table, the
** iterator is set to EOF.
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
** an error has already occurred when this function is called, it is a no-op.
*/
static void fts5SegIterHashInit(
  Fts5Index *p,                   /* FTS5 backend */
  const u8 *pTerm, int nTerm,     /* Term to seek to */
  int flags,                      /* Mask of FTS5INDEX_XXX flags */
  Fts5SegIter *pIter              /* Object to populate */
){
  int nList = 0;                  /* Size of doclist for the term */
  const u8 *z = 0;                /* Term to store in pIter->term */
  int n = 0;                      /* Size of z in bytes */
  Fts5Data *pLeaf = 0;            /* Fake leaf page wrapping the doclist */

  assert( p->pHash );
  assert( p->rc==SQLITE_OK );

  if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN) ){
    /* Scan mode: position a hash-table scan on the first matching entry
    ** and wrap its doclist in a synthesized Fts5Data object. */
    const u8 *pList = 0;

    p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm);
    sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &n, &pList, &nList);
    if( pList ){
      pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data));
      if( pLeaf ){
        pLeaf->p = (u8*)pList;
      }
    }

    /* The call to sqlite3Fts5HashScanInit() causes the hash table to
    ** fill the size field of all existing position lists. This means they
    ** can no longer be appended to. Since the only scenario in which they
    ** can be appended to is if the previous operation on this table was
    ** a DELETE, by clearing the Fts5Index.bDelete flag we can avoid this
    ** possibility altogether.  */
    p->bDelete = 0;
  }else{
    /* Exact-term lookup. sqlite3Fts5HashQuery() allocates the Fts5Data
    ** header and copies the doclist directly after it. */
    p->rc = sqlite3Fts5HashQuery(p->pHash, sizeof(Fts5Data),
        (const char*)pTerm, nTerm, (void**)&pLeaf, &nList
    );
    if( pLeaf ){
      pLeaf->p = (u8*)&pLeaf[1];
    }
    z = pTerm;
    n = nTerm;
    pIter->flags |= FTS5_SEGITER_ONETERM;
  }

  if( pLeaf ){
    sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z);
    pLeaf->nn = pLeaf->szLeaf = nList;
    pIter->pLeaf = pLeaf;
    pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid);
    pIter->iEndofDoclist = pLeaf->nn;

    if( flags & FTS5INDEX_QUERY_DESC ){
      pIter->flags |= FTS5_SEGITER_REVERSE;
      fts5SegIterReverseInitPage(p, pIter);
    }else{
      fts5SegIterLoadNPos(p, pIter);
    }
  }

  fts5SegIterSetNext(p, pIter);
}
|
|
|
|
/*
|
|
** Array ap[] contains n elements. Release each of these elements using
|
|
** fts5DataRelease(). Then free the array itself using sqlite3_free().
|
|
*/
|
|
static void fts5IndexFreeArray(Fts5Data **ap, int n){
|
|
if( ap ){
|
|
int ii;
|
|
for(ii=0; ii<n; ii++){
|
|
fts5DataRelease(ap[ii]);
|
|
}
|
|
sqlite3_free(ap);
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Decrement the ref-count of the object passed as the only argument. If it
|
|
** reaches 0, free it and its contents.
|
|
*/
|
|
static void fts5TombstoneArrayDelete(Fts5TombstoneArray *p){
|
|
if( p ){
|
|
p->nRef--;
|
|
if( p->nRef<=0 ){
|
|
int ii;
|
|
for(ii=0; ii<p->nTombstone; ii++){
|
|
fts5DataRelease(p->apTombstone[ii]);
|
|
}
|
|
sqlite3_free(p);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Zero the iterator passed as the only argument.
|
|
*/
|
|
static void fts5SegIterClear(Fts5SegIter *pIter){
|
|
fts5BufferFree(&pIter->term);
|
|
fts5DataRelease(pIter->pLeaf);
|
|
fts5DataRelease(pIter->pNextLeaf);
|
|
fts5TombstoneArrayDelete(pIter->pTombArray);
|
|
fts5DlidxIterFree(pIter->pDlidx);
|
|
sqlite3_free(pIter->aRowidOffset);
|
|
memset(pIter, 0, sizeof(Fts5SegIter));
|
|
}
|
|
|
|
#ifdef SQLITE_DEBUG
|
|
|
|
/*
** This function is used as part of the big assert() procedure implemented by
** fts5AssertMultiIterSetup(). It ensures that the result currently stored
** in *pRes is the correct result of comparing the current positions of the
** two iterators.
*/
static void fts5AssertComparisonResult(
  Fts5Iter *pIter,
  Fts5SegIter *p1,
  Fts5SegIter *p2,
  Fts5CResult *pRes
){
  int i1 = p1 - pIter->aSeg;
  int i2 = p2 - pIter->aSeg;

  if( p1->pLeaf || p2->pLeaf ){
    if( p1->pLeaf==0 ){
      /* p1 at EOF - p2 must have won the comparison. */
      assert( pRes->iFirst==i2 );
    }else if( p2->pLeaf==0 ){
      /* p2 at EOF - p1 must have won the comparison. */
      assert( pRes->iFirst==i1 );
    }else{
      /* Compare terms; ties are broken by comparing term lengths. */
      int nMin = MIN(p1->term.n, p2->term.n);
      int res = fts5Memcmp(p1->term.p, p2->term.p, nMin);
      if( res==0 ) res = p1->term.n - p2->term.n;

      if( res==0 ){
        /* Terms identical - rowids decide, respecting iteration order. */
        assert( pRes->bTermEq==1 );
        assert( p1->iRowid!=p2->iRowid );
        res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1;
      }else{
        assert( pRes->bTermEq==0 );
      }

      if( res<0 ){
        assert( pRes->iFirst==i1 );
      }else{
        assert( pRes->iFirst==i2 );
      }
    }
  }
}
|
|
|
|
/*
** This function is a no-op unless SQLITE_DEBUG is defined when this module
** is compiled. In that case, this function is essentially an assert()
** statement used to verify that the contents of the pIter->aFirst[] array
** are correct.
*/
static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){
  if( p->rc==SQLITE_OK ){
    Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
    int i;

    assert( (pFirst->pLeaf==0)==pIter->base.bEof );

    /* Check that pIter->iSwitchRowid is set correctly. */
    for(i=0; i<pIter->nSeg; i++){
      Fts5SegIter *p1 = &pIter->aSeg[i];
      assert( p1==pFirst
           || p1->pLeaf==0
           || fts5BufferCompare(&pFirst->term, &p1->term)
           || p1->iRowid==pIter->iSwitchRowid
           || (p1->iRowid<pIter->iSwitchRowid)==pIter->bRev
      );
    }

    /* Verify the leaf level of the tournament tree: each entry in the
    ** upper half of aFirst[] compares an adjacent pair of aSeg[]. */
    for(i=0; i<pIter->nSeg; i+=2){
      Fts5SegIter *p1 = &pIter->aSeg[i];
      Fts5SegIter *p2 = &pIter->aSeg[i+1];
      Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2];
      fts5AssertComparisonResult(pIter, p1, p2, pRes);
    }

    /* Verify the internal nodes: each compares the winners of its two
    ** child comparisons. */
    for(i=1; i<(pIter->nSeg / 2); i+=2){
      Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ];
      Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ];
      Fts5CResult *pRes = &pIter->aFirst[i];
      fts5AssertComparisonResult(pIter, p1, p2, pRes);
    }
  }
}
|
|
#else
|
|
# define fts5AssertMultiIterSetup(x,y)
|
|
#endif
|
|
|
|
/*
** Do the comparison necessary to populate pIter->aFirst[iOut].
**
** If the returned value is non-zero, then it is the index of an entry
** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing
** to a key that is a duplicate of another, higher priority,
** segment-iterator in the pSeg->aSeg[] array.
*/
static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){
  int i1;                         /* Index of left-hand Fts5SegIter */
  int i2;                         /* Index of right-hand Fts5SegIter */
  int iRes;
  Fts5SegIter *p1;                /* Left-hand Fts5SegIter */
  Fts5SegIter *p2;                /* Right-hand Fts5SegIter */
  Fts5CResult *pRes = &pIter->aFirst[iOut];

  assert( iOut<pIter->nSeg && iOut>0 );
  assert( pIter->bRev==0 || pIter->bRev==1 );

  if( iOut>=(pIter->nSeg/2) ){
    /* Leaf node of the tournament tree: compare two aSeg[] entries. */
    i1 = (iOut - pIter->nSeg/2) * 2;
    i2 = i1 + 1;
  }else{
    /* Internal node: compare the winners of the two child comparisons. */
    i1 = pIter->aFirst[iOut*2].iFirst;
    i2 = pIter->aFirst[iOut*2+1].iFirst;
  }
  p1 = &pIter->aSeg[i1];
  p2 = &pIter->aSeg[i2];

  pRes->bTermEq = 0;
  if( p1->pLeaf==0 ){           /* If p1 is at EOF */
    iRes = i2;
  }else if( p2->pLeaf==0 ){     /* If p2 is at EOF */
    iRes = i1;
  }else{
    int res = fts5BufferCompare(&p1->term, &p2->term);
    if( res==0 ){
      assert_nc( i2>i1 );
      assert_nc( i2!=0 );
      pRes->bTermEq = 1;
      if( p1->iRowid==p2->iRowid ){
        /* Same term AND same rowid - p2 is a duplicate key. */
        return i2;
      }
      res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1;
    }
    assert( res!=0 );
    if( res<0 ){
      iRes = i1;
    }else{
      iRes = i2;
    }
  }

  pRes->iFirst = (u16)iRes;
  return 0;
}
|
|
|
|
/*
** Move the seg-iter so that it points to the first rowid on page iLeafPgno.
** It is an error if leaf iLeafPgno does not exist. Unless the db is
** a 'secure-delete' db, if it contains no rowids then this is also an error.
*/
static void fts5SegIterGotoPage(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  int iLeafPgno
){
  assert( iLeafPgno>pIter->iLeafPgno );

  if( iLeafPgno>pIter->pSeg->pgnoLast ){
    p->rc = FTS5_CORRUPT;
  }else{
    fts5DataRelease(pIter->pNextLeaf);
    pIter->pNextLeaf = 0;
    pIter->iLeafPgno = iLeafPgno-1;

    /* Advance page by page until a page containing at least one rowid
    ** is found, or the end of the segment is reached. */
    while( p->rc==SQLITE_OK ){
      int iOff;
      fts5SegIterNextPage(p, pIter);
      if( pIter->pLeaf==0 ) break;
      iOff = fts5LeafFirstRowidOff(pIter->pLeaf);
      if( iOff>0 ){
        u8 *a = pIter->pLeaf->p;
        int n = pIter->pLeaf->szLeaf;
        /* A valid rowid offset lies past the 4-byte page header and
        ** within the leaf body. */
        if( iOff<4 || iOff>=n ){
          p->rc = FTS5_CORRUPT;
        }else{
          iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
          pIter->iLeafOffset = iOff;
          fts5SegIterLoadNPos(p, pIter);
        }
        break;
      }
    }
  }
}
|
|
|
|
/*
** Advance the iterator passed as the second argument until it is at or
** past rowid iFrom. Regardless of the value of iFrom, the iterator is
** always advanced at least once.
*/
static void fts5SegIterNextFrom(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  i64 iMatch                      /* Advance iterator at least this far */
){
  int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
  Fts5DlidxIter *pDlidx = pIter->pDlidx;
  int iLeafPgno = pIter->iLeafPgno;
  int bMove = 1;                  /* False if a page-jump already advanced */

  assert( pIter->flags & FTS5_SEGITER_ONETERM );
  assert( pIter->pDlidx );
  assert( pIter->pLeaf );

  if( bRev==0 ){
    /* Forward iteration: use the doclist-index to skip whole leaf pages
    ** whose first rowid is still smaller than iMatch. */
    while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){
      iLeafPgno = fts5DlidxIterPgno(pDlidx);
      fts5DlidxIterNext(p, pDlidx);
    }
    assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc );
    if( iLeafPgno>pIter->iLeafPgno ){
      fts5SegIterGotoPage(p, pIter, iLeafPgno);
      bMove = 0;
    }
  }else{
    /* Reverse iteration: walk the doclist-index backwards instead. */
    assert( pIter->pNextLeaf==0 );
    assert( iMatch<pIter->iRowid );
    while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){
      fts5DlidxIterPrev(p, pDlidx);
    }
    iLeafPgno = fts5DlidxIterPgno(pDlidx);

    assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno );

    if( iLeafPgno<pIter->iLeafPgno ){
      pIter->iLeafPgno = iLeafPgno+1;
      fts5SegIterReverseNewPage(p, pIter);
      bMove = 0;
    }
  }

  /* Step entry-by-entry until the rowid condition is satisfied or the
  ** iterator reaches EOF. */
  do{
    if( bMove && p->rc==SQLITE_OK ) pIter->xNext(p, pIter, 0);
    if( pIter->pLeaf==0 ) break;
    if( bRev==0 && pIter->iRowid>=iMatch ) break;
    if( bRev!=0 && pIter->iRowid<=iMatch ) break;
    bMove = 1;
  }while( p->rc==SQLITE_OK );
}
|
|
|
|
/*
|
|
** Free the iterator object passed as the second argument.
|
|
*/
|
|
static void fts5MultiIterFree(Fts5Iter *pIter){
|
|
if( pIter ){
|
|
int i;
|
|
for(i=0; i<pIter->nSeg; i++){
|
|
fts5SegIterClear(&pIter->aSeg[i]);
|
|
}
|
|
fts5BufferFree(&pIter->poslist);
|
|
sqlite3_free(pIter);
|
|
}
|
|
}
|
|
|
|
/*
** Sub-iterator iChanged of pIter has just been advanced (it may now point
** to a different term, or be at EOF). Recompute the aFirst[] tournament
** tree entries on the path from iChanged's leaf slot up to (and
** including) entry iMinset. If fts5MultiIterDoCompare() reports that a
** sub-iterator holds a duplicate key (same term and rowid as a higher
** priority sub-iterator), that sub-iterator is advanced too and the
** recomputation restarts from its leaf slot.
*/
static void fts5MultiIterAdvanced(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  Fts5Iter *pIter,                /* Iterator to update aFirst[] array for */
  int iChanged,                   /* Index of sub-iterator just advanced */
  int iMinset                     /* Minimum entry in aFirst[] to set */
){
  int i;
  for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){
    int iEq;
    if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){
      Fts5SegIter *pSeg = &pIter->aSeg[iEq];
      assert( p->rc==SQLITE_OK );
      pSeg->xNext(p, pSeg, 0);
      i = pIter->nSeg + iEq;      /* Restart from pSeg's leaf slot */
    }
  }
}
|
|
|
|
/*
** Sub-iterator iChanged of iterator pIter has just been advanced. It still
** points to the same term though - just a different rowid. This function
** attempts to update the contents of the pIter->aFirst[] accordingly.
** If it does so successfully, 0 is returned. Otherwise 1.
**
** If non-zero is returned, the caller should call fts5MultiIterAdvanced()
** on the iterator instead. That function does the same as this one, except
** that it deals with more complicated cases as well.
*/
static int fts5MultiIterAdvanceRowid(
  Fts5Iter *pIter,                /* Iterator to update aFirst[] array for */
  int iChanged,                   /* Index of sub-iterator just advanced */
  Fts5SegIter **ppFirst           /* OUT: new winning sub-iterator */
){
  Fts5SegIter *pNew = &pIter->aSeg[iChanged];

  /* The fast path applies only while the changed iterator's rowid has not
  ** passed iSwitchRowid - otherwise the tree shape may change. */
  if( pNew->iRowid==pIter->iSwitchRowid
   || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev
  ){
    int i;
    Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001];
    pIter->iSwitchRowid = pIter->bRev ? SMALLEST_INT64 : LARGEST_INT64;
    /* Walk from the leaf comparison up to the root (aFirst[1]). */
    for(i=(pIter->nSeg+iChanged)/2; 1; i=i/2){
      Fts5CResult *pRes = &pIter->aFirst[i];

      assert( pNew->pLeaf );
      assert( pRes->bTermEq==0 || pOther->pLeaf );

      if( pRes->bTermEq ){
        if( pNew->iRowid==pOther->iRowid ){
          /* Duplicate (term, rowid) - caller must use the slow path. */
          return 1;
        }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){
          pIter->iSwitchRowid = pOther->iRowid;
          pNew = pOther;
        }else if( (pOther->iRowid>pIter->iSwitchRowid)==pIter->bRev ){
          pIter->iSwitchRowid = pOther->iRowid;
        }
      }
      pRes->iFirst = (u16)(pNew - pIter->aSeg);
      if( i==1 ) break;

      pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ];
    }
  }

  *ppFirst = pNew;
  return 0;
}
|
|
|
|
/*
|
|
** Set the pIter->bEof variable based on the state of the sub-iterators.
|
|
*/
|
|
static void fts5MultiIterSetEof(Fts5Iter *pIter){
|
|
Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
|
|
pIter->base.bEof = pSeg->pLeaf==0;
|
|
pIter->iSwitchRowid = pSeg->iRowid;
|
|
}
|
|
|
|
/*
** The argument to this macro must be an Fts5Data structure containing a
** tombstone hash page. This macro returns the key-size of the hash-page.
** Byte 0 of the page stores the slot size - 4 or 8 bytes per key.
*/
#define TOMBSTONE_KEYSIZE(pPg) (pPg->p[0]==4 ? 4 : 8)

/*
** Number of hash slots on tombstone page pPg: the page size less the
** 8 byte header, divided by the slot size. Pages of 16 bytes or fewer
** are treated as containing a single slot.
*/
#define TOMBSTONE_NSLOT(pPg)   \
  ((pPg->nn > 16) ? ((pPg->nn-8) / TOMBSTONE_KEYSIZE(pPg)) : 1)
|
|
|
|
/*
** Query a single tombstone hash table for rowid iRowid. Return true if
** it is found or false otherwise. The tombstone hash table is one of
** nHashTable tables.
*/
static int fts5IndexTombstoneQuery(
  Fts5Data *pHash,                /* Hash table page to query */
  int nHashTable,                 /* Number of pages attached to segment */
  u64 iRowid                      /* Rowid to query hash for */
){
  const int szKey = TOMBSTONE_KEYSIZE(pHash);
  const int nSlot = TOMBSTONE_NSLOT(pHash);
  int iSlot = (iRowid / nHashTable) % nSlot;    /* Initial hash slot */
  int nCollide = nSlot;           /* Max linear-probe steps before giving up */

  if( iRowid==0 ){
    /* Rowid 0 cannot be stored in a slot (0 marks an empty slot), so it
    ** is recorded in the flag byte at offset 1 of the page header. */
    return pHash->p[1];
  }else if( szKey==4 ){
    /* 32-bit keys: linear probing from iSlot; an empty (zero) slot
    ** terminates the search. */
    u32 *aSlot = (u32*)&pHash->p[8];
    while( aSlot[iSlot] ){
      if( fts5GetU32((u8*)&aSlot[iSlot])==iRowid ) return 1;
      if( nCollide--==0 ) break;
      iSlot = (iSlot+1)%nSlot;
    }
  }else{
    /* 64-bit keys: same probing scheme. */
    u64 *aSlot = (u64*)&pHash->p[8];
    while( aSlot[iSlot] ){
      if( fts5GetU64((u8*)&aSlot[iSlot])==iRowid ) return 1;
      if( nCollide--==0 ) break;
      iSlot = (iSlot+1)%nSlot;
    }
  }

  return 0;
}
|
|
|
|
/*
** Return true if the iterator passed as the only argument points
** to an segment entry for which there is a tombstone. Return false
** if there is no tombstone or if the iterator is already at EOF.
*/
static int fts5MultiIterIsDeleted(Fts5Iter *pIter){
  int iFirst = pIter->aFirst[1].iFirst;
  Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
  Fts5TombstoneArray *pArray = pSeg->pTombArray;

  if( pSeg->pLeaf && pArray ){
    /* Figure out which page the rowid might be present on. */
    int iPg = ((u64)pSeg->iRowid) % pArray->nTombstone;
    assert( iPg>=0 );

    /* If tombstone hash page iPg has not yet been loaded from the
    ** database, load it now. If the read fails, treat the entry as
    ** not-deleted (fts5DataRead presumably records the error - TODO
    ** confirm). */
    if( pArray->apTombstone[iPg]==0 ){
      pArray->apTombstone[iPg] = fts5DataRead(pIter->pIndex,
          FTS5_TOMBSTONE_ROWID(pSeg->pSeg->iSegid, iPg)
      );
      if( pArray->apTombstone[iPg]==0 ) return 0;
    }

    return fts5IndexTombstoneQuery(
        pArray->apTombstone[iPg],
        pArray->nTombstone,
        pSeg->iRowid
    );
  }

  return 0;
}
|
|
|
|
/*
** Move the iterator to the next entry.
**
** If an error occurs, an error code is left in Fts5Index.rc. It is not
** considered an error if the iterator reaches EOF, or if it is already at
** EOF when this function is called.
*/
static void fts5MultiIterNext(
  Fts5Index *p,
  Fts5Iter *pIter,
  int bFrom,                      /* True if argument iFrom is valid */
  i64 iFrom                       /* Advance at least as far as this */
){
  int bUseFrom = bFrom;           /* Use iFrom on the first step only */
  assert( pIter->base.bEof==0 );
  while( p->rc==SQLITE_OK ){
    int iFirst = pIter->aFirst[1].iFirst;
    int bNewTerm = 0;
    Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
    assert( p->rc==SQLITE_OK );
    if( bUseFrom && pSeg->pDlidx ){
      /* A doclist-index is available - seek directly to rowid iFrom. */
      fts5SegIterNextFrom(p, pSeg, iFrom);
    }else{
      pSeg->xNext(p, pSeg, &bNewTerm);
    }

    /* If the advanced sub-iterator hit EOF, changed term, or the fast
    ** rowid-only update failed, rebuild the affected aFirst[] entries. */
    if( pSeg->pLeaf==0 || bNewTerm
     || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
    ){
      fts5MultiIterAdvanced(p, pIter, iFirst, 1);
      fts5MultiIterSetEof(pIter);
      pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
      if( pSeg->pLeaf==0 ) return;
    }

    fts5AssertMultiIterSetup(p, pIter);
    assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf );
    /* Deliver the entry unless it is an empty poslist being skipped or
    ** a tombstoned (deleted) rowid. */
    if( (pIter->bSkipEmpty==0 || pSeg->nPos)
      && 0==fts5MultiIterIsDeleted(pIter)
    ){
      pIter->xSetOutputs(pIter, pSeg);
      return;
    }
    bUseFrom = 0;
  }
}
|
|
|
|
/*
** Move iterator pIter to the next entry, skipping entries that are empty
** or for which a tombstone exists. Output *pbNewTerm is set to true if
** the iterator *might* now be positioned on a different term (the caller
** must compare terms to know for certain).
*/
static void fts5MultiIterNext2(
  Fts5Index *p,
  Fts5Iter *pIter,
  int *pbNewTerm                  /* OUT: True if *might* be new term */
){
  assert( pIter->bSkipEmpty );
  if( p->rc==SQLITE_OK ){
    *pbNewTerm = 0;
    do{
      int iFirst = pIter->aFirst[1].iFirst;
      Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
      int bNewTerm = 0;

      assert( p->rc==SQLITE_OK );
      pSeg->xNext(p, pSeg, &bNewTerm);
      /* Fall back to a full aFirst[] rebuild when EOF, a term change, or
      ** a failed fast rowid update requires it. */
      if( pSeg->pLeaf==0 || bNewTerm
       || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
      ){
        fts5MultiIterAdvanced(p, pIter, iFirst, 1);
        fts5MultiIterSetEof(pIter);
        *pbNewTerm = 1;
      }
      fts5AssertMultiIterSetup(p, pIter);

    }while( (fts5MultiIterIsEmpty(p, pIter) || fts5MultiIterIsDeleted(pIter))
         && (p->rc==SQLITE_OK)
    );
  }
}
|
|
|
|
/*
** xSetOutputs callback that publishes nothing at all.
*/
static void fts5IterSetOutputs_Noop(Fts5Iter *p1, Fts5SegIter *p2){
  UNUSED_PARAM(p1);
  UNUSED_PARAM(p2);
}
|
|
|
|
/*
** Allocate a new Fts5Iter with room for nSeg segment-iterators. The
** number of slots actually allocated is rounded up to the smallest power
** of two >= nSeg (minimum 2), as required by the aFirst[] tournament
** tree. Returns NULL on OOM (the error is recorded via fts5IdxMalloc()).
*/
static Fts5Iter *fts5MultiIterAlloc(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  int nSeg
){
  Fts5Iter *pNew;
  i64 nSlot;                      /* Power of two >= nSeg */

  for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2);
  /* A single allocation holds the Fts5Iter header, nSlot aSeg[] entries
  ** (one is part of the header) and the nSlot-entry aFirst[] array. */
  pNew = fts5IdxMalloc(p,
      sizeof(Fts5Iter) +                  /* pNew */
      sizeof(Fts5SegIter) * (nSlot-1) +   /* pNew->aSeg[] */
      sizeof(Fts5CResult) * nSlot         /* pNew->aFirst[] */
  );
  if( pNew ){
    pNew->nSeg = nSlot;
    pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot];
    pNew->pIndex = p;
    pNew->xSetOutputs = fts5IterSetOutputs_Noop;
  }
  return pNew;
}
|
|
|
|
static void fts5PoslistCallback(
|
|
Fts5Index *pUnused,
|
|
void *pContext,
|
|
const u8 *pChunk, int nChunk
|
|
){
|
|
UNUSED_PARAM(pUnused);
|
|
assert_nc( nChunk>=0 );
|
|
if( nChunk>0 ){
|
|
fts5BufferSafeAppendBlob((Fts5Buffer*)pContext, pChunk, nChunk);
|
|
}
|
|
}
|
|
|
|
/* Context for fts5PoslistFilterCallback() (detail=full column filter). */
typedef struct PoslistCallbackCtx PoslistCallbackCtx;
struct PoslistCallbackCtx {
  Fts5Buffer *pBuf;               /* Append to this buffer */
  Fts5Colset *pColset;            /* Restrict matches to this column */
  int eState;                     /* 0==skip current column, 1==copy current
                                  ** column, 2==previous chunk ended right
                                  ** after a 0x01 column-header byte; see
                                  ** fts5PoslistFilterCallback() */
};

/* Context for fts5PoslistOffsetsCallback() (detail=column filter). */
typedef struct PoslistOffsetsCtx PoslistOffsetsCtx;
struct PoslistOffsetsCtx {
  Fts5Buffer *pBuf;               /* Append to this buffer */
  Fts5Colset *pColset;            /* Restrict matches to this column */
  int iRead;                      /* Last column number decoded from input */
  int iWrite;                     /* Last column number written to pBuf */
};
|
|
|
|
/*
|
|
** TODO: Make this more efficient!
|
|
*/
|
|
static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){
|
|
int i;
|
|
for(i=0; i<pColset->nCol; i++){
|
|
if( pColset->aiCol[i]==iCol ) return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
** Callback for fts5ChunkIterate() used for detail=column tables with a
** column filter. The input is a list of column numbers encoded as
** varint deltas (value stored is delta+2); matching columns are
** re-encoded into pCtx->pBuf using the same delta scheme.
*/
static void fts5PoslistOffsetsCallback(
  Fts5Index *pUnused,
  void *pContext,
  const u8 *pChunk, int nChunk
){
  PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext;
  UNUSED_PARAM(pUnused);
  assert_nc( nChunk>=0 );
  if( nChunk>0 ){
    int i = 0;
    while( i<nChunk ){
      int iVal;
      i += fts5GetVarint32(&pChunk[i], iVal);
      iVal += pCtx->iRead - 2;    /* decode delta to absolute column */
      pCtx->iRead = iVal;
      if( fts5IndexColsetTest(pCtx->pColset, iVal) ){
        /* re-encode as a delta against the last value written */
        fts5BufferSafeAppendVarint(pCtx->pBuf, iVal + 2 - pCtx->iWrite);
        pCtx->iWrite = iVal;
      }
    }
  }
}
|
|
|
|
/*
** Callback for fts5ChunkIterate() used for detail=full tables when a
** column filter is applied. Copies into pCtx->pBuf only those portions
** of each chunk that belong to columns present in pCtx->pColset. State
** is carried across chunk boundaries in pCtx->eState (see
** PoslistCallbackCtx).
*/
static void fts5PoslistFilterCallback(
  Fts5Index *pUnused,
  void *pContext,
  const u8 *pChunk, int nChunk
){
  PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext;
  UNUSED_PARAM(pUnused);
  assert_nc( nChunk>=0 );
  if( nChunk>0 ){
    /* Search through to find the first varint with value 1. This is the
    ** start of the next columns hits. */
    int i = 0;
    int iStart = 0;

    if( pCtx->eState==2 ){
      /* The previous chunk ended immediately after a 0x01 byte, so the
      ** first varint in this chunk is a column number. */
      int iCol;
      fts5FastGetVarint32(pChunk, i, iCol);
      if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
        pCtx->eState = 1;
        fts5BufferSafeAppendVarint(pCtx->pBuf, 1);
      }else{
        pCtx->eState = 0;
      }
    }

    do {
      /* Skip to the next 0x01 byte that starts a varint (i.e. that is
      ** not a continuation byte), or to the end of the chunk. */
      while( i<nChunk && pChunk[i]!=0x01 ){
        while( pChunk[i] & 0x80 ) i++;
        i++;
      }
      if( pCtx->eState ){
        fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
      }
      if( i<nChunk ){
        int iCol;
        iStart = i;
        i++;
        if( i>=nChunk ){
          /* Chunk ends between the 0x01 byte and the column number. */
          pCtx->eState = 2;
        }else{
          fts5FastGetVarint32(pChunk, i, iCol);
          pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol);
          if( pCtx->eState ){
            fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
            iStart = i;
          }
        }
      }
    }while( i<nChunk );
  }
}
|
|
|
|
/*
** Invoke callback xChunk once for each contiguous chunk of the position
** list that iterator pSeg currently points to, reading successive leaf
** pages from the database as required. The first chunk comes from the
** current leaf; subsequent pages each contribute their body (after the
** 4-byte page header).
*/
static void fts5ChunkIterate(
  Fts5Index *p,                   /* Index object */
  Fts5SegIter *pSeg,              /* Poslist of this iterator */
  void *pCtx,                     /* Context pointer for xChunk callback */
  void (*xChunk)(Fts5Index*, void*, const u8*, int)
){
  int nRem = pSeg->nPos;          /* Number of bytes still to come */
  Fts5Data *pData = 0;
  u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
  int pgno = pSeg->iLeafPgno;
  int pgnoSave = 0;               /* If loaded, cache this page on pSeg */

  /* This function does not work with detail=none databases. */
  assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );

  if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){
    pgnoSave = pgno+1;
  }

  while( 1 ){
    xChunk(p, pCtx, pChunk, nChunk);
    nRem -= nChunk;
    fts5DataRelease(pData);
    if( nRem<=0 ){
      break;
    }else if( pSeg->pSeg==0 ){
      /* The poslist claims to span pages but there is no real segment
      ** to read more pages from - the data must be corrupt. */
      p->rc = FTS5_CORRUPT;
      return;
    }else{
      pgno++;
      pData = fts5LeafRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno));
      if( pData==0 ) break;
      pChunk = &pData->p[4];
      nChunk = MIN(nRem, pData->szLeaf - 4);
      if( pgno==pgnoSave ){
        /* Hand the page to the iterator as its read-ahead leaf so it
        ** is not loaded a second time. */
        assert( pSeg->pNextLeaf==0 );
        pSeg->pNextLeaf = pData;
        pData = 0;
      }
    }
  }
}
|
|
|
|
/*
** Iterator pIter currently points to a valid entry (not EOF). This
** function appends the position list data for the current entry to
** buffer pBuf. It does not make a copy of the position-list size
** field.
*/
static void fts5SegiterPoslist(
  Fts5Index *p,
  Fts5SegIter *pSeg,
  Fts5Colset *pColset,            /* NULL, or the columns to filter on */
  Fts5Buffer *pBuf
){
  assert( pBuf!=0 );
  assert( pSeg!=0 );
  /* Reserve room for the whole poslist plus zero-padding up front so
  ** that the chunk callbacks may use the "safe" append routines. */
  if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos+FTS5_DATA_ZERO_PADDING) ){
    assert( pBuf->p!=0 );
    assert( pBuf->nSpace >= pBuf->n+pSeg->nPos+FTS5_DATA_ZERO_PADDING );
    memset(&pBuf->p[pBuf->n+pSeg->nPos], 0, FTS5_DATA_ZERO_PADDING);
    if( pColset==0 ){
      /* No column filter - copy the poslist verbatim. */
      fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
    }else{
      if( p->pConfig->eDetail==FTS5_DETAIL_FULL ){
        PoslistCallbackCtx sCtx;
        sCtx.pBuf = pBuf;
        sCtx.pColset = pColset;
        /* Poslists implicitly start in column 0. */
        sCtx.eState = fts5IndexColsetTest(pColset, 0);
        assert( sCtx.eState==0 || sCtx.eState==1 );
        fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
      }else{
        PoslistOffsetsCtx sCtx;
        memset(&sCtx, 0, sizeof(sCtx));
        sCtx.pBuf = pBuf;
        sCtx.pColset = pColset;
        fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback);
      }
    }
  }
}
|
|
|
|
/*
** Parameter pPos points to a buffer containing a position list, size nPos.
** This function filters it according to pColset (which must be non-NULL)
** and sets pIter->base.pData/nData to point to the new position list.
** If memory is required for the new position list, use buffer pIter->poslist.
** Or, if the new position list is a contiguous subset of the input, set
** pIter->base.pData/nData to point directly to it.
**
** This function is a no-op if *pRc is other than SQLITE_OK when it is
** called. If an OOM error is encountered, *pRc is set to SQLITE_NOMEM
** before returning.
*/
static void fts5IndexExtractColset(
  int *pRc,
  Fts5Colset *pColset,            /* Colset to filter on */
  const u8 *pPos, int nPos,       /* Position list */
  Fts5Iter *pIter
){
  if( *pRc==SQLITE_OK ){
    const u8 *p = pPos;
    const u8 *aCopy = p;          /* Start of current column's data */
    const u8 *pEnd = &p[nPos];    /* One byte past end of position list */
    int i = 0;                    /* Index into pColset->aiCol[] */
    int iCurrent = 0;             /* Column currently being scanned */

    /* For multi-column filters, size the output buffer for a full copy
    ** of the input up front (OOM sets *pRc and aborts). */
    if( pColset->nCol>1 && sqlite3Fts5BufferSize(pRc, &pIter->poslist, nPos) ){
      return;
    }

    while( 1 ){
      while( pColset->aiCol[i]<iCurrent ){
        i++;
        if( i==pColset->nCol ){
          /* No more filter columns - whatever was copied is the result. */
          pIter->base.pData = pIter->poslist.p;
          pIter->base.nData = pIter->poslist.n;
          return;
        }
      }

      /* Advance pointer p until it points to pEnd or an 0x01 byte that is
      ** not part of a varint */
      while( p<pEnd && *p!=0x01 ){
        while( *p++ & 0x80 );
      }

      if( pColset->aiCol[i]==iCurrent ){
        if( pColset->nCol==1 ){
          /* Single-column filter matched: point directly into the input
          ** buffer - no copy needed. */
          pIter->base.pData = aCopy;
          pIter->base.nData = p-aCopy;
          return;
        }
        fts5BufferSafeAppendBlob(&pIter->poslist, aCopy, p-aCopy);
      }
      if( p>=pEnd ){
        pIter->base.pData = pIter->poslist.p;
        pIter->base.nData = pIter->poslist.n;
        return;
      }
      aCopy = p++;
      iCurrent = *p++;
      if( iCurrent & 0x80 ){
        /* Multi-byte column number - back up and decode the varint. */
        p--;
        p += fts5GetVarint32(p, iCurrent);
      }
    }
  }

}
|
|
|
|
/*
|
|
** xSetOutputs callback used by detail=none tables.
|
|
*/
|
|
static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){
|
|
assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE );
|
|
pIter->base.iRowid = pSeg->iRowid;
|
|
pIter->base.nData = pSeg->nPos;
|
|
}
|
|
|
|
/*
** xSetOutputs callback used by detail=full and detail=col tables when no
** column filters are specified.
*/
static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){
  pIter->base.iRowid = pSeg->iRowid;
  pIter->base.nData = pSeg->nPos;

  assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE );
  assert( pIter->pColset==0 );

  if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
    /* All data is stored on the current page. Populate the output
    ** variables to point into the body of the page object. */
    pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  }else{
    /* The data is distributed over two or more pages. Copy it into the
    ** Fts5Iter.poslist buffer and then set the output pointer to point
    ** to this buffer. */
    fts5BufferZero(&pIter->poslist);
    fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist);
    pIter->base.pData = pIter->poslist.p;
  }
}
|
|
|
|
/*
|
|
** xSetOutputs callback used when the Fts5Colset object has nCol==0 (match
|
|
** against no columns at all).
|
|
*/
|
|
static void fts5IterSetOutputs_ZeroColset(Fts5Iter *pIter, Fts5SegIter *pSeg){
|
|
UNUSED_PARAM(pSeg);
|
|
pIter->base.nData = 0;
|
|
}
|
|
|
|
/*
|
|
** xSetOutputs callback used by detail=col when there is a column filter
|
|
** and there are 100 or more columns. Also called as a fallback from
|
|
** fts5IterSetOutputs_Col100 if the column-list spans more than one page.
|
|
*/
|
|
static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){
|
|
fts5BufferZero(&pIter->poslist);
|
|
fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist);
|
|
pIter->base.iRowid = pSeg->iRowid;
|
|
pIter->base.pData = pIter->poslist.p;
|
|
pIter->base.nData = pIter->poslist.n;
|
|
}
|
|
|
|
/*
** xSetOutputs callback used when:
**
**   * detail=col,
**   * there is a column filter, and
**   * the table contains 100 or fewer columns.
**
** The last point is to ensure all column numbers are stored as
** single-byte varints (values 0..99 encode as (col+2) in one byte),
** which allows the filtering loop below to rewrite the list in place
** one byte at a time.
*/
static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){

  assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS );
  assert( pIter->pColset );

  if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){
    /* Column-list spans more than one page - fall back to the general
    ** (buffer-copying) implementation. */
    fts5IterSetOutputs_Col(pIter, pSeg);
  }else{
    u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset];  /* Input cursor */
    u8 *pEnd = (u8*)&a[pSeg->nPos];                   /* End of input */
    int iPrev = 0;                /* Column number decoded so far */
    int *aiCol = pIter->pColset->aiCol;               /* Filter cursor */
    int *aiColEnd = &aiCol[pIter->pColset->nCol];     /* End of filter */

    u8 *aOut = pIter->poslist.p;  /* Output cursor */
    int iPrevOut = 0;             /* Last column number written to output */

    pIter->base.iRowid = pSeg->iRowid;

    while( a<pEnd ){
      /* Each input byte is a delta-encoded column number: (delta+2). */
      iPrev += (int)a++[0] - 2;
      /* Advance the filter past columns smaller than iPrev. If the
      ** filter is exhausted, no further input can match. */
      while( *aiCol<iPrev ){
        aiCol++;
        if( aiCol==aiColEnd ) goto setoutputs_col_out;
      }
      if( *aiCol==iPrev ){
        /* Column iPrev passes the filter - re-encode it as a delta
        ** against the previous column written to the output. */
        *aOut++ = (u8)((iPrev - iPrevOut) + 2);
        iPrevOut = iPrev;
      }
    }

setoutputs_col_out:
    pIter->base.pData = pIter->poslist.p;
    pIter->base.nData = aOut - pIter->poslist.p;
  }
}
|
|
|
|
/*
** xSetOutputs callback used by detail=full when there is a column filter.
** The position-list is filtered down to the columns in pIter->pColset
** before being exposed through pIter->base.
*/
static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){
  Fts5Colset *pColset = pIter->pColset;
  pIter->base.iRowid = pSeg->iRowid;

  assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL );
  assert( pColset );

  if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
    /* All data is stored on the current page. Populate the output
    ** variables to point into the body of the page object.
    ** fts5IndexExtractColset() filters the list and sets the base
    ** output fields itself. */
    const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset];
    int *pRc = &pIter->pIndex->rc;
    fts5BufferZero(&pIter->poslist);
    fts5IndexExtractColset(pRc, pColset, a, pSeg->nPos, pIter);
  }else{
    /* The data is distributed over two or more pages. Copy it into the
    ** Fts5Iter.poslist buffer and then set the output pointer to point
    ** to this buffer. */
    fts5BufferZero(&pIter->poslist);
    fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
    pIter->base.pData = pIter->poslist.p;
    pIter->base.nData = pIter->poslist.n;
  }
}
|
|
|
|
/*
** Select and install the xSetOutputs callback for iterator pIter, based
** on the table's detail= mode and on the column filter (if any) attached
** to the iterator. If *pRc is other than SQLITE_OK when this function is
** called, it is a no-op.
*/
static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){
  assert( pIter!=0 || (*pRc)!=SQLITE_OK );
  if( *pRc==SQLITE_OK ){
    Fts5Config *pConfig = pIter->pIndex->pConfig;
    if( pConfig->eDetail==FTS5_DETAIL_NONE ){
      /* detail=none: no position data at all. */
      pIter->xSetOutputs = fts5IterSetOutputs_None;
    }

    else if( pIter->pColset==0 ){
      /* No column filter: output the position-list unmodified. */
      pIter->xSetOutputs = fts5IterSetOutputs_Nocolset;
    }

    else if( pIter->pColset->nCol==0 ){
      /* Filter that matches no columns: output is always empty. */
      pIter->xSetOutputs = fts5IterSetOutputs_ZeroColset;
    }

    else if( pConfig->eDetail==FTS5_DETAIL_FULL ){
      /* detail=full with a column filter. */
      pIter->xSetOutputs = fts5IterSetOutputs_Full;
    }

    else{
      assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS );
      if( pConfig->nCol<=100 ){
        /* Small column count: the in-place single-byte rewriting variant
        ** may be used. Pre-size the poslist buffer for it. */
        pIter->xSetOutputs = fts5IterSetOutputs_Col100;
        sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol);
      }else{
        pIter->xSetOutputs = fts5IterSetOutputs_Col;
      }
    }
  }
}
|
|
|
|
/*
** All the component segment-iterators of pIter have been set up. This
** functions finishes setup for iterator pIter itself: it populates the
** aFirst[] comparison tree, sets the EOF flag, and positions the iterator
** on the first visible (non-empty, non-deleted) entry.
*/
static void fts5MultiIterFinishSetup(Fts5Index *p, Fts5Iter *pIter){
  int iIter;
  /* Build the aFirst[] tree bottom-up. Where two segment-iterators are
  ** positioned on the same term/rowid, advance the older one so that each
  ** key is visited only once. */
  for(iIter=pIter->nSeg-1; iIter>0; iIter--){
    int iEq;
    if( (iEq = fts5MultiIterDoCompare(pIter, iIter)) ){
      Fts5SegIter *pSeg = &pIter->aSeg[iEq];
      if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0);
      fts5MultiIterAdvanced(p, pIter, iEq, iIter);
    }
  }
  fts5MultiIterSetEof(pIter);
  fts5AssertMultiIterSetup(p, pIter);

  if( (pIter->bSkipEmpty && fts5MultiIterIsEmpty(p, pIter))
   || fts5MultiIterIsDeleted(pIter)
  ){
    /* Current entry should not be visible - advance to the next one. */
    fts5MultiIterNext(p, pIter, 0, 0);
  }else if( pIter->base.bEof==0 ){
    /* Populate the output fields from the current smallest entry. */
    Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
    pIter->xSetOutputs(pIter, pSeg);
  }
}
|
|
|
|
/*
** Allocate a new Fts5Iter object.
**
** The new object will be used to iterate through data in structure pStruct.
** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel
** is zero or greater, data from the first nSegment segments on level iLevel
** is merged.
**
** The iterator initially points to the first term/rowid entry in the
** iterated data.
**
** On error, Fts5Index.rc is set and *ppOut is set to NULL.
*/
static void fts5MultiIterNew(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  Fts5Structure *pStruct,         /* Structure of specific index */
  int flags,                      /* FTS5INDEX_QUERY_XXX flags */
  Fts5Colset *pColset,            /* Colset to filter on (or NULL) */
  const u8 *pTerm, int nTerm,     /* Term to seek to (or NULL/0) */
  int iLevel,                     /* Level to iterate (-1 for all) */
  int nSegment,                   /* Number of segments to merge (iLevel>=0) */
  Fts5Iter **ppOut                /* New object */
){
  int nSeg = 0;                   /* Number of segment-iters in use */
  int iIter = 0;                  /* Index of next aSeg[] entry to init */
  int iSeg;                       /* Used to iterate through segments */
  Fts5StructureLevel *pLvl;
  Fts5Iter *pNew;

  /* Term seeks are only supported when merging all levels. */
  assert( (pTerm==0 && nTerm==0) || iLevel<0 );

  /* Allocate space for the new multi-seg-iterator. */
  if( p->rc==SQLITE_OK ){
    if( iLevel<0 ){
      assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
      nSeg = pStruct->nSegment;
      /* One extra iterator for the in-memory hash table, unless the
      ** caller asked to skip it. */
      nSeg += (p->pHash && 0==(flags & FTS5INDEX_QUERY_SKIPHASH));
    }else{
      nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment);
    }
  }
  *ppOut = pNew = fts5MultiIterAlloc(p, nSeg);
  if( pNew==0 ){
    assert( p->rc!=SQLITE_OK );
    goto fts5MultiIterNew_post_check;
  }
  pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC));
  pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY));
  pNew->pColset = pColset;
  if( (flags & FTS5INDEX_QUERY_NOOUTPUT)==0 ){
    fts5IterSetOutputCb(&p->rc, pNew);
  }

  /* Initialize each of the component segment iterators. */
  if( p->rc==SQLITE_OK ){
    if( iLevel<0 ){
      Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel];
      if( p->pHash && 0==(flags & FTS5INDEX_QUERY_SKIPHASH) ){
        /* Add a segment iterator for the current contents of the hash table. */
        Fts5SegIter *pIter = &pNew->aSeg[iIter++];
        fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter);
      }
      for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){
        for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){
          Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
          Fts5SegIter *pIter = &pNew->aSeg[iIter++];
          if( pTerm==0 ){
            fts5SegIterInit(p, pSeg, pIter);
          }else{
            fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter);
          }
        }
      }
    }else{
      pLvl = &pStruct->aLevel[iLevel];
      for(iSeg=nSeg-1; iSeg>=0; iSeg--){
        fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]);
      }
    }
    assert( iIter==nSeg );
  }

  /* If the above was successful, each component iterator now points
  ** to the first entry in its segment. In this case initialize the
  ** aFirst[] array. Or, if an error has occurred, free the iterator
  ** object and set the output variable to NULL. */
  if( p->rc==SQLITE_OK ){
    fts5MultiIterFinishSetup(p, pNew);
  }else{
    fts5MultiIterFree(pNew);
    *ppOut = 0;
  }

fts5MultiIterNew_post_check:
  assert( (*ppOut)!=0 || p->rc!=SQLITE_OK );
  return;
}
|
|
|
|
/*
** Create an Fts5Iter that iterates through the doclist provided
** as the second argument.
**
** Ownership of pData is assumed by this function: if the doclist is
** non-empty it is handed to the new iterator (and pData is zeroed below
** so the fts5DataRelease() call is a no-op); otherwise it is released
** before returning.
*/
static void fts5MultiIterNew2(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  Fts5Data *pData,                /* Doclist to iterate through */
  int bDesc,                      /* True for descending rowid order */
  Fts5Iter **ppOut                /* New object */
){
  Fts5Iter *pNew;
  pNew = fts5MultiIterAlloc(p, 2);
  if( pNew ){
    Fts5SegIter *pIter = &pNew->aSeg[1];
    pIter->flags = FTS5_SEGITER_ONETERM;
    if( pData->szLeaf>0 ){
      pIter->pLeaf = pData;
      /* The doclist begins with the first rowid as a varint. */
      pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid);
      pIter->iEndofDoclist = pData->nn;
      pNew->aFirst[1].iFirst = 1;
      if( bDesc ){
        pNew->bRev = 1;
        pIter->flags |= FTS5_SEGITER_REVERSE;
        fts5SegIterReverseInitPage(p, pIter);
      }else{
        fts5SegIterLoadNPos(p, pIter);
      }
      pData = 0;                  /* Ownership passed to pIter->pLeaf */
    }else{
      pNew->base.bEof = 1;
    }
    fts5SegIterSetNext(p, pIter);

    *ppOut = pNew;
  }

  fts5DataRelease(pData);
}
|
|
|
|
/*
|
|
** Return true if the iterator is at EOF or if an error has occurred.
|
|
** False otherwise.
|
|
*/
|
|
static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){
|
|
assert( pIter!=0 || p->rc!=SQLITE_OK );
|
|
assert( p->rc!=SQLITE_OK
|
|
|| (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof
|
|
);
|
|
return (p->rc || pIter->base.bEof);
|
|
}
|
|
|
|
/*
|
|
** Return the rowid of the entry that the iterator currently points
|
|
** to. If the iterator points to EOF when this function is called the
|
|
** results are undefined.
|
|
*/
|
|
static i64 fts5MultiIterRowid(Fts5Iter *pIter){
|
|
assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf );
|
|
return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid;
|
|
}
|
|
|
|
/*
|
|
** Move the iterator to the next entry at or following iMatch.
|
|
*/
|
|
static void fts5MultiIterNextFrom(
|
|
Fts5Index *p,
|
|
Fts5Iter *pIter,
|
|
i64 iMatch
|
|
){
|
|
while( 1 ){
|
|
i64 iRowid;
|
|
fts5MultiIterNext(p, pIter, 1, iMatch);
|
|
if( fts5MultiIterEof(p, pIter) ) break;
|
|
iRowid = fts5MultiIterRowid(pIter);
|
|
if( pIter->bRev==0 && iRowid>=iMatch ) break;
|
|
if( pIter->bRev!=0 && iRowid<=iMatch ) break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Return a pointer to a buffer containing the term associated with the
|
|
** entry that the iterator currently points to.
|
|
*/
|
|
static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){
|
|
Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
|
|
*pn = p->term.n;
|
|
return p->term.p;
|
|
}
|
|
|
|
/*
** Allocate a new segment-id for the structure pStruct. The new segment
** id must be between 1 and 65335 inclusive, and must not be used by
** any currently existing segment. If a free segment id cannot be found,
** SQLITE_FULL is returned.
**
** If an error has already occurred, this function is a no-op. 0 is
** returned in this case.
*/
static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){
  int iSegid = 0;

  if( p->rc==SQLITE_OK ){
    if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){
      p->rc = SQLITE_FULL;
    }else{
      /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following
      ** array is 63 elements, or 252 bytes, in size. Each bit of aUsed[]
      ** records whether segment-id (bit-index + 1) is in use. */
      u32 aUsed[(FTS5_MAX_SEGMENT+31) / 32];
      int iLvl, iSeg;
      int i;
      u32 mask;
      memset(aUsed, 0, sizeof(aUsed));
      /* Mark the ids of all existing segments in the bitmap. */
      for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
        for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
          int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid;
          if( iId<=FTS5_MAX_SEGMENT && iId>0 ){
            aUsed[(iId-1) / 32] |= (u32)1 << ((iId-1) % 32);
          }
        }
      }

      /* Find the first word with a clear bit, then the first clear bit
      ** within it. That bit corresponds to the smallest free id. */
      for(i=0; aUsed[i]==0xFFFFFFFF; i++);
      mask = aUsed[i];
      for(iSegid=0; mask & ((u32)1 << iSegid); iSegid++);
      iSegid += 1 + i*32;

#ifdef SQLITE_DEBUG
      /* Verify that iSegid really is unused, both in the structure
      ** object and in the %_idx table on disk. */
      for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
        for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
          assert_nc( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid );
        }
      }
      assert_nc( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT );

      {
        sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p);
        if( p->rc==SQLITE_OK ){
          u8 aBlob[2] = {0xff, 0xff};
          sqlite3_bind_int(pIdxSelect, 1, iSegid);
          sqlite3_bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC);
          assert_nc( sqlite3_step(pIdxSelect)!=SQLITE_ROW );
          p->rc = sqlite3_reset(pIdxSelect);
          sqlite3_bind_null(pIdxSelect, 2);
        }
      }
#endif
    }
  }

  return iSegid;
}
|
|
|
|
/*
|
|
** Discard all data currently cached in the hash-tables.
|
|
*/
|
|
static void fts5IndexDiscardData(Fts5Index *p){
|
|
assert( p->pHash || p->nPendingData==0 );
|
|
if( p->pHash ){
|
|
sqlite3Fts5HashClear(p->pHash);
|
|
p->nPendingData = 0;
|
|
p->nPendingRow = 0;
|
|
p->flushRc = SQLITE_OK;
|
|
}
|
|
p->nContentlessDelete = 0;
|
|
}
|
|
|
|
/*
|
|
** Return the size of the prefix, in bytes, that buffer
|
|
** (pNew/<length-unknown>) shares with buffer (pOld/nOld).
|
|
**
|
|
** Buffer (pNew/<length-unknown>) is guaranteed to be greater
|
|
** than buffer (pOld/nOld).
|
|
*/
|
|
static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){
|
|
int i;
|
|
for(i=0; i<nOld; i++){
|
|
if( pOld[i]!=pNew[i] ) break;
|
|
}
|
|
return i;
|
|
}
|
|
|
|
static void fts5WriteDlidxClear(
|
|
Fts5Index *p,
|
|
Fts5SegWriter *pWriter,
|
|
int bFlush /* If true, write dlidx to disk */
|
|
){
|
|
int i;
|
|
assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) );
|
|
for(i=0; i<pWriter->nDlidx; i++){
|
|
Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
|
|
if( pDlidx->buf.n==0 ) break;
|
|
if( bFlush ){
|
|
assert( pDlidx->pgno!=0 );
|
|
fts5DataWrite(p,
|
|
FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
|
|
pDlidx->buf.p, pDlidx->buf.n
|
|
);
|
|
}
|
|
sqlite3Fts5BufferZero(&pDlidx->buf);
|
|
pDlidx->bPrevValid = 0;
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Grow the pWriter->aDlidx[] array to at least nLvl elements in size.
|
|
** Any new array elements are zeroed before returning.
|
|
*/
|
|
static int fts5WriteDlidxGrow(
|
|
Fts5Index *p,
|
|
Fts5SegWriter *pWriter,
|
|
int nLvl
|
|
){
|
|
if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){
|
|
Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc64(
|
|
pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl
|
|
);
|
|
if( aDlidx==0 ){
|
|
p->rc = SQLITE_NOMEM;
|
|
}else{
|
|
size_t nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx);
|
|
memset(&aDlidx[pWriter->nDlidx], 0, nByte);
|
|
pWriter->aDlidx = aDlidx;
|
|
pWriter->nDlidx = nLvl;
|
|
}
|
|
}
|
|
return p->rc;
|
|
}
|
|
|
|
/*
|
|
** If the current doclist-index accumulating in pWriter->aDlidx[] is large
|
|
** enough, flush it to disk and return 1. Otherwise discard it and return
|
|
** zero.
|
|
*/
|
|
static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){
|
|
int bFlag = 0;
|
|
|
|
/* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written
|
|
** to the database, also write the doclist-index to disk. */
|
|
if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
|
|
bFlag = 1;
|
|
}
|
|
fts5WriteDlidxClear(p, pWriter, bFlag);
|
|
pWriter->nEmpty = 0;
|
|
return bFlag;
|
|
}
|
|
|
|
/*
** This function is called whenever processing of the doclist for the
** last term on leaf page (pWriter->iBtPage) is completed.
**
** The doclist-index for that term is currently stored in-memory within the
** Fts5SegWriter.aDlidx[] array. If it is large enough, this function
** writes it out to disk. Or, if it is too small to bother with, discards
** it.
**
** Fts5SegWriter.btterm currently contains the first term on page iBtPage.
*/
static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){
  int bFlag;

  assert( pWriter->iBtPage || pWriter->nEmpty==0 );
  if( pWriter->iBtPage==0 ) return;
  bFlag = fts5WriteFlushDlidx(p, pWriter);

  if( p->rc==SQLITE_OK ){
    const char *z = (pWriter->btterm.n>0?(const char*)pWriter->btterm.p:"");
    /* The following was already done in fts5WriteInit(): */
    /* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */
    sqlite3_bind_blob(p->pIdxWriter, 2, z, pWriter->btterm.n, SQLITE_STATIC);
    /* The pgno column stores (iBtPage<<1), with the "has doclist-index"
    ** flag in the least-significant bit. */
    sqlite3_bind_int64(p->pIdxWriter, 3, bFlag + ((i64)pWriter->iBtPage<<1));
    sqlite3_step(p->pIdxWriter);
    p->rc = sqlite3_reset(p->pIdxWriter);
    /* Clear the blob binding so btterm may be safely modified later. */
    sqlite3_bind_null(p->pIdxWriter, 2);
  }
  pWriter->iBtPage = 0;
}
|
|
|
|
/*
|
|
** This is called once for each leaf page except the first that contains
|
|
** at least one term. Argument (nTerm/pTerm) is the split-key - a term that
|
|
** is larger than all terms written to earlier leaves, and equal to or
|
|
** smaller than the first term on the new leaf.
|
|
**
|
|
** If an error occurs, an error code is left in Fts5Index.rc. If an error
|
|
** has already occurred when this function is called, it is a no-op.
|
|
*/
|
|
static void fts5WriteBtreeTerm(
|
|
Fts5Index *p, /* FTS5 backend object */
|
|
Fts5SegWriter *pWriter, /* Writer object */
|
|
int nTerm, const u8 *pTerm /* First term on new page */
|
|
){
|
|
fts5WriteFlushBtree(p, pWriter);
|
|
if( p->rc==SQLITE_OK ){
|
|
fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm);
|
|
pWriter->iBtPage = pWriter->writer.pgno;
|
|
}
|
|
}
|
|
|
|
/*
|
|
** This function is called when flushing a leaf page that contains no
|
|
** terms at all to disk.
|
|
*/
|
|
static void fts5WriteBtreeNoTerm(
|
|
Fts5Index *p, /* FTS5 backend object */
|
|
Fts5SegWriter *pWriter /* Writer object */
|
|
){
|
|
/* If there were no rowids on the leaf page either and the doclist-index
|
|
** has already been started, append an 0x00 byte to it. */
|
|
if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){
|
|
Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0];
|
|
assert( pDlidx->bPrevValid );
|
|
sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0);
|
|
}
|
|
|
|
/* Increment the "number of sequential leaves without a term" counter. */
|
|
pWriter->nEmpty++;
|
|
}
|
|
|
|
/*
** Extract the first rowid stored in doclist-index page buffer pBuf.
** The buffer layout is: flag byte, pgno varint, then the first rowid
** as a varint.
*/
static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){
  i64 iRowid;
  int iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid);
  fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid);
  return iRowid;
}
|
|
|
|
/*
** Rowid iRowid has just been appended to the current leaf page. It is the
** first on the page. This function appends an appropriate entry to the current
** doclist-index, propagating full doclist-index pages up the in-memory
** b-tree hierarchy as required.
*/
static void fts5WriteDlidxAppend(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  i64 iRowid
){
  int i;                          /* Doclist-index b-tree level */
  int bDone = 0;                  /* Set once no further levels need updating */

  for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
    i64 iVal;                     /* Value (rowid or delta) appended below */
    Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];

    if( pDlidx->buf.n>=p->pConfig->pgsz ){
      /* The current doclist-index page is full. Write it to disk and push
      ** a copy of iRowid (which will become the first rowid on the next
      ** doclist-index leaf page) up into the next level of the b-tree
      ** hierarchy. If the node being flushed is currently the root node,
      ** also push its first rowid upwards. */
      pDlidx->buf.p[0] = 0x01;    /* Not the root node */
      fts5DataWrite(p,
          FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
          pDlidx->buf.p, pDlidx->buf.n
      );
      /* Ensure a parent level exists (may reallocate aDlidx[]). */
      fts5WriteDlidxGrow(p, pWriter, i+2);
      pDlidx = &pWriter->aDlidx[i];
      if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){
        i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf);

        /* This was the root node. Push its first rowid up to the new root. */
        pDlidx[1].pgno = pDlidx->pgno;
        sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0);
        sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno);
        sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst);
        pDlidx[1].bPrevValid = 1;
        pDlidx[1].iPrev = iFirst;
      }

      /* Start a fresh page at this level. */
      sqlite3Fts5BufferZero(&pDlidx->buf);
      pDlidx->bPrevValid = 0;
      pDlidx->pgno++;
    }else{
      /* This level's page still has room - no need to visit parents. */
      bDone = 1;
    }

    if( pDlidx->bPrevValid ){
      /* Subsequent rowids on a page are delta-encoded. */
      iVal = (u64)iRowid - (u64)pDlidx->iPrev;
    }else{
      /* First entry on this page: write the page header (root flag,
      ** child/leaf page number) followed by the full rowid. */
      i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno);
      assert( pDlidx->buf.n==0 );
      sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone);
      sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno);
      iVal = iRowid;
    }

    sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal);
    pDlidx->bPrevValid = 1;
    pDlidx->iPrev = iRowid;
  }
}
|
|
|
|
/*
** Flush the leaf page currently accumulated in pWriter->writer to disk,
** then reset the page buffers ready for the next leaf page.
*/
static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
  static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
  Fts5PageWriter *pPage = &pWriter->writer;
  i64 iRowid;

  /* The page-index is non-empty iff at least one term was written. */
  assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) );

  /* Set the szLeaf header field. */
  assert( 0==fts5GetU16(&pPage->buf.p[2]) );
  fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n);

  if( pWriter->bFirstTermInPage ){
    /* No term was written to this page. */
    assert( pPage->pgidx.n==0 );
    fts5WriteBtreeNoTerm(p, pWriter);
  }else{
    /* Append the pgidx to the page buffer. Set the szLeaf header field. */
    fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p);
  }

  /* Write the page out to disk */
  iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno);
  fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);

  /* Initialize the next page: a zeroed 4-byte header. */
  fts5BufferZero(&pPage->buf);
  fts5BufferZero(&pPage->pgidx);
  fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
  pPage->iPrevPgidx = 0;
  pPage->pgno++;

  /* Increase the leaves written counter */
  pWriter->nLeafWritten++;

  /* The new leaf holds no terms or rowids */
  pWriter->bFirstTermInPage = 1;
  pWriter->bFirstRowidInPage = 1;
}
|
|
|
|
/*
** Append term pTerm/nTerm to the segment being written by the writer passed
** as the second argument. The term is prefix-compressed against the
** previous term written to the same page.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has
** already occurred, this function is a no-op.
*/
static void fts5WriteAppendTerm(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  int nTerm, const u8 *pTerm
){
  int nPrefix;                    /* Bytes of prefix compression for term */
  Fts5PageWriter *pPage = &pWriter->writer;
  Fts5Buffer *pPgidx = &pWriter->writer.pgidx;
  int nMin = MIN(pPage->term.n, nTerm);

  assert( p->rc==SQLITE_OK );
  assert( pPage->buf.n>=4 );
  assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );

  /* If the current leaf page is full, flush it to disk. */
  if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
    if( pPage->buf.n>4 ){
      fts5WriteFlushLeaf(p, pWriter);
      if( p->rc!=SQLITE_OK ) return;
    }
    /* Ensure the buffer can hold an oversized term on a single page. */
    fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
  }

  /* TODO1: Updating pgidx here. Each pgidx entry is the delta from the
  ** previous term's page offset, as a varint. */
  pPgidx->n += sqlite3Fts5PutVarint(
      &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
  );
  pPage->iPrevPgidx = pPage->buf.n;
#if 0
  fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
  pPgidx->n += 2;
#endif

  if( pWriter->bFirstTermInPage ){
    nPrefix = 0;
    if( pPage->pgno!=1 ){
      /* This is the first term on a leaf that is not the leftmost leaf in
      ** the segment b-tree. In this case it is necessary to add a term to
      ** the b-tree hierarchy that is (a) larger than the largest term
      ** already written to the segment and (b) smaller than or equal to
      ** this term. In other words, a prefix of (pTerm/nTerm) that is one
      ** byte longer than the longest prefix (pTerm/nTerm) shares with the
      ** previous term.
      **
      ** Usually, the previous term is available in pPage->term. The exception
      ** is if this is the first term written in an incremental-merge step.
      ** In this case the previous term is not available, so just write a
      ** copy of (pTerm/nTerm) into the parent node. This is slightly
      ** inefficient, but still correct. */
      int n = nTerm;
      if( pPage->term.n ){
        n = 1 + fts5PrefixCompress(nMin, pPage->term.p, pTerm);
      }
      fts5WriteBtreeTerm(p, pWriter, n, pTerm);
      if( p->rc!=SQLITE_OK ) return;
      pPage = &pWriter->writer;
    }
  }else{
    /* Not the first term on the page: emit the shared-prefix length. */
    nPrefix = fts5PrefixCompress(nMin, pPage->term.p, pTerm);
    fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
  }

  /* Append the number of bytes of new data, then the term data itself
  ** to the page. */
  fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
  fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);

  /* Update the Fts5PageWriter.term field. */
  fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
  pWriter->bFirstTermInPage = 0;

  pWriter->bFirstRowidInPage = 0;
  pWriter->bFirstRowidInDoclist = 1;

  /* A new term starts a new doclist, so the doclist-index restarts too. */
  assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
  pWriter->aDlidx[0].pgno = pPage->pgno;
}
|
|
|
|
/*
** Append a rowid and position-list size field to the writers output.
** The first rowid in a doclist (or on a page) is stored in full; later
** rowids are delta-encoded against the previous one.
*/
static void fts5WriteAppendRowid(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  i64 iRowid
){
  if( p->rc==SQLITE_OK ){
    Fts5PageWriter *pPage = &pWriter->writer;

    /* Start a new leaf if the current one is full. */
    if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
      fts5WriteFlushLeaf(p, pWriter);
    }

    /* If this is to be the first rowid written to the page, set the
    ** rowid-pointer in the page-header. Also append a value to the dlidx
    ** buffer, in case a doclist-index is required. */
    if( pWriter->bFirstRowidInPage ){
      fts5PutU16(pPage->buf.p, (u16)pPage->buf.n);
      fts5WriteDlidxAppend(p, pWriter, iRowid);
    }

    /* Write the rowid. */
    if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){
      fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid);
    }else{
      /* Delta-encode against the previous rowid in this doclist. */
      assert_nc( p->rc || iRowid>pWriter->iPrevRowid );
      fts5BufferAppendVarint(&p->rc, &pPage->buf,
          (u64)iRowid - (u64)pWriter->iPrevRowid
      );
    }
    pWriter->iPrevRowid = iRowid;
    pWriter->bFirstRowidInDoclist = 0;
    pWriter->bFirstRowidInPage = 0;
  }
}
|
|
|
|
/*
** Append position-list data (aData/nData) to the writer's output,
** splitting it across leaf pages as required. Splits always fall on
** varint boundaries so that no varint straddles two pages.
*/
static void fts5WriteAppendPoslistData(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  const u8 *aData,
  int nData
){
  Fts5PageWriter *pPage = &pWriter->writer;
  const u8 *a = aData;            /* Remaining data to append */
  int n = nData;                  /* Bytes remaining in a[] */

  assert( p->pConfig->pgsz>0 || p->rc!=SQLITE_OK );
  while( p->rc==SQLITE_OK
     && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz
  ){
    /* Fill the current page up to (at least) the page-size boundary,
    ** advancing whole varints only, then flush it. */
    int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n;
    int nCopy = 0;
    while( nCopy<nReq ){
      i64 dummy;
      nCopy += fts5GetVarint(&a[nCopy], (u64*)&dummy);
    }
    fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a);
    a += nCopy;
    n -= nCopy;
    fts5WriteFlushLeaf(p, pWriter);
  }
  /* Whatever remains fits on the current page. */
  if( n>0 ){
    fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a);
  }
}
|
|
|
|
/*
** Flush any data cached by the writer object to the database. Free any
** allocations associated with the writer. The buffers are released even
** if an error has occurred.
*/
static void fts5WriteFinish(
  Fts5Index *p,
  Fts5SegWriter *pWriter,         /* Writer object */
  int *pnLeaf                     /* OUT: Number of leaf pages in b-tree */
){
  int i;
  Fts5PageWriter *pLeaf = &pWriter->writer;
  if( p->rc==SQLITE_OK ){
    assert( pLeaf->pgno>=1 );
    /* Flush the final leaf page if it contains data (buf.n>4 means
    ** something follows the 4-byte page header). */
    if( pLeaf->buf.n>4 ){
      fts5WriteFlushLeaf(p, pWriter);
    }
    *pnLeaf = pLeaf->pgno-1;
    if( pLeaf->pgno>1 ){
      fts5WriteFlushBtree(p, pWriter);
    }
  }
  /* Release all buffers owned by the writer. */
  fts5BufferFree(&pLeaf->term);
  fts5BufferFree(&pLeaf->buf);
  fts5BufferFree(&pLeaf->pgidx);
  fts5BufferFree(&pWriter->btterm);

  for(i=0; i<pWriter->nDlidx; i++){
    sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
  }
  sqlite3_free(pWriter->aDlidx);
}
|
|
|
|
/*
** Initialize writer object pWriter to write a new segment with id iSegid.
** Any error is recorded in Fts5Index.rc.
*/
static void fts5WriteInit(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  int iSegid
){
  const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING;

  memset(pWriter, 0, sizeof(Fts5SegWriter));
  pWriter->iSegid = iSegid;

  /* Allocate the first doclist-index level. */
  fts5WriteDlidxGrow(p, pWriter, 1);
  pWriter->writer.pgno = 1;
  pWriter->bFirstTermInPage = 1;
  pWriter->iBtPage = 1;

  assert( pWriter->writer.buf.n==0 );
  assert( pWriter->writer.pgidx.n==0 );

  /* Grow the two buffers to pgsz + padding bytes in size. */
  sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.pgidx, nBuffer);
  sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.buf, nBuffer);

  /* Prepare (once per Fts5Index) the statement used to insert rows into
  ** the %_idx table. */
  if( p->pIdxWriter==0 ){
    Fts5Config *pConfig = p->pConfig;
    fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf(
          "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)",
          pConfig->zDb, pConfig->zName
    ));
  }

  if( p->rc==SQLITE_OK ){
    /* Initialize the 4-byte leaf-page header to 0x00. */
    memset(pWriter->writer.buf.p, 0, 4);
    pWriter->writer.buf.n = 4;

    /* Bind the current output segment id to the index-writer. This is an
    ** optimization over binding the same value over and over as rows are
    ** inserted into %_idx by the current writer. */
    sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
  }
}
|
|
|
|
/*
** Iterator pIter was used to iterate through the input segments of on an
** incremental merge operation. This function is called if the incremental
** merge step has finished but the input has not been completely exhausted.
** For each input segment it either marks the segment empty (if fully
** consumed) or rewrites the segment's new first leaf page so that it
** begins with the current term, then deletes the pages before it.
*/
static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){
  int i;
  Fts5Buffer buf;                 /* Reused to build each new first leaf */
  memset(&buf, 0, sizeof(Fts5Buffer));
  for(i=0; i<pIter->nSeg && p->rc==SQLITE_OK; i++){
    Fts5SegIter *pSeg = &pIter->aSeg[i];
    if( pSeg->pSeg==0 ){
      /* no-op */
    }else if( pSeg->pLeaf==0 ){
      /* All keys from this input segment have been transfered to the output.
      ** Set both the first and last page-numbers to 0 to indicate that the
      ** segment is now empty. */
      pSeg->pSeg->pgnoLast = 0;
      pSeg->pSeg->pgnoFirst = 0;
    }else{
      int iOff = pSeg->iTermLeafOffset;     /* Offset on new first leaf page */
      i64 iLeafRowid;
      Fts5Data *pData;
      int iId = pSeg->pSeg->iSegid;
      u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00};

      iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno);
      pData = fts5LeafRead(p, iLeafRowid);
      if( pData ){
        if( iOff>pData->szLeaf ){
          /* This can occur if the pages that the segments occupy overlap - if
          ** a single page has been assigned to more than one segment. In
          ** this case a prior iteration of this loop may have corrupted the
          ** segment currently being trimmed. */
          p->rc = FTS5_CORRUPT;
        }else{
          /* Rebuild the page: zeroed header, current term (stored in
          ** full), then the remainder of the original page body. */
          fts5BufferZero(&buf);
          fts5BufferGrow(&p->rc, &buf, pData->nn);
          fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr);
          fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n);
          fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p);
          fts5BufferAppendBlob(&p->rc, &buf,pData->szLeaf-iOff,&pData->p[iOff]);
          if( p->rc==SQLITE_OK ){
            /* Set the szLeaf field */
            fts5PutU16(&buf.p[2], (u16)buf.n);
          }

          /* Set up the new page-index array */
          fts5BufferAppendVarint(&p->rc, &buf, 4);
          if( pSeg->iLeafPgno==pSeg->iTermLeafPgno
           && pSeg->iEndofDoclist<pData->szLeaf
           && pSeg->iPgidxOff<=pData->nn
          ){
            int nDiff = pData->szLeaf - pSeg->iEndofDoclist;
            fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4);
            fts5BufferAppendBlob(&p->rc, &buf,
                pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]
            );
          }

          /* Discard the pages preceding the rewritten first leaf, then
          ** write the rebuilt page in its place. */
          pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
          fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid);
          fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
        }
        fts5DataRelease(pData);
      }
    }
  }
  fts5BufferFree(&buf);
}
|
|
|
|
static void fts5MergeChunkCallback(
|
|
Fts5Index *p,
|
|
void *pCtx,
|
|
const u8 *pChunk, int nChunk
|
|
){
|
|
Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx;
|
|
fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk);
|
|
}
|
|
|
|
/*
** Merge segments from level iLvl of the structure into a single output
** segment on level iLvl+1, writing at most (*pnRem) further output leaf
** pages before stopping at the next term boundary.
**
** If a merge of level iLvl is already underway (pLvl->nMerge!=0), work
** continues on the existing output segment. Otherwise a new segment-id is
** allocated and a new output segment is added to level iLvl+1 (extending
** the structure with a new level first if iLvl is currently the last).
**
** If all input is consumed, the input segments are removed from both the
** %_data table and the structure. Otherwise the partially-consumed input
** segments are trimmed (fts5TrimSegments) and pLvl->nMerge records how
** many segments remain mid-merge.
**
** On exit (*pnRem), if pnRem is not NULL, is decremented by the number of
** leaf pages actually written.
*/
static void fts5IndexMergeLevel(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Structure **ppStruct,       /* IN/OUT: Structure of index */
  int iLvl,                       /* Level to read input from */
  int *pnRem                      /* Write up to this many output leaves */
){
  Fts5Structure *pStruct = *ppStruct;
  Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
  Fts5StructureLevel *pLvlOut;
  Fts5Iter *pIter = 0;            /* Iterator to read input data */
  int nRem = pnRem ? *pnRem : 0;  /* Output leaf pages left to write */
  int nInput;                     /* Number of input segments */
  Fts5SegWriter writer;           /* Writer object */
  Fts5StructureSegment *pSeg;     /* Output segment */
  Fts5Buffer term;                /* Copy of most recent term processed */
  int bOldest;                    /* True if the output segment is the oldest */
  int eDetail = p->pConfig->eDetail;
  const int flags = FTS5INDEX_QUERY_NOOUTPUT;
  int bTermWritten = 0;           /* True if current term already output */

  assert( iLvl<pStruct->nLevel );
  assert( pLvl->nMerge<=pLvl->nSeg );

  memset(&writer, 0, sizeof(Fts5SegWriter));
  memset(&term, 0, sizeof(Fts5Buffer));
  if( pLvl->nMerge ){
    /* A merge of this level is already underway: resume writing into the
    ** last segment of the output level, starting on a fresh page. */
    pLvlOut = &pStruct->aLevel[iLvl+1];
    assert( pLvlOut->nSeg>0 );
    nInput = pLvl->nMerge;
    pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1];

    fts5WriteInit(p, &writer, pSeg->iSegid);
    writer.writer.pgno = pSeg->pgnoLast+1;
    writer.iBtPage = 0;
  }else{
    int iSegid = fts5AllocateSegid(p, pStruct);

    /* Extend the Fts5Structure object as required to ensure the output
    ** segment exists. */
    if( iLvl==pStruct->nLevel-1 ){
      fts5StructureAddLevel(&p->rc, ppStruct);
      pStruct = *ppStruct;
    }
    fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0);
    if( p->rc ) return;
    /* The structure may have been reallocated - refresh level pointers. */
    pLvl = &pStruct->aLevel[iLvl];
    pLvlOut = &pStruct->aLevel[iLvl+1];

    fts5WriteInit(p, &writer, iSegid);

    /* Add the new segment to the output level */
    pSeg = &pLvlOut->aSeg[pLvlOut->nSeg];
    pLvlOut->nSeg++;
    pSeg->pgnoFirst = 1;
    pSeg->iSegid = iSegid;
    pStruct->nSegment++;

    /* Read input from all segments in the input level */
    nInput = pLvl->nSeg;

    /* Set the range of origins that will go into the output segment. */
    if( pStruct->nOriginCntr>0 ){
      pSeg->iOrigin1 = pLvl->aSeg[0].iOrigin1;
      pSeg->iOrigin2 = pLvl->aSeg[pLvl->nSeg-1].iOrigin2;
    }
  }
  /* Output is "oldest" if it is the sole segment on the last level; in that
  ** case delete-markers with empty position lists can be dropped entirely. */
  bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2);

  assert( iLvl>=0 );
  for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter);
      fts5MultiIterEof(p, pIter)==0;
      fts5MultiIterNext(p, pIter, 0, 0)
  ){
    Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
    int nPos;                     /* position-list size field value */
    int nTerm;
    const u8 *pTerm;

    pTerm = fts5MultiIterTerm(pIter, &nTerm);
    if( nTerm!=term.n || fts5Memcmp(pTerm, term.p, nTerm) ){
      /* New term. Only stop (when over the page budget) at a term
      ** boundary, so that each term's doclist stays in one segment. */
      if( pnRem && writer.nLeafWritten>nRem ){
        break;
      }
      fts5BufferSet(&p->rc, &term, nTerm, pTerm);
      bTermWritten =0;
    }

    /* Check for key annihilation. */
    if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue;

    if( p->rc==SQLITE_OK && bTermWritten==0 ){
      /* This is a new term. Append a term to the output segment. */
      fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
      bTermWritten = 1;
    }

    /* Append the rowid to the output */
    /* WRITEPOSLISTSIZE */
    fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));

    if( eDetail==FTS5_DETAIL_NONE ){
      if( pSegIter->bDel ){
        fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
        if( pSegIter->nPos>0 ){
          fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
        }
      }
    }else{
      /* Append the position-list data to the output */
      nPos = pSegIter->nPos*2 + pSegIter->bDel;
      fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos);
      fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
    }
  }

  /* Flush the last leaf page to disk. Set the output segment b-tree height
  ** and last leaf page number at the same time. */
  fts5WriteFinish(p, &writer, &pSeg->pgnoLast);

  assert( pIter!=0 || p->rc!=SQLITE_OK );
  if( fts5MultiIterEof(p, pIter) ){
    int i;

    /* Remove the redundant segments from the %_data table */
    assert( pSeg->nEntry==0 );
    for(i=0; i<nInput; i++){
      Fts5StructureSegment *pOld = &pLvl->aSeg[i];
      pSeg->nEntry += (pOld->nEntry - pOld->nEntryTombstone);
      fts5DataRemoveSegment(p, pOld);
    }

    /* Remove the redundant segments from the input level */
    if( pLvl->nSeg!=nInput ){
      int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment);
      memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove);
    }
    pStruct->nSegment -= nInput;
    pLvl->nSeg -= nInput;
    pLvl->nMerge = 0;
    if( pSeg->pgnoLast==0 ){
      /* No pages were written - discard the (empty) output segment. */
      pLvlOut->nSeg--;
      pStruct->nSegment--;
    }
  }else{
    /* Merge interrupted mid-way: trim consumed data from the inputs and
    ** remember how many segments are part of this ongoing merge. */
    assert( pSeg->pgnoLast>0 );
    fts5TrimSegments(p, pIter);
    pLvl->nMerge = nInput;
  }

  fts5MultiIterFree(pIter);
  fts5BufferFree(&term);
  if( pnRem ) *pnRem -= writer.nLeafWritten;
}
|
|
|
|
/*
|
|
** If this is not a contentless_delete=1 table, or if the 'deletemerge'
|
|
** configuration option is set to 0, then this function always returns -1.
|
|
** Otherwise, it searches the structure object passed as the second argument
|
|
** for a level suitable for merging due to having a large number of
|
|
** tombstones in the tombstone hash. If one is found, its index is returned.
|
|
** Otherwise, if there is no suitable level, -1.
|
|
*/
|
|
static int fts5IndexFindDeleteMerge(Fts5Index *p, Fts5Structure *pStruct){
|
|
Fts5Config *pConfig = p->pConfig;
|
|
int iRet = -1;
|
|
if( pConfig->bContentlessDelete && pConfig->nDeleteMerge>0 ){
|
|
int ii;
|
|
int nBest = 0;
|
|
|
|
for(ii=0; ii<pStruct->nLevel; ii++){
|
|
Fts5StructureLevel *pLvl = &pStruct->aLevel[ii];
|
|
i64 nEntry = 0;
|
|
i64 nTomb = 0;
|
|
int iSeg;
|
|
for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
|
|
nEntry += pLvl->aSeg[iSeg].nEntry;
|
|
nTomb += pLvl->aSeg[iSeg].nEntryTombstone;
|
|
}
|
|
assert_nc( nEntry>0 || pLvl->nSeg==0 );
|
|
if( nEntry>0 ){
|
|
int nPercent = (nTomb * 100) / nEntry;
|
|
if( nPercent>=pConfig->nDeleteMerge && nPercent>nBest ){
|
|
iRet = ii;
|
|
nBest = nPercent;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return iRet;
|
|
}
|
|
|
|
/*
** Do up to nPg pages of automerge work on the index.
**
** Return true if any changes were actually made, or false otherwise.
*/
static int fts5IndexMerge(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Structure **ppStruct,       /* IN/OUT: Current structure of index */
  int nPg,                        /* Pages of work to do */
  int nMin                        /* Minimum number of segments to merge */
){
  int nRem = nPg;                 /* Pages of work remaining */
  int bRet = 0;                   /* Return value */
  Fts5Structure *pStruct = *ppStruct;
  while( nRem>0 && p->rc==SQLITE_OK ){
    int iLvl;                     /* To iterate through levels */
    int iBestLvl = 0;             /* Level offering the most input segments */
    int nBest = 0;                /* Number of input segments on best level */

    /* Set iBestLvl to the level to read input segments from. Or to -1 if
    ** there is no level suitable to merge segments from. */
    assert( pStruct->nLevel>0 );
    for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
      Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
      if( pLvl->nMerge ){
        /* A merge of this level is already underway. It takes priority
        ** over starting a new merge on a lower-numbered level; setting
        ** nBest to nMin ensures the nBest<nMin test below does not
        ** override the choice. */
        if( pLvl->nMerge>nBest ){
          iBestLvl = iLvl;
          nBest = nMin;
        }
        break;
      }
      if( pLvl->nSeg>nBest ){
        nBest = pLvl->nSeg;
        iBestLvl = iLvl;
      }
    }
    if( nBest<nMin ){
      /* No level has enough segments - fall back to a tombstone-driven
      ** merge, if the configuration enables one. */
      iBestLvl = fts5IndexFindDeleteMerge(p, pStruct);
    }

    if( iBestLvl<0 ) break;
    bRet = 1;
    fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem);
    if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){
      /* The merge completed - the output level may now qualify for
      ** promotion. */
      fts5StructurePromote(p, iBestLvl+1, pStruct);
    }

    /* nMin==1 is only used to force the first merge; subsequent
    ** iterations require at least two input segments. */
    if( nMin==1 ) nMin = 2;
  }
  *ppStruct = pStruct;
  return bRet;
}
|
|
|
|
/*
|
|
** A total of nLeaf leaf pages of data has just been flushed to a level-0
|
|
** segment. This function updates the write-counter accordingly and, if
|
|
** necessary, performs incremental merge work.
|
|
**
|
|
** If an error occurs, set the Fts5Index.rc error code. If an error has
|
|
** already occurred, this function is a no-op.
|
|
*/
|
|
static void fts5IndexAutomerge(
|
|
Fts5Index *p, /* FTS5 backend object */
|
|
Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */
|
|
int nLeaf /* Number of output leaves just written */
|
|
){
|
|
if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 && ALWAYS((*ppStruct)!=0) ){
|
|
Fts5Structure *pStruct = *ppStruct;
|
|
u64 nWrite; /* Initial value of write-counter */
|
|
int nWork; /* Number of work-quanta to perform */
|
|
int nRem; /* Number of leaf pages left to write */
|
|
|
|
/* Update the write-counter. While doing so, set nWork. */
|
|
nWrite = pStruct->nWriteCounter;
|
|
nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit));
|
|
pStruct->nWriteCounter += nLeaf;
|
|
nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel);
|
|
|
|
fts5IndexMerge(p, ppStruct, nRem, p->pConfig->nAutomerge);
|
|
}
|
|
}
|
|
|
|
static void fts5IndexCrisismerge(
|
|
Fts5Index *p, /* FTS5 backend object */
|
|
Fts5Structure **ppStruct /* IN/OUT: Current structure of index */
|
|
){
|
|
const int nCrisis = p->pConfig->nCrisisMerge;
|
|
Fts5Structure *pStruct = *ppStruct;
|
|
if( pStruct && pStruct->nLevel>0 ){
|
|
int iLvl = 0;
|
|
while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){
|
|
fts5IndexMergeLevel(p, &pStruct, iLvl, 0);
|
|
assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) );
|
|
fts5StructurePromote(p, iLvl+1, pStruct);
|
|
iLvl++;
|
|
}
|
|
*ppStruct = pStruct;
|
|
}
|
|
}
|
|
|
|
/*
** Return the error code currently stored in the Fts5Index object and
** reset the stored code to SQLITE_OK.
*/
static int fts5IndexReturn(Fts5Index *p){
  int rcOut = p->rc;
  p->rc = SQLITE_OK;
  return rcOut;
}
|
|
|
|
/* Context object bundling an index handle with a segment writer.
** NOTE(review): no user of this struct is visible in this portion of the
** file - presumably it is (or was) used when flushing the in-memory hash
** to a new segment; confirm against the rest of the file. */
typedef struct Fts5FlushCtx Fts5FlushCtx;
struct Fts5FlushCtx {
  Fts5Index *pIdx;                /* Index the flush operates on */
  Fts5SegWriter writer;           /* Writer for the output segment */
};
|
|
|
|
/*
|
|
** Buffer aBuf[] contains a list of varints, all small enough to fit
|
|
** in a 32-bit integer. Return the size of the largest prefix of this
|
|
** list nMax bytes or less in size.
|
|
*/
|
|
static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
|
|
int ret;
|
|
u32 dummy;
|
|
ret = fts5GetVarint32(aBuf, dummy);
|
|
if( ret<nMax ){
|
|
while( 1 ){
|
|
int i = fts5GetVarint32(&aBuf[ret], dummy);
|
|
if( (ret + i) > nMax ) break;
|
|
ret += i;
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/*
** Execute the SQL statement:
**
**    DELETE FROM %_idx WHERE (segid, (pgno/2)) = ($iSegid, $iPgno);
**
** This is used when a secure-delete operation removes the last term
** from a segment leaf page. In that case the %_idx entry is removed
** too. This is done to ensure that if all instances of a token are
** removed from an fts5 database in secure-delete mode, no trace of
** the token itself remains in the database.
**
** If an error occurs, it is left in Fts5Index.rc. If an error has already
** occurred when this function is called, it is a no-op (via the p->rc
** check below).
*/
static void fts5SecureDeleteIdxEntry(
  Fts5Index *p,                   /* FTS5 backend object */
  int iSegid,                     /* Id of segment to delete entry for */
  int iPgno                       /* Page number within segment */
){
  /* Page 1 has no %_idx entry, so there is nothing to delete. */
  if( iPgno!=1 ){
    assert( p->pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE );
    /* Prepare the DELETE statement on first use; it is cached on the
    ** Fts5Index object for subsequent calls. */
    if( p->pDeleteFromIdx==0 ){
      fts5IndexPrepareStmt(p, &p->pDeleteFromIdx, sqlite3_mprintf(
            "DELETE FROM '%q'.'%q_idx' WHERE (segid, (pgno/2)) = (?1, ?2)",
            p->pConfig->zDb, p->pConfig->zName
      ));
    }
    if( p->rc==SQLITE_OK ){
      sqlite3_bind_int(p->pDeleteFromIdx, 1, iSegid);
      sqlite3_bind_int(p->pDeleteFromIdx, 2, iPgno);
      sqlite3_step(p->pDeleteFromIdx);
      /* sqlite3_reset() reports any error raised by the step above. */
      p->rc = sqlite3_reset(p->pDeleteFromIdx);
    }
  }
}
|
|
|
|
/*
** This is called when a secure-delete operation removes a position-list
** that overflows onto segment page iPgno of segment pSeg. This function
** rewrites node iPgno, and possibly one or more of its right-hand peers,
** to remove this portion of the position list.
**
** Output variable (*pbLastInDoclist) is set to true if the position-list
** removed is followed by a new term or the end-of-segment, or false if
** it is followed by another rowid/position list.
*/
static void fts5SecureDeleteOverflow(
  Fts5Index *p,
  Fts5StructureSegment *pSeg,
  int iPgno,
  int *pbLastInDoclist
){
  const int bDetailNone = (p->pConfig->eDetail==FTS5_DETAIL_NONE);
  int pgno;
  Fts5Data *pLeaf = 0;
  assert( iPgno!=1 );

  *pbLastInDoclist = 1;
  /* Walk rightward through the segment's leaf pages starting at iPgno. */
  for(pgno=iPgno; p->rc==SQLITE_OK && pgno<=pSeg->pgnoLast; pgno++){
    i64 iRowid = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
    int iNext = 0;                /* Offset of first rowid/term on page */
    u8 *aPg = 0;

    pLeaf = fts5DataRead(p, iRowid);
    if( pLeaf==0 ) break;
    aPg = pLeaf->p;

    /* Bytes 0-1 of the page header hold the offset of the first rowid on
    ** the page, or 0 if there is none. */
    iNext = fts5GetU16(&aPg[0]);
    if( iNext!=0 ){
      *pbLastInDoclist = 0;
    }
    /* No rowid on the page - check the page footer for the offset of the
    ** first term instead (footer present iff szLeaf!=nn). */
    if( iNext==0 && pLeaf->szLeaf!=pLeaf->nn ){
      fts5GetVarint32(&aPg[pLeaf->szLeaf], iNext);
    }

    if( iNext==0 ){
      /* The page contains no terms or rowids. Replace it with an empty
      ** page and move on to the right-hand peer. */
      const u8 aEmpty[] = {0x00, 0x00, 0x00, 0x04};
      assert_nc( bDetailNone==0 || pLeaf->nn==4 );
      if( bDetailNone==0 ) fts5DataWrite(p, iRowid, aEmpty, sizeof(aEmpty));
      fts5DataRelease(pLeaf);
      pLeaf = 0;
    }else if( bDetailNone ){
      break;
    }else if( iNext>=pLeaf->szLeaf || pLeaf->nn<pLeaf->szLeaf || iNext<4 ){
      /* Offsets are inconsistent with the page size - database corrupt. */
      p->rc = FTS5_CORRUPT;
      break;
    }else{
      int nShift = iNext - 4;     /* Bytes of overflow data to remove */
      int nPg;                    /* New size of the rewritten page */

      int nIdx = 0;
      u8 *aIdx = 0;

      /* Unless the current page footer is 0 bytes in size (in which case
      ** the new page footer will be as well), allocate and populate a
      ** buffer containing the new page footer. Set stack variables aIdx
      ** and nIdx accordingly. */
      if( pLeaf->nn>pLeaf->szLeaf ){
        int iFirst = 0;           /* Offset of first term on page */
        int i1 = pLeaf->szLeaf;   /* Read cursor within old footer */
        int i2 = 0;               /* Write cursor within new footer */

        i1 += fts5GetVarint32(&aPg[i1], iFirst);
        if( iFirst<iNext ){
          p->rc = FTS5_CORRUPT;
          break;
        }
        aIdx = sqlite3Fts5MallocZero(&p->rc, (pLeaf->nn-pLeaf->szLeaf)+2);
        if( aIdx==0 ) break;
        /* First footer entry shifts left by nShift; the remaining entries
        ** are deltas and can be copied verbatim. */
        i2 = sqlite3Fts5PutVarint(aIdx, iFirst-nShift);
        if( i1<pLeaf->nn ){
          memcpy(&aIdx[i2], &aPg[i1], pLeaf->nn-i1);
          i2 += (pLeaf->nn-i1);
        }
        nIdx = i2;
      }

      /* Modify the contents of buffer aPg[]. Set nPg to the new size
      ** in bytes. The new page is always smaller than the old. */
      nPg = pLeaf->szLeaf - nShift;
      memmove(&aPg[4], &aPg[4+nShift], nPg-4);
      fts5PutU16(&aPg[2], nPg);
      /* If the page still has a first rowid, it now starts right after
      ** the 4-byte header. */
      if( fts5GetU16(&aPg[0]) ) fts5PutU16(&aPg[0], 4);
      if( nIdx>0 ){
        memcpy(&aPg[nPg], aIdx, nIdx);
        nPg += nIdx;
      }
      sqlite3_free(aIdx);

      /* Write the new page to disk and exit the loop */
      assert( nPg>4 || fts5GetU16(aPg)==0 );
      fts5DataWrite(p, iRowid, aPg, nPg);
      break;
    }
  }
  fts5DataRelease(pLeaf);
}
|
|
|
|
/*
** Completely remove the entry that pSeg currently points to from
** the database.
**
** This rewrites the leaf page in place (and, via helpers, any overflow
** pages and the preceding term page) so that no trace of the deleted
** rowid/position-list remains on disk. Any error is left in Fts5Index.rc.
*/
static void fts5DoSecureDelete(
  Fts5Index *p,
  Fts5SegIter *pSeg
){
  const int bDetailNone = (p->pConfig->eDetail==FTS5_DETAIL_NONE);
  int iSegid = pSeg->pSeg->iSegid;
  u8 *aPg = pSeg->pLeaf->p;       /* Leaf page buffer (edited in place) */
  int nPg = pSeg->pLeaf->nn;      /* Total size of leaf page */
  int iPgIdx = pSeg->pLeaf->szLeaf; /* Offset of page footer */

  u64 iDelta = 0;                 /* Rowid-delta of the entry being removed */
  int iNextOff = 0;               /* Offset of entry following the removed one */
  int iOff = 0;                   /* Write cursor during rewrite */
  int nIdx = 0;                   /* Size of copied footer in bytes */
  u8 *aIdx = 0;                   /* Copy of the original page footer */
  int bLastInDoclist = 0;         /* True if entry is last in its doclist */
  int iIdx = 0;
  int iStart = 0;                 /* Offset of the entry being removed */
  int iDelKeyOff = 0;       /* Offset of deleted key, if any */

  /* Take a private copy of the page footer - the rewrite below may
  ** overwrite the in-page copy before it has been fully read. */
  nIdx = nPg-iPgIdx;
  aIdx = sqlite3Fts5MallocZero(&p->rc, nIdx+16);
  if( p->rc ) return;
  memcpy(aIdx, &aPg[iPgIdx], nIdx);

  /* At this point segment iterator pSeg points to the entry
  ** this function should remove from the b-tree segment.
  **
  ** In detail=full or detail=column mode, pSeg->iLeafOffset is the
  ** offset of the first byte in the position-list for the entry to
  ** remove. Immediately before this comes two varints that will also
  ** need to be removed:
  **
  **     + the rowid or delta rowid value for the entry, and
  **     + the size of the position list in bytes.
  **
  ** Or, in detail=none mode, there is a single varint prior to
  ** pSeg->iLeafOffset - the rowid or delta rowid value.
  **
  ** This block sets the following variables:
  **
  **   iStart:
  **     The offset of the first byte of the rowid or delta-rowid
  **     value for the doclist entry being removed.
  **
  **   iDelta:
  **     The value of the rowid or delta-rowid value for the doclist
  **     entry being removed.
  **
  **   iNextOff:
  **     The offset of the next entry following the position list
  **     for the one being removed. If the position list for this
  **     entry overflows onto the next leaf page, this value will be
  **     greater than pLeaf->szLeaf.
  */
  {
    int iSOP;                     /* Start-Of-Position-list */
    if( pSeg->iLeafPgno==pSeg->iTermLeafPgno ){
      iStart = pSeg->iTermLeafOffset;
    }else{
      iStart = fts5GetU16(&aPg[0]);
    }

    iSOP = iStart + fts5GetVarint(&aPg[iStart], &iDelta);
    assert_nc( iSOP<=pSeg->iLeafOffset );

    if( bDetailNone ){
      /* Step entry-by-entry until the target entry is reached. Each
      ** detail=none entry is a rowid-delta varint optionally followed by
      ** one or two 0x00 marker bytes. */
      while( iSOP<pSeg->iLeafOffset ){
        if( aPg[iSOP]==0x00 ) iSOP++;
        if( aPg[iSOP]==0x00 ) iSOP++;
        iStart = iSOP;
        iSOP = iStart + fts5GetVarint(&aPg[iStart], &iDelta);
      }

      iNextOff = iSOP;
      if( iNextOff<pSeg->iEndofDoclist && aPg[iNextOff]==0x00 ) iNextOff++;
      if( iNextOff<pSeg->iEndofDoclist && aPg[iNextOff]==0x00 ) iNextOff++;

    }else{
      /* detail=full/column: each entry is rowid-delta, poslist-size,
      ** then (nPos/2) bytes of position list. */
      int nPos = 0;
      iSOP += fts5GetVarint32(&aPg[iSOP], nPos);
      while( iSOP<pSeg->iLeafOffset ){
        iStart = iSOP + (nPos/2);
        iSOP = iStart + fts5GetVarint(&aPg[iStart], &iDelta);
        iSOP += fts5GetVarint32(&aPg[iSOP], nPos);
      }
      assert_nc( iSOP==pSeg->iLeafOffset );
      iNextOff = pSeg->iLeafOffset + pSeg->nPos;
    }
  }

  iOff = iStart;

  /* If the position-list for the entry being removed flows over past
  ** the end of this page, delete the portion of the position-list on the
  ** next page and beyond.
  **
  ** Set variable bLastInDoclist to true if this entry happens
  ** to be the last rowid in the doclist for its term. */
  if( iNextOff>=iPgIdx ){
    int pgno = pSeg->iLeafPgno+1;
    fts5SecureDeleteOverflow(p, pSeg->pSeg, pgno, &bLastInDoclist);
    iNextOff = iPgIdx;
  }

  if( pSeg->bDel==0 ){
    if( iNextOff!=iPgIdx ){
      /* Loop through the page-footer. If iNextOff (offset of the
      ** entry following the one we are removing) is equal to the
      ** offset of a key on this page, then the entry is the last
      ** in its doclist. */
      int iKeyOff = 0;
      for(iIdx=0; iIdx<nIdx; /* no-op */){
        u32 iVal = 0;
        iIdx += fts5GetVarint32(&aIdx[iIdx], iVal);
        iKeyOff += iVal;
        if( iKeyOff==iNextOff ){
          bLastInDoclist = 1;
        }
      }
    }

    /* If this is (a) the first rowid on a page and (b) is not followed by
    ** another position list on the same page, set the "first-rowid" field
    ** of the header to 0. */
    if( fts5GetU16(&aPg[0])==iStart && (bLastInDoclist || iNextOff==iPgIdx) ){
      fts5PutU16(&aPg[0], 0);
    }
  }

  if( pSeg->bDel ){
    /* The entry carries a delete flag: replace it with a delete-marker
    ** entry (rowid-delta followed by a 0x01 byte). */
    iOff += sqlite3Fts5PutVarint(&aPg[iOff], iDelta);
    aPg[iOff++] = 0x01;
  }else if( bLastInDoclist==0 ){
    if( iNextOff!=iPgIdx ){
      /* Fold the removed entry's rowid-delta into the following entry's
      ** delta so rowids after it decode unchanged. */
      u64 iNextDelta = 0;
      iNextOff += fts5GetVarint(&aPg[iNextOff], &iNextDelta);
      iOff += sqlite3Fts5PutVarint(&aPg[iOff], iDelta + iNextDelta);
    }
  }else if(
      pSeg->iLeafPgno==pSeg->iTermLeafPgno
   && iStart==pSeg->iTermLeafOffset
  ){
    /* The entry being removed was the only position list in its
    ** doclist. Therefore the term needs to be removed as well. */
    int iKey = 0;
    int iKeyOff = 0;

    /* Set iKeyOff to the offset of the term that will be removed - the
    ** last offset in the footer that is not greater than iStart. */
    for(iIdx=0; iIdx<nIdx; iKey++){
      u32 iVal = 0;
      iIdx += fts5GetVarint32(&aIdx[iIdx], iVal);
      if( (iKeyOff+iVal)>(u32)iStart ) break;
      iKeyOff += iVal;
    }
    assert_nc( iKey>=1 );

    /* Set iDelKeyOff to the value of the footer entry to remove from
    ** the page. */
    iDelKeyOff = iOff = iKeyOff;

    if( iNextOff!=iPgIdx ){
      /* This is the only position-list associated with the term, and there
      ** is another term following it on this page. So the subsequent term
      ** needs to be moved to replace the term associated with the entry
      ** being removed. */
      int nPrefix = 0;            /* Prefix length of removed term */
      int nSuffix = 0;            /* Suffix length for the rewritten term */
      int nPrefix2 = 0;           /* Prefix length of following term */
      int nSuffix2 = 0;           /* Suffix length of following term */

      iDelKeyOff = iNextOff;
      iNextOff += fts5GetVarint32(&aPg[iNextOff], nPrefix2);
      iNextOff += fts5GetVarint32(&aPg[iNextOff], nSuffix2);

      /* The very first term on a page stores no prefix-length varint. */
      if( iKey!=1 ){
        iKeyOff += fts5GetVarint32(&aPg[iKeyOff], nPrefix);
      }
      iKeyOff += fts5GetVarint32(&aPg[iKeyOff], nSuffix);

      nPrefix = MIN(nPrefix, nPrefix2);
      nSuffix = (nPrefix2 + nSuffix2) - nPrefix;

      if( (iKeyOff+nSuffix)>iPgIdx || (iNextOff+nSuffix2)>iPgIdx ){
        p->rc = FTS5_CORRUPT;
      }else{
        if( iKey!=1 ){
          iOff += sqlite3Fts5PutVarint(&aPg[iOff], nPrefix);
        }
        iOff += sqlite3Fts5PutVarint(&aPg[iOff], nSuffix);
        if( nPrefix2>pSeg->term.n ){
          p->rc = FTS5_CORRUPT;
        }else if( nPrefix2>nPrefix ){
          /* Bytes of the following term's prefix that were shared with the
          ** removed term must now be written out explicitly; take them
          ** from the iterator's copy of the current term. */
          memcpy(&aPg[iOff], &pSeg->term.p[nPrefix], nPrefix2-nPrefix);
          iOff += (nPrefix2-nPrefix);
        }
        memmove(&aPg[iOff], &aPg[iNextOff], nSuffix2);
        iOff += nSuffix2;
        iNextOff += nSuffix2;
      }
    }
  }else if( iStart==4 ){
    int iPgno;

    assert_nc( pSeg->iLeafPgno>pSeg->iTermLeafPgno );
    /* The entry being removed may be the only position list in
    ** its doclist. */
    for(iPgno=pSeg->iLeafPgno-1; iPgno>pSeg->iTermLeafPgno; iPgno-- ){
      Fts5Data *pPg = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, iPgno));
      int bEmpty = (pPg && pPg->nn==4);
      fts5DataRelease(pPg);
      if( bEmpty==0 ) break;
    }

    if( iPgno==pSeg->iTermLeafPgno ){
      /* All intervening pages are empty: truncate the term page so that
      ** the (now entry-less) term is removed from it. */
      i64 iId = FTS5_SEGMENT_ROWID(iSegid, pSeg->iTermLeafPgno);
      Fts5Data *pTerm = fts5DataRead(p, iId);
      if( pTerm && pTerm->szLeaf==pSeg->iTermLeafOffset ){
        u8 *aTermIdx = &pTerm->p[pTerm->szLeaf];
        int nTermIdx = pTerm->nn - pTerm->szLeaf;
        int iTermIdx = 0;
        int iTermOff = 0;

        /* Find the offset of the last term on the page (iTermOff) and the
        ** size of the footer excluding its final entry (iTermIdx). */
        while( 1 ){
          u32 iVal = 0;
          int nByte = fts5GetVarint32(&aTermIdx[iTermIdx], iVal);
          iTermOff += iVal;
          if( (iTermIdx+nByte)>=nTermIdx ) break;
          iTermIdx += nByte;
        }
        nTermIdx = iTermIdx;

        memmove(&pTerm->p[iTermOff], &pTerm->p[pTerm->szLeaf], nTermIdx);
        fts5PutU16(&pTerm->p[2], iTermOff);

        fts5DataWrite(p, iId, pTerm->p, iTermOff+nTermIdx);
        if( nTermIdx==0 ){
          fts5SecureDeleteIdxEntry(p, iSegid, pSeg->iTermLeafPgno);
        }
      }
      fts5DataRelease(pTerm);
    }
  }

  /* Assuming no error has occurred, this block does final edits to the
  ** leaf page before writing it back to disk. Input variables are:
  **
  **   nPg: Total initial size of leaf page.
  **   iPgIdx: Initial offset of page footer.
  **
  **   iOff: Offset to move data to
  **   iNextOff: Offset to move data from
  */
  if( p->rc==SQLITE_OK ){
    const int nMove = nPg - iNextOff;     /* Number of bytes to move */
    int nShift = iNextOff - iOff;         /* Distance to move them */

    int iPrevKeyOut = 0;
    int iKeyIn = 0;

    memmove(&aPg[iOff], &aPg[iNextOff], nMove);
    iPgIdx -= nShift;
    nPg = iPgIdx;
    fts5PutU16(&aPg[2], iPgIdx);

    /* Rebuild the footer from the saved copy, dropping the entry for the
    ** deleted key (if any) and shifting offsets past the edit point. */
    for(iIdx=0; iIdx<nIdx; /* no-op */){
      u32 iVal = 0;
      iIdx += fts5GetVarint32(&aIdx[iIdx], iVal);
      iKeyIn += iVal;
      if( iKeyIn!=iDelKeyOff ){
        int iKeyOut = (iKeyIn - (iKeyIn>iOff ? nShift : 0));
        nPg += sqlite3Fts5PutVarint(&aPg[nPg], iKeyOut - iPrevKeyOut);
        iPrevKeyOut = iKeyOut;
      }
    }

    /* If the footer became empty, remove the page's %_idx entry too. */
    if( iPgIdx==nPg && nIdx>0 && pSeg->iLeafPgno!=1 ){
      fts5SecureDeleteIdxEntry(p, iSegid, pSeg->iLeafPgno);
    }

    assert_nc( nPg>4 || fts5GetU16(aPg)==0 );
    fts5DataWrite(p, FTS5_SEGMENT_ROWID(iSegid,pSeg->iLeafPgno), aPg, nPg);
  }
  sqlite3_free(aIdx);
}
|
|
|
|
/*
** This is called as part of flushing a delete to disk in 'secure-delete'
** mode. It edits the segments within the database described by argument
** pStruct to remove the entries for term zTerm, rowid iRowid.
**
** If no matching (term, rowid) entry exists on disk, this is a no-op.
** Any error is left in Fts5Index.rc.
*/
static void fts5FlushSecureDelete(
  Fts5Index *p,
  Fts5Structure *pStruct,
  const char *zTerm,
  int nTerm,
  i64 iRowid
){
  /* SKIPHASH: search the on-disk segments only - the entry being deleted
  ** is the one currently sitting in the in-memory hash. */
  const int f = FTS5INDEX_QUERY_SKIPHASH;
  Fts5Iter *pIter = 0;            /* Used to find term instance */

  fts5MultiIterNew(p, pStruct, f, 0, (const u8*)zTerm, nTerm, -1, 0, &pIter);
  if( fts5MultiIterEof(p, pIter)==0 ){
    i64 iThis = fts5MultiIterRowid(pIter);
    if( iThis<iRowid ){
      fts5MultiIterNextFrom(p, pIter, iRowid);
    }

    /* Only delete if the iterator landed exactly on the requested rowid. */
    if( p->rc==SQLITE_OK
     && fts5MultiIterEof(p, pIter)==0
     && iRowid==fts5MultiIterRowid(pIter)
    ){
      Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
      fts5DoSecureDelete(p, pSeg);
    }
  }

  fts5MultiIterFree(pIter);
}
|
|
|
|
|
|
/*
** Flush the contents of the in-memory hash table to a new level-0
** segment on disk. Also update the corresponding structure record.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has
** already occurred, this function is a no-op.
*/
static void fts5FlushOneHash(Fts5Index *p){
  Fts5Hash *pHash = p->pHash;
  Fts5Structure *pStruct;
  int iSegid;
  int pgnoLast = 0;                 /* Last leaf page number in segment */

  /* Obtain a reference to the index structure and allocate a new segment-id
  ** for the new level-0 segment. */
  pStruct = fts5StructureRead(p);
  fts5StructureInvalidate(p);

  if( sqlite3Fts5HashIsEmpty(pHash)==0 ){
    iSegid = fts5AllocateSegid(p, pStruct);
    if( iSegid ){
      const int pgsz = p->pConfig->pgsz;
      int eDetail = p->pConfig->eDetail;
      int bSecureDelete = p->pConfig->bSecureDelete;
      Fts5StructureSegment *pSeg;   /* New segment within pStruct */
      Fts5Buffer *pBuf;             /* Buffer in which to assemble leaf page */
      Fts5Buffer *pPgidx;           /* Buffer in which to assemble pgidx */

      Fts5SegWriter writer;
      fts5WriteInit(p, &writer, iSegid);

      pBuf = &writer.writer.buf;
      pPgidx = &writer.writer.pgidx;

      /* fts5WriteInit() should have initialized the buffers to (most likely)
      ** the maximum space required. */
      assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) );
      assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) );

      /* Begin scanning through hash table entries. This loop runs once for each
      ** term/doclist currently stored within the hash table. */
      if( p->rc==SQLITE_OK ){
        p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
      }
      while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
        const char *zTerm;          /* Buffer containing term */
        int nTerm;                  /* Size of zTerm in bytes */
        const u8 *pDoclist;         /* Pointer to doclist for this term */
        int nDoclist;               /* Size of doclist in bytes */

        /* Get the term and doclist for this entry. */
        sqlite3Fts5HashScanEntry(pHash, &zTerm, &nTerm, &pDoclist, &nDoclist);
        if( bSecureDelete==0 ){
          fts5WriteAppendTerm(p, &writer, nTerm, (const u8*)zTerm);
          if( p->rc!=SQLITE_OK ) break;
          assert( writer.bFirstRowidInPage==0 );
        }

        if( !bSecureDelete && pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){
          /* The entire doclist will fit on the current leaf. */
          fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
        }else{
          /* In secure-delete mode the term is written lazily, since the
          ** doclist may turn out to consist entirely of deletes. */
          int bTermWritten = !bSecureDelete;
          i64 iRowid = 0;
          i64 iPrev = 0;
          int iOff = 0;

          /* The entire doclist will not fit on this leaf. The following
          ** loop iterates through the poslists that make up the current
          ** doclist. */
          while( p->rc==SQLITE_OK && iOff<nDoclist ){
            u64 iDelta = 0;
            iOff += fts5GetVarint(&pDoclist[iOff], &iDelta);
            iRowid += iDelta;

            /* If in secure delete mode, and if this entry in the poslist is
            ** in fact a delete, then edit the existing segments directly
            ** using fts5FlushSecureDelete(). */
            if( bSecureDelete ){
              if( eDetail==FTS5_DETAIL_NONE ){
                if( iOff<nDoclist && pDoclist[iOff]==0x00 ){
                  fts5FlushSecureDelete(p, pStruct, zTerm, nTerm, iRowid);
                  iOff++;
                  if( iOff<nDoclist && pDoclist[iOff]==0x00 ){
                    iOff++;
                    nDoclist = 0;
                  }else{
                    continue;
                  }
                }
              }else if( (pDoclist[iOff] & 0x01) ){
                fts5FlushSecureDelete(p, pStruct, zTerm, nTerm, iRowid);
                /* A poslist-size byte of exactly 0x01 means "delete with no
                ** positions" - nothing further to copy for this rowid. */
                if( p->rc!=SQLITE_OK || pDoclist[iOff]==0x01 ){
                  iOff++;
                  continue;
                }
              }
            }

            if( p->rc==SQLITE_OK && bTermWritten==0 ){
              fts5WriteAppendTerm(p, &writer, nTerm, (const u8*)zTerm);
              bTermWritten = 1;
              assert( p->rc!=SQLITE_OK || writer.bFirstRowidInPage==0 );
            }

            if( writer.bFirstRowidInPage ){
              fts5PutU16(&pBuf->p[0], (u16)pBuf->n);   /* first rowid on page */
              pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid);
              writer.bFirstRowidInPage = 0;
              fts5WriteDlidxAppend(p, &writer, iRowid);
            }else{
              /* Subsequent rowids on a page are stored as deltas. */
              u64 iRowidDelta = (u64)iRowid - (u64)iPrev;
              pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowidDelta);
            }
            if( p->rc!=SQLITE_OK ) break;
            assert( pBuf->n<=pBuf->nSpace );
            iPrev = iRowid;

            if( eDetail==FTS5_DETAIL_NONE ){
              if( iOff<nDoclist && pDoclist[iOff]==0 ){
                pBuf->p[pBuf->n++] = 0;
                iOff++;
                if( iOff<nDoclist && pDoclist[iOff]==0 ){
                  pBuf->p[pBuf->n++] = 0;
                  iOff++;
                }
              }
              if( (pBuf->n + pPgidx->n)>=pgsz ){
                fts5WriteFlushLeaf(p, &writer);
              }
            }else{
              int bDel = 0;
              int nPos = 0;
              int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDel);
              if( bDel && bSecureDelete ){
                /* The delete itself was applied above; write the entry out
                ** with the delete flag cleared. */
                fts5BufferAppendVarint(&p->rc, pBuf, nPos*2);
                iOff += nCopy;
                nCopy = nPos;
              }else{
                nCopy += nPos;
              }
              if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
                /* The entire poslist will fit on the current leaf. So copy
                ** it in one go. */
                fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
              }else{
                /* The entire poslist will not fit on this leaf. So it needs
                ** to be broken into sections. The only qualification being
                ** that each varint must be stored contiguously. */
                const u8 *pPoslist = &pDoclist[iOff];
                int iPos = 0;
                while( p->rc==SQLITE_OK ){
                  int nSpace = pgsz - pBuf->n - pPgidx->n;
                  int n = 0;
                  if( (nCopy - iPos)<=nSpace ){
                    n = nCopy - iPos;
                  }else{
                    n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
                  }
                  assert( n>0 );
                  fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
                  iPos += n;
                  if( (pBuf->n + pPgidx->n)>=pgsz ){
                    fts5WriteFlushLeaf(p, &writer);
                  }
                  if( iPos>=nCopy ) break;
                }
              }
              iOff += nCopy;
            }
          }
        }

        /* TODO2: Doclist terminator written here. */
        /* pBuf->p[pBuf->n++] = '\0'; */
        assert( pBuf->n<=pBuf->nSpace );
        if( p->rc==SQLITE_OK ) sqlite3Fts5HashScanNext(pHash);
      }
      fts5WriteFinish(p, &writer, &pgnoLast);

      assert( p->rc!=SQLITE_OK || bSecureDelete || pgnoLast>0 );
      if( pgnoLast>0 ){
        /* Update the Fts5Structure. It is written back to the database by the
        ** fts5StructureRelease() call below. */
        if( pStruct->nLevel==0 ){
          fts5StructureAddLevel(&p->rc, &pStruct);
        }
        fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
        if( p->rc==SQLITE_OK ){
          pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
          pSeg->iSegid = iSegid;
          pSeg->pgnoFirst = 1;
          pSeg->pgnoLast = pgnoLast;
          if( pStruct->nOriginCntr>0 ){
            pSeg->iOrigin1 = pStruct->nOriginCntr;
            pSeg->iOrigin2 = pStruct->nOriginCntr;
            pSeg->nEntry = p->nPendingRow;
            pStruct->nOriginCntr++;
          }
          pStruct->nSegment++;
        }
        fts5StructurePromote(p, 0, pStruct);
      }
    }
  }

  /* Perform any merge work made due by the new segment, then persist the
  ** (possibly modified) structure record. */
  fts5IndexAutomerge(p, &pStruct, pgnoLast + p->nContentlessDelete);
  fts5IndexCrisismerge(p, &pStruct);
  fts5StructureWrite(p, pStruct);
  fts5StructureRelease(pStruct);
}
|
|
|
|
/*
|
|
** Flush any data stored in the in-memory hash tables to the database.
|
|
*/
|
|
static void fts5IndexFlush(Fts5Index *p){
|
|
/* Unless it is empty, flush the hash table to disk */
|
|
if( p->flushRc ){
|
|
p->rc = p->flushRc;
|
|
return;
|
|
}
|
|
if( p->nPendingData || p->nContentlessDelete ){
|
|
assert( p->pHash );
|
|
fts5FlushOneHash(p);
|
|
if( p->rc==SQLITE_OK ){
|
|
sqlite3Fts5HashClear(p->pHash);
|
|
p->nPendingData = 0;
|
|
p->nPendingRow = 0;
|
|
p->nContentlessDelete = 0;
|
|
}else if( p->nPendingData || p->nContentlessDelete ){
|
|
p->flushRc = p->rc;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
** Build and return a new Fts5Structure in which every existing segment of
** pStruct has been moved onto a single (new, highest) level, ready for the
** caller to merge together. Returns NULL either on OOM (p->rc is set) or
** when no optimization is required; may instead return pStruct itself with
** an incremented ref-count (see comment below).
*/
static Fts5Structure *fts5IndexOptimizeStruct(
  Fts5Index *p,                   /* Index handle (p->rc set on OOM) */
  Fts5Structure *pStruct          /* Structure to optimize */
){
  Fts5Structure *pNew = 0;
  sqlite3_int64 nByte = sizeof(Fts5Structure);
  int nSeg = pStruct->nSegment;
  int i;

  /* Figure out if this structure requires optimization. A structure does
  ** not require optimization if either:
  **
  **  1. it consists of fewer than two segments, or
  **  2. all segments are on the same level, or
  **  3. all segments except one are currently inputs to a merge operation.
  **
  ** In the first case, if there are no tombstone hash pages, return NULL. In
  ** the second, increment the ref-count on *pStruct and return a copy of the
  ** pointer to it.
  */
  if( nSeg==0 ) return 0;
  for(i=0; i<pStruct->nLevel; i++){
    int nThis = pStruct->aLevel[i].nSeg;
    int nMerge = pStruct->aLevel[i].nMerge;
    if( nThis>0 && (nThis==nSeg || (nThis==nSeg-1 && nMerge==nThis)) ){
      /* Cases 2 and 3 above: all (or all-but-one) segments on level i. */
      if( nSeg==1 && nThis==1 && pStruct->aLevel[i].aSeg[0].nPgTombstone==0 ){
        /* Case 1: a single segment with no tombstone pages. */
        return 0;
      }
      fts5StructureRef(pStruct);
      return pStruct;
    }
    assert( pStruct->aLevel[i].nMerge<=nThis );
  }

  /* Allocate the new structure with one extra level to hold the output. */
  nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel);
  pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte);

  if( pNew ){
    Fts5StructureLevel *pLvl;
    nByte = nSeg * sizeof(Fts5StructureSegment);
    pNew->nLevel = MIN(pStruct->nLevel+1, FTS5_MAX_LEVEL);
    pNew->nRef = 1;
    pNew->nWriteCounter = pStruct->nWriteCounter;
    pNew->nOriginCntr = pStruct->nOriginCntr;
    pLvl = &pNew->aLevel[pNew->nLevel-1];
    pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte);
    if( pLvl->aSeg ){
      int iLvl, iSeg;
      int iSegOut = 0;
      /* Iterate through all segments, from oldest to newest. Add them to
      ** the new Fts5Level object so that pLvl->aSeg[0] is the oldest
      ** segment in the data structure. */
      for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){
        for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
          pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg];
          iSegOut++;
        }
      }
      pNew->nSegment = pLvl->nSeg = nSeg;
    }else{
      /* OOM allocating the segment array - discard the partial object. */
      sqlite3_free(pNew);
      pNew = 0;
    }
  }

  return pNew;
}
|
|
|
|
/*
** Optimize the index: flush any pending in-memory data, then repeatedly
** merge all segments together until the entire index consists of a single
** segment (the "optimize" operation exposed by FTS5).
*/
int sqlite3Fts5IndexOptimize(Fts5Index *p){
  Fts5Structure *pStruct;
  Fts5Structure *pNew = 0;

  assert( p->rc==SQLITE_OK );
  fts5IndexFlush(p);
  assert( p->rc!=SQLITE_OK || p->nContentlessDelete==0 );
  pStruct = fts5StructureRead(p);
  assert( p->rc!=SQLITE_OK || pStruct!=0 );
  fts5StructureInvalidate(p);

  if( pStruct ){
    /* pNew is either NULL (nothing to do / OOM), a new structure with all
    ** segments on one level, or pStruct itself with an extra reference. */
    pNew = fts5IndexOptimizeStruct(p, pStruct);
  }
  fts5StructureRelease(pStruct);

  assert( pNew==0 || pNew->nSegment>0 );
  if( pNew ){
    int iLvl;
    /* Find the first level that actually contains segments. */
    for(iLvl=0; pNew->aLevel[iLvl].nSeg==0; iLvl++){}
    /* Merge that level in FTS5_OPT_WORK_UNIT sized chunks until it is
    ** empty (all content merged into the output segment) or an error. */
    while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){
      int nRem = FTS5_OPT_WORK_UNIT;
      fts5IndexMergeLevel(p, &pNew, iLvl, &nRem);
    }

    fts5StructureWrite(p, pNew);
    fts5StructureRelease(pNew);
  }

  return fts5IndexReturn(p);
}
|
|
|
|
/*
|
|
** This is called to implement the special "VALUES('merge', $nMerge)"
|
|
** INSERT command.
|
|
*/
|
|
int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){
|
|
Fts5Structure *pStruct = 0;
|
|
|
|
fts5IndexFlush(p);
|
|
pStruct = fts5StructureRead(p);
|
|
if( pStruct ){
|
|
int nMin = p->pConfig->nUsermerge;
|
|
fts5StructureInvalidate(p);
|
|
if( nMerge<0 ){
|
|
Fts5Structure *pNew = fts5IndexOptimizeStruct(p, pStruct);
|
|
fts5StructureRelease(pStruct);
|
|
pStruct = pNew;
|
|
nMin = 1;
|
|
nMerge = nMerge*-1;
|
|
}
|
|
if( pStruct && pStruct->nLevel ){
|
|
if( fts5IndexMerge(p, &pStruct, nMerge, nMin) ){
|
|
fts5StructureWrite(p, pStruct);
|
|
}
|
|
}
|
|
fts5StructureRelease(pStruct);
|
|
}
|
|
return fts5IndexReturn(p);
|
|
}
|
|
|
|
/*
** The xAppend callback used by fts5SetupPrefixIter() for detail=none
** tables: append rowid-delta iDelta to pBuf as a varint. There is no
** position list in this mode, so the iterator argument is unused.
*/
static void fts5AppendRowid(
  Fts5Index *p,                   /* Index handle (error code in p->rc) */
  u64 iDelta,                     /* Rowid delta to append */
  Fts5Iter *pUnused,              /* Unused - matches xAppend signature */
  Fts5Buffer *pBuf                /* Buffer to append to */
){
  UNUSED_PARAM(pUnused);
  fts5BufferAppendVarint(&p->rc, pBuf, iDelta);
}
|
|
|
|
/*
** The xAppend callback used by fts5SetupPrefixIter() for tables that store
** position lists: append rowid-delta iDelta, the poslist-size field and
** the current output position list of iterator pMulti to buffer pBuf.
*/
static void fts5AppendPoslist(
  Fts5Index *p,                   /* Index handle (error code in p->rc) */
  u64 iDelta,                     /* Rowid delta to append */
  Fts5Iter *pMulti,               /* Iterator supplying the position list */
  Fts5Buffer *pBuf                /* Buffer to append to */
){
  int nData = pMulti->base.nData;
  /* Worst case space: two 9-byte varints + data + trailing zero padding. */
  int nByte = nData + 9 + 9 + FTS5_DATA_ZERO_PADDING;
  assert( nData>0 );
  if( p->rc==SQLITE_OK && 0==fts5BufferGrow(&p->rc, pBuf, nByte) ){
    fts5BufferSafeAppendVarint(pBuf, iDelta);
    fts5BufferSafeAppendVarint(pBuf, nData*2);  /* Size stored as (n*2) */
    fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nData);
    /* Zero the FTS5_DATA_ZERO_PADDING bytes following the data. */
    memset(&pBuf->p[pBuf->n], 0, FTS5_DATA_ZERO_PADDING);
  }
}
|
|
|
|
|
|
/*
** Advance doclist iterator pIter to the next entry. EOF is indicated by
** setting pIter->aPoslist to NULL.
*/
static void fts5DoclistIterNext(Fts5DoclistIter *pIter){
  /* Skip past the current entry: size field plus position list. */
  u8 *p = pIter->aPoslist + pIter->nSize + pIter->nPoslist;

  assert( pIter->aPoslist || (p==0 && pIter->aPoslist==0) );
  if( p>=pIter->aEof ){
    pIter->aPoslist = 0;          /* No more entries - at EOF */
  }else{
    i64 iDelta;

    /* Decode the rowid delta for the next entry. */
    p += fts5GetVarint(p, (u64*)&iDelta);
    pIter->iRowid += iDelta;

    /* Read position list size */
    if( p[0] & 0x80 ){
      /* Multi-byte size varint. The decoded value is (size*2); the low
      ** bit is discarded by the shift below. */
      int nPos;
      pIter->nSize = fts5GetVarint32(p, nPos);
      pIter->nPoslist = (nPos>>1);
    }else{
      /* Single byte size field. */
      pIter->nPoslist = ((int)(p[0])) >> 1;
      pIter->nSize = 1;
    }

    pIter->aPoslist = p;
    /* Guard against a (corrupt) poslist that runs off the end of the
    ** buffer - treat it as EOF rather than reading out of bounds. */
    if( &pIter->aPoslist[pIter->nPoslist]>pIter->aEof ){
      pIter->aPoslist = 0;
    }
  }
}
|
|
|
|
static void fts5DoclistIterInit(
|
|
Fts5Buffer *pBuf,
|
|
Fts5DoclistIter *pIter
|
|
){
|
|
memset(pIter, 0, sizeof(*pIter));
|
|
if( pBuf->n>0 ){
|
|
pIter->aPoslist = pBuf->p;
|
|
pIter->aEof = &pBuf->p[pBuf->n];
|
|
fts5DoclistIterNext(pIter);
|
|
}
|
|
}
|
|
|
|
#if 0
/*
** Append a doclist to buffer pBuf.
**
** This function assumes that space within the buffer has already been
** allocated.
*/
static void fts5MergeAppendDocid(
  Fts5Buffer *pBuf, /* Buffer to write to */
  i64 *piLastRowid, /* IN/OUT: Previous rowid written (if any) */
  i64 iRowid /* Rowid to append */
){
  assert( pBuf->n!=0 || (*piLastRowid)==0 );
  fts5BufferSafeAppendVarint(pBuf, iRowid - *piLastRowid);
  *piLastRowid = iRowid;
}
#endif

/* Macro version of the (disabled) function above. Appends the delta
** between iRowid and iLastRowid to pBuf as a varint, then updates
** iLastRowid. The casts to u64 make the subtraction well-defined even
** when the values differ by more than can be held in a signed 64-bit
** integer. */
#define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) { \
  assert( (pBuf)->n!=0 || (iLastRowid)==0 ); \
  fts5BufferSafeAppendVarint((pBuf), (u64)(iRowid) - (u64)(iLastRowid)); \
  (iLastRowid) = (iRowid); \
}
|
|
|
|
/*
|
|
** Swap the contents of buffer *p1 with that of *p2.
|
|
*/
|
|
static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){
|
|
Fts5Buffer tmp = *p1;
|
|
*p1 = *p2;
|
|
*p2 = tmp;
|
|
}
|
|
|
|
/*
** Read the next rowid delta from the doclist in pBuf, starting at byte
** offset *piOff, and add it to *piRowid. *piOff is advanced past the
** varint, or set to -1 if the end of the buffer has been reached.
*/
static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){
  int iStart = *piOff;
  if( iStart>=pBuf->n ){
    *piOff = -1;                  /* End of list reached */
  }else{
    u64 iDelta;
    *piOff = iStart + sqlite3Fts5GetVarint(&pBuf->p[iStart], &iDelta);
    *piRowid += iDelta;
  }
}
|
|
|
|
/*
** This is the equivalent of fts5MergePrefixLists() for detail=none mode.
** In this case the buffers consist of a delta-encoded list of rowids only.
*/
static void fts5MergeRowidLists(
  Fts5Index *p, /* FTS5 backend object */
  Fts5Buffer *p1, /* First list to merge */
  int nBuf, /* Number of entries in apBuf[] */
  Fts5Buffer *aBuf /* Array of other lists to merge into p1 */
){
  int i1 = 0;                     /* Read offset into p1 (-1 == EOF) */
  int i2 = 0;                     /* Read offset into p2 (-1 == EOF) */
  i64 iRowid1 = 0;                /* Current rowid decoded from p1 */
  i64 iRowid2 = 0;                /* Current rowid decoded from p2 */
  i64 iOut = 0;                   /* Last rowid written to the output */
  Fts5Buffer *p2 = &aBuf[0];
  Fts5Buffer out;                 /* Merged output list */

  (void)nBuf;
  memset(&out, 0, sizeof(out));
  assert( nBuf==1 );              /* detail=none always merges pairwise */
  sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n);
  if( p->rc ) return;

  /* Prime both iterators, then run a standard two-way sorted merge,
  ** writing each rowid once even if it appears in both inputs. */
  fts5NextRowid(p1, &i1, &iRowid1);
  fts5NextRowid(p2, &i2, &iRowid2);
  while( i1>=0 || i2>=0 ){
    if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){
      /* Next output rowid comes from p1 only. */
      assert( iOut==0 || iRowid1>iOut );
      fts5BufferSafeAppendVarint(&out, iRowid1 - iOut);
      iOut = iRowid1;
      fts5NextRowid(p1, &i1, &iRowid1);
    }else{
      /* Next output rowid comes from p2 (possibly also present in p1). */
      assert( iOut==0 || iRowid2>iOut );
      fts5BufferSafeAppendVarint(&out, iRowid2 - iOut);
      iOut = iRowid2;
      if( i1>=0 && iRowid1==iRowid2 ){
        /* Duplicate rowid - consume the p1 copy without re-emitting it. */
        fts5NextRowid(p1, &i1, &iRowid1);
      }
      fts5NextRowid(p2, &i2, &iRowid2);
    }
  }

  /* Replace the contents of p1 with the merged output. */
  fts5BufferSwap(&out, p1);
  fts5BufferFree(&out);
}
|
|
|
|
/*
** One input to the k-way merge performed by fts5MergePrefixLists(). The
** objects are kept in a linked list sorted either by rowid or by current
** position, depending on the merge phase.
*/
typedef struct PrefixMerger PrefixMerger;
struct PrefixMerger {
  Fts5DoclistIter iter; /* Doclist iterator */
  i64 iPos; /* For iterating through a position list */
  int iOff; /* Current byte offset within aPos[] */
  u8 *aPos; /* Position list of the current entry */
  PrefixMerger *pNext; /* Next in docid/poslist order */
};
|
|
|
|
static void fts5PrefixMergerInsertByRowid(
|
|
PrefixMerger **ppHead,
|
|
PrefixMerger *p
|
|
){
|
|
if( p->iter.aPoslist ){
|
|
PrefixMerger **pp = ppHead;
|
|
while( *pp && p->iter.iRowid>(*pp)->iter.iRowid ){
|
|
pp = &(*pp)->pNext;
|
|
}
|
|
p->pNext = *pp;
|
|
*pp = p;
|
|
}
|
|
}
|
|
|
|
static void fts5PrefixMergerInsertByPosition(
|
|
PrefixMerger **ppHead,
|
|
PrefixMerger *p
|
|
){
|
|
if( p->iPos>=0 ){
|
|
PrefixMerger **pp = ppHead;
|
|
while( *pp && p->iPos>(*pp)->iPos ){
|
|
pp = &(*pp)->pNext;
|
|
}
|
|
p->pNext = *pp;
|
|
*pp = p;
|
|
}
|
|
}
|
|
|
|
|
|
/*
** Array aBuf[] contains nBuf doclists. These are all merged in with the
** doclist in buffer p1.
*/
static void fts5MergePrefixLists(
  Fts5Index *p, /* FTS5 backend object */
  Fts5Buffer *p1, /* First list to merge */
  int nBuf, /* Number of buffers in array aBuf[] */
  Fts5Buffer *aBuf /* Other lists to merge in */
){
/* Advance merger p to the next position in its current position list. */
#define fts5PrefixMergerNextPosition(p) \
  sqlite3Fts5PoslistNext64((p)->aPos,(p)->iter.nPoslist,&(p)->iOff,&(p)->iPos)
#define FTS5_MERGE_NLIST 16
  PrefixMerger aMerger[FTS5_MERGE_NLIST]; /* One merger per input list */
  PrefixMerger *pHead = 0;        /* Sorted linked list of active inputs */
  int i;
  int nOut = 0;                   /* Upper bound on output size in bytes */
  Fts5Buffer out = {0, 0, 0};     /* Output doclist */
  Fts5Buffer tmp = {0, 0, 0};     /* Scratch buffer for merged poslists */
  i64 iLastRowid = 0;             /* Last rowid written to out */

  /* Initialize a doclist-iterator for each input buffer. Arrange them in
  ** a linked-list starting at pHead in ascending order of rowid. Avoid
  ** linking any iterators already at EOF into the linked list at all. */
  assert( nBuf+1<=(int)(sizeof(aMerger)/sizeof(aMerger[0])) );
  memset(aMerger, 0, sizeof(PrefixMerger)*(nBuf+1));
  pHead = &aMerger[nBuf];
  fts5DoclistIterInit(p1, &pHead->iter);
  for(i=0; i<nBuf; i++){
    fts5DoclistIterInit(&aBuf[i], &aMerger[i].iter);
    fts5PrefixMergerInsertByRowid(&pHead, &aMerger[i]);
    nOut += aBuf[i].n;
  }
  if( nOut==0 ) return;
  nOut += p1->n + 9 + 10*nBuf;

  /* The maximum size of the output is equal to the sum of the
  ** input sizes + 1 varint (9 bytes). The extra varint is because if the
  ** first rowid in one input is a large negative number, and the first in
  ** the other a non-negative number, the delta for the non-negative
  ** number will be larger on disk than the literal integer value
  ** was.
  **
  ** Or, if the input position-lists are corrupt, then the output might
  ** include up to (nBuf+1) extra 10-byte positions created by interpreting -1
  ** (the value PoslistNext64() uses for EOF) as a position and appending
  ** it to the output. This can happen at most once for each input
  ** position-list, hence (nBuf+1) 10 byte paddings. */
  if( sqlite3Fts5BufferSize(&p->rc, &out, nOut) ) return;

  /* Main merge loop. pHead is always the input with the smallest current
  ** rowid (or NULL when all inputs are at EOF). */
  while( pHead ){
    fts5MergeAppendDocid(&out, iLastRowid, pHead->iter.iRowid);

    if( pHead->pNext && iLastRowid==pHead->pNext->iter.iRowid ){
      /* Merge data from two or more poslists */
      i64 iPrev = 0;              /* Last position written to tmp */
      int nTmp = FTS5_DATA_ZERO_PADDING; /* Bytes required by tmp */
      int nMerge = 0;             /* Number of poslists being merged */
      PrefixMerger *pSave = pHead;
      PrefixMerger *pThis = 0;
      int nTail = 0;              /* Bytes remaining in the final poslist */

      /* Move every input with the current rowid off the rowid-sorted
      ** list and onto a new list (pHead) sorted by position. */
      pHead = 0;
      while( pSave && pSave->iter.iRowid==iLastRowid ){
        PrefixMerger *pNext = pSave->pNext;
        pSave->iOff = 0;
        pSave->iPos = 0;
        pSave->aPos = &pSave->iter.aPoslist[pSave->iter.nSize];
        fts5PrefixMergerNextPosition(pSave);
        nTmp += pSave->iter.nPoslist + 10;
        nMerge++;
        fts5PrefixMergerInsertByPosition(&pHead, pSave);
        pSave = pNext;
      }

      if( pHead==0 || pHead->pNext==0 ){
        /* Fewer than two non-empty poslists for a rowid that appeared in
        ** multiple inputs - the data must be corrupt. */
        p->rc = FTS5_CORRUPT;
        break;
      }

      /* See the earlier comment in this function for an explanation of why
      ** corrupt input position lists might cause the output to consume
      ** at most nMerge*10 bytes of unexpected space. */
      if( sqlite3Fts5BufferSize(&p->rc, &tmp, nTmp+nMerge*10) ){
        break;
      }
      fts5BufferZero(&tmp);

      /* Emit the first (smallest) position, then keep pulling the smallest
      ** position from the position-sorted list until only one input
      ** remains. Duplicate positions are emitted only once. */
      pThis = pHead;
      pHead = pThis->pNext;
      sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos);
      fts5PrefixMergerNextPosition(pThis);
      fts5PrefixMergerInsertByPosition(&pHead, pThis);

      while( pHead->pNext ){
        pThis = pHead;
        if( pThis->iPos!=iPrev ){
          sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos);
        }
        fts5PrefixMergerNextPosition(pThis);
        pHead = pThis->pNext;
        fts5PrefixMergerInsertByPosition(&pHead, pThis);
      }

      /* Only one input remains - its current position plus the unread
      ** tail of its poslist can be copied to the output verbatim. */
      if( pHead->iPos!=iPrev ){
        sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pHead->iPos);
      }
      nTail = pHead->iter.nPoslist - pHead->iOff;

      /* WRITEPOSLISTSIZE */
      assert_nc( tmp.n+nTail<=nTmp );
      assert( tmp.n+nTail<=nTmp+nMerge*10 );
      if( tmp.n+nTail>nTmp-FTS5_DATA_ZERO_PADDING ){
        if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
        break;
      }
      fts5BufferSafeAppendVarint(&out, (tmp.n+nTail) * 2);
      fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n);
      if( nTail>0 ){
        fts5BufferSafeAppendBlob(&out, &pHead->aPos[pHead->iOff], nTail);
      }

      /* Advance every input that contributed to this rowid and rebuild
      ** the rowid-sorted list. */
      pHead = pSave;
      for(i=0; i<nBuf+1; i++){
        PrefixMerger *pX = &aMerger[i];
        if( pX->iter.aPoslist && pX->iter.iRowid==iLastRowid ){
          fts5DoclistIterNext(&pX->iter);
          fts5PrefixMergerInsertByRowid(&pHead, pX);
        }
      }

    }else{
      /* Copy poslist from pHead to output */
      PrefixMerger *pThis = pHead;
      Fts5DoclistIter *pI = &pThis->iter;
      fts5BufferSafeAppendBlob(&out, pI->aPoslist, pI->nPoslist+pI->nSize);
      fts5DoclistIterNext(pI);
      pHead = pThis->pNext;
      fts5PrefixMergerInsertByRowid(&pHead, pThis);
    }
  }

  /* Replace the contents of p1 with the merged output. */
  fts5BufferFree(p1);
  fts5BufferFree(&tmp);
  memset(&out.p[out.n], 0, FTS5_DATA_ZERO_PADDING);
  *p1 = out;
}
|
|
|
|
/*
** Construct an iterator (*ppIter) over all rowids matching the prefix
** pToken/nToken. The doclists for all matching terms are gathered into a
** single in-memory doclist (using xAppend/xMerge callbacks appropriate to
** the table's detail= mode) and a new iterator over that list is returned.
*/
static void fts5SetupPrefixIter(
  Fts5Index *p, /* Index to read from */
  int bDesc, /* True for "ORDER BY rowid DESC" */
  int iIdx, /* Index to scan for data */
  u8 *pToken, /* Buffer containing prefix to match */
  int nToken, /* Size of buffer pToken in bytes */
  Fts5Colset *pColset, /* Restrict matches to these columns */
  Fts5Iter **ppIter /* OUT: New iterator */
){
  Fts5Structure *pStruct;
  Fts5Buffer *aBuf;               /* Array of partial-merge accumulators */
  int nBuf = 32;
  int nMerge = 1;                 /* Number of buffers merged at a time */

  /* Select merge/append strategies based on the detail= mode: rowid-only
  ** lists for detail=none, full poslists otherwise. */
  void (*xMerge)(Fts5Index*, Fts5Buffer*, int, Fts5Buffer*);
  void (*xAppend)(Fts5Index*, u64, Fts5Iter*, Fts5Buffer*);
  if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
    xMerge = fts5MergeRowidLists;
    xAppend = fts5AppendRowid;
  }else{
    nMerge = FTS5_MERGE_NLIST-1;
    nBuf = nMerge*8; /* Sufficient to merge (16^8)==(2^32) lists */
    xMerge = fts5MergePrefixLists;
    xAppend = fts5AppendPoslist;
  }

  aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
  pStruct = fts5StructureRead(p);
  assert( p->rc!=SQLITE_OK || (aBuf && pStruct) );

  if( p->rc==SQLITE_OK ){
    const int flags = FTS5INDEX_QUERY_SCAN
                    | FTS5INDEX_QUERY_SKIPEMPTY
                    | FTS5INDEX_QUERY_NOOUTPUT;
    int i;
    i64 iLastRowid = 0;           /* Last rowid appended to doclist */
    Fts5Iter *p1 = 0; /* Iterator used to gather data from index */
    Fts5Data *pData;
    Fts5Buffer doclist;           /* Accumulator for the current doclist */
    int bNewTerm = 1;

    memset(&doclist, 0, sizeof(doclist));

    /* If iIdx is non-zero, then it is the number of a prefix-index for
    ** prefixes 1 character longer than the prefix being queried for. That
    ** index contains all the doclists required, except for the one
    ** corresponding to the prefix itself. That one is extracted from the
    ** main term index here. */
    if( iIdx!=0 ){
      int dummy = 0;
      const int f2 = FTS5INDEX_QUERY_SKIPEMPTY|FTS5INDEX_QUERY_NOOUTPUT;
      pToken[0] = FTS5_MAIN_PREFIX;
      fts5MultiIterNew(p, pStruct, f2, pColset, pToken, nToken, -1, 0, &p1);
      fts5IterSetOutputCb(&p->rc, p1);
      for(;
          fts5MultiIterEof(p, p1)==0;
          fts5MultiIterNext2(p, p1, &dummy)
      ){
        Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
        p1->xSetOutputs(p1, pSeg);
        if( p1->base.nData ){
          xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist);
          iLastRowid = p1->base.iRowid;
        }
      }
      fts5MultiIterFree(p1);
    }

    /* Scan the selected (prefix- or main-) index for all terms that begin
    ** with pToken/nToken, appending each term's doclist. */
    pToken[0] = FTS5_MAIN_PREFIX + iIdx;
    fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1);
    fts5IterSetOutputCb(&p->rc, p1);

    for( /* no-op */ ;
        fts5MultiIterEof(p, p1)==0;
        fts5MultiIterNext2(p, p1, &bNewTerm)
    ){
      Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
      int nTerm = pSeg->term.n;
      const u8 *pTerm = pSeg->term.p;
      p1->xSetOutputs(p1, pSeg);

      assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 );
      if( bNewTerm ){
        /* Stop once the scan moves past the last term with this prefix. */
        if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break;
      }

      if( p1->base.nData==0 ) continue;
      if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){
        /* The new entry's rowid is not larger than the last one written,
        ** so the accumulated doclist must be pushed into the aBuf[]
        ** hierarchy (merging groups of nMerge buffers as slots fill up)
        ** before starting a fresh doclist. */
        for(i=0; p->rc==SQLITE_OK && doclist.n; i++){
          int i1 = i*nMerge;
          int iStore;
          assert( i1+nMerge<=nBuf );
          for(iStore=i1; iStore<i1+nMerge; iStore++){
            if( aBuf[iStore].n==0 ){
              fts5BufferSwap(&doclist, &aBuf[iStore]);
              fts5BufferZero(&doclist);
              break;
            }
          }
          if( iStore==i1+nMerge ){
            /* This group is full - merge it into doclist and carry the
            ** result up to the next group. */
            xMerge(p, &doclist, nMerge, &aBuf[i1]);
            for(iStore=i1; iStore<i1+nMerge; iStore++){
              fts5BufferZero(&aBuf[iStore]);
            }
          }
        }
        iLastRowid = 0;
      }

      xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist);
      iLastRowid = p1->base.iRowid;
    }

    /* Merge all remaining partial lists into doclist and free them. */
    assert( (nBuf%nMerge)==0 );
    for(i=0; i<nBuf; i+=nMerge){
      int iFree;
      if( p->rc==SQLITE_OK ){
        xMerge(p, &doclist, nMerge, &aBuf[i]);
      }
      for(iFree=i; iFree<i+nMerge; iFree++){
        fts5BufferFree(&aBuf[iFree]);
      }
    }
    fts5MultiIterFree(p1);

    /* Wrap the final doclist in an Fts5Data object and open an iterator
    ** over it for the caller. */
    pData = fts5IdxMalloc(p, sizeof(*pData)+doclist.n+FTS5_DATA_ZERO_PADDING);
    if( pData ){
      pData->p = (u8*)&pData[1];
      pData->nn = pData->szLeaf = doclist.n;
      if( doclist.n ) memcpy(pData->p, doclist.p, doclist.n);
      fts5MultiIterNew2(p, pData, bDesc, ppIter);
    }
    fts5BufferFree(&doclist);
  }

  fts5StructureRelease(pStruct);
  sqlite3_free(aBuf);
}
|
|
|
|
|
|
/*
** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
** to the document with rowid iRowid.
*/
int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){
  assert( p->rc==SQLITE_OK );

  /* Allocate the hash table if it has not already been allocated */
  if( p->pHash==0 ){
    p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData);
  }

  /* Flush the hash table to disk if required: when the new rowid is not
  ** greater than the previous one written (or equal, for a non-delete
  ** following a non-delete), or when the amount of pending data exceeds
  ** the configured hash-size limit. */
  if( iRowid<p->iWriteRowid
   || (iRowid==p->iWriteRowid && p->bDelete==0)
   || (p->nPendingData > p->pConfig->nHashSize)
  ){
    fts5IndexFlush(p);
  }

  p->iWriteRowid = iRowid;
  p->bDelete = bDelete;
  if( bDelete==0 ){
    p->nPendingRow++;             /* Count of rows pending in the hash */
  }
  return fts5IndexReturn(p);
}
|
|
|
|
/*
** Commit data to disk: flush the in-memory hash tables and close any
** open read transaction on the %_data table.
*/
int sqlite3Fts5IndexSync(Fts5Index *p){
  assert( p->rc==SQLITE_OK );
  fts5IndexFlush(p);
  sqlite3Fts5IndexCloseReader(p);
  return fts5IndexReturn(p);
}
|
|
|
|
/*
** Discard any data stored in the in-memory hash tables. Do not write it
** to the database. Additionally, assume that the contents of the %_data
** table may have changed on disk. So any in-memory caches of %_data
** records must be invalidated.
*/
int sqlite3Fts5IndexRollback(Fts5Index *p){
  sqlite3Fts5IndexCloseReader(p);
  fts5IndexDiscardData(p);        /* Drop pending hash-table content */
  fts5StructureInvalidate(p);     /* Drop the cached structure record */
  /* assert( p->rc==SQLITE_OK ); */
  return SQLITE_OK;
}
|
|
|
|
/*
|
|
** The %_data table is completely empty when this function is called. This
|
|
** function populates it with the initial structure objects for each index,
|
|
** and the initial version of the "averages" record (a zero-byte blob).
|
|
*/
|
|
int sqlite3Fts5IndexReinit(Fts5Index *p){
|
|
Fts5Structure s;
|
|
fts5StructureInvalidate(p);
|
|
fts5IndexDiscardData(p);
|
|
memset(&s, 0, sizeof(Fts5Structure));
|
|
if( p->pConfig->bContentlessDelete ){
|
|
s.nOriginCntr = 1;
|
|
}
|
|
fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0);
|
|
fts5StructureWrite(p, &s);
|
|
return fts5IndexReturn(p);
|
|
}
|
|
|
|
/*
** Open a new Fts5Index handle. If the bCreate argument is true, create
** and initialize the underlying %_data table.
**
** If successful, set *pp to point to the new object and return SQLITE_OK.
** Otherwise, set *pp to NULL and return an SQLite error code.
*/
int sqlite3Fts5IndexOpen(
  Fts5Config *pConfig,            /* Configuration for this table */
  int bCreate,                    /* True to create the shadow tables */
  Fts5Index **pp,                 /* OUT: New Fts5Index handle */
  char **pzErr                    /* OUT: Error message (from CreateTable) */
){
  int rc = SQLITE_OK;
  Fts5Index *p; /* New object */

  *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index));
  if( rc==SQLITE_OK ){
    p->pConfig = pConfig;
    p->nWorkUnit = FTS5_WORK_UNIT;
    p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName);
    if( p->zDataTbl && bCreate ){
      /* Create the %_data and %_idx shadow tables, then write the
      ** initial structure and averages records. */
      rc = sqlite3Fts5CreateTable(
          pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr
      );
      if( rc==SQLITE_OK ){
        rc = sqlite3Fts5CreateTable(pConfig, "idx",
            "segid, term, pgno, PRIMARY KEY(segid, term)",
            1, pzErr
        );
      }
      if( rc==SQLITE_OK ){
        rc = sqlite3Fts5IndexReinit(p);
      }
    }
  }

  assert( rc!=SQLITE_OK || p->rc==SQLITE_OK );
  if( rc ){
    /* Tear down the partially-constructed handle on any error. */
    sqlite3Fts5IndexClose(p);
    *pp = 0;
  }
  return rc;
}
|
|
|
|
/*
** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen().
** Safe to call with a NULL pointer (a no-op in that case).
*/
int sqlite3Fts5IndexClose(Fts5Index *p){
  int rc = SQLITE_OK;
  if( p ){
    assert( p->pReader==0 );
    fts5StructureInvalidate(p);
    /* Finalize all cached prepared statements. sqlite3_finalize() is a
    ** harmless no-op when passed a NULL statement handle. */
    sqlite3_finalize(p->pWriter);
    sqlite3_finalize(p->pDeleter);
    sqlite3_finalize(p->pIdxWriter);
    sqlite3_finalize(p->pIdxDeleter);
    sqlite3_finalize(p->pIdxSelect);
    sqlite3_finalize(p->pIdxNextSelect);
    sqlite3_finalize(p->pDataVersion);
    sqlite3_finalize(p->pDeleteFromIdx);
    sqlite3Fts5HashFree(p->pHash);
    sqlite3_free(p->zDataTbl);
    sqlite3_free(p);
  }
  return rc;
}
|
|
|
|
/*
** Argument p points to a buffer containing utf-8 text that is n bytes in
** size. Return the number of bytes in the nChar character prefix of the
** buffer, or 0 if there are less than nChar characters in total.
*/
int sqlite3Fts5IndexCharlenToBytelen(
  const char *p,                  /* UTF-8 text */
  int nByte,                      /* Size of p[] in bytes */
  int nChar                       /* Number of characters required */
){
  int iOff = 0;                   /* Bytes consumed so far */
  int iChar;                      /* Characters consumed so far */

  for(iChar=0; iChar<nChar; iChar++){
    if( iOff>=nByte ) return 0;   /* Fewer than nChar characters in p[] */
    if( (unsigned char)p[iOff++]>=0xc0 ){
      /* Lead byte of a multi-byte character: consume its continuation
      ** bytes (those of the form 10xxxxxx). */
      if( iOff>=nByte ) return 0;
      while( (p[iOff] & 0xc0)==0x80 ){
        iOff++;
        if( iOff>=nByte ){
          /* Buffer ends mid-scan. OK only if this is the last required
          ** character; otherwise there are too few characters. */
          if( iChar+1==nChar ) break;
          return 0;
        }
      }
    }
  }
  return iOff;
}
|
|
|
|
/*
** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of
** unicode characters in the string.
*/
static int fts5IndexCharlen(const char *pIn, int nIn){
  int nChar = 0;                  /* Characters counted so far */
  int iOff = 0;                   /* Current byte offset within pIn[] */
  for(/* no-op */; iOff<nIn; nChar++){
    if( (unsigned char)pIn[iOff++]>=0xc0 ){
      /* Lead byte of a multi-byte character - skip continuation bytes. */
      while( iOff<nIn && (pIn[iOff] & 0xc0)==0x80 ) iOff++;
    }
  }
  return nChar;
}
|
|
|
|
/*
** Insert or remove data to or from the index. Each time a document is
** added to or removed from the index, this function is called one or more
** times.
**
** For an insert, it must be called once for each token in the new document.
** If the operation is a delete, it must be called (at least) once for each
** unique token in the document with an iCol value less than zero. The iPos
** argument is ignored for a delete.
*/
int sqlite3Fts5IndexWrite(
  Fts5Index *p, /* Index to write to */
  int iCol, /* Column token appears in (-ve -> delete) */
  int iPos, /* Position of token within column */
  const char *pToken, int nToken /* Token to add or remove to or from index */
){
  int i; /* Used to iterate through indexes */
  int rc = SQLITE_OK; /* Return code */
  Fts5Config *pConfig = p->pConfig;

  assert( p->rc==SQLITE_OK );
  assert( (iCol<0)==p->bDelete );

  /* Add the entry to the main terms index. */
  rc = sqlite3Fts5HashWrite(
      p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken
  );

  /* Also add an entry to each configured prefix index for which the token
  ** contains at least aPrefix[i] characters. Prefix index i uses the
  ** byte prefix (FTS5_MAIN_PREFIX+i+1). */
  for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){
    const int nChar = pConfig->aPrefix[i];
    int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
    if( nByte ){
      rc = sqlite3Fts5HashWrite(p->pHash,
          p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken,
          nByte
      );
    }
  }

  return rc;
}
|
|
|
|
/*
|
|
** pToken points to a buffer of size nToken bytes containing a search
|
|
** term, including the index number at the start, used on a tokendata=1
|
|
** table. This function returns true if the term in buffer pBuf matches
|
|
** token pToken/nToken.
|
|
*/
|
|
static int fts5IsTokendataPrefix(
|
|
Fts5Buffer *pBuf,
|
|
const u8 *pToken,
|
|
int nToken
|
|
){
|
|
return (
|
|
pBuf->n>=nToken
|
|
&& 0==memcmp(pBuf->p, pToken, nToken)
|
|
&& (pBuf->n==nToken || pBuf->p[nToken]==0x00)
|
|
);
|
|
}
|
|
|
|
/*
** Ensure the segment-iterator passed as the only argument points to EOF.
*/
static void fts5SegIterSetEOF(Fts5SegIter *pSeg){
  /* Release the current leaf page; a NULL pLeaf marks the iterator
  ** as being at EOF. */
  fts5DataRelease(pSeg->pLeaf);
  pSeg->pLeaf = 0;
}
|
|
|
|
/*
** Usually, a tokendata=1 iterator (struct Fts5TokenDataIter) accumulates an
** array of these for each row it visits. Or, for an iterator used by an
** "ORDER BY rank" query, it accumulates an array of these for the entire
** query.
**
** Each instance in the array indicates the iterator (and therefore term)
** associated with position iPos of rowid iRowid. This is used by the
** xInstToken() API.
*/
struct Fts5TokenDataMap {
  i64 iRowid; /* Row this token is located in */
  i64 iPos; /* Position of token (-1 for a whole-row mapping) */
  int iIter; /* Iterator token was read from */
};
|
|
|
|
/*
** An object used to supplement Fts5Iter for tokendata=1 iterators.
*/
struct Fts5TokenDataIter {
  int nIter;                      /* Number of iterators in apIter[] */
  int nIterAlloc;                 /* Allocated size of apIter[] */

  int nMap;                       /* Number of valid entries in aMap[] */
  int nMapAlloc;                  /* Allocated size of aMap[] */
  Fts5TokenDataMap *aMap;         /* Position->iterator mappings */

  Fts5PoslistReader *aPoslistReader;  /* Readers used to merge poslists */
  int *aPoslistToIter;            /* Maps reader slot -> apIter[] index */
  Fts5Iter *apIter[1];            /* Array of nIter sub-iterators */
};
|
|
|
|
/*
** This function appends iterator pAppend to Fts5TokenDataIter pIn and
** returns the result. On OOM, pAppend is closed, p->rc is set to
** SQLITE_NOMEM and the (unmodified) pIn is returned.
*/
static Fts5TokenDataIter *fts5AppendTokendataIter(
  Fts5Index *p, /* Index object (for error code) */
  Fts5TokenDataIter *pIn, /* Current Fts5TokenDataIter struct */
  Fts5Iter *pAppend /* Append this iterator */
){
  Fts5TokenDataIter *pRet = pIn;

  if( p->rc==SQLITE_OK ){
    if( pIn==0 || pIn->nIter==pIn->nIterAlloc ){
      /* Grow (or first allocate) the structure. apIter[] doubles in size
      ** each time; the first allocation holds 16 pointers. */
      int nAlloc = pIn ? pIn->nIterAlloc*2 : 16;
      int nByte = nAlloc * sizeof(Fts5Iter*) + sizeof(Fts5TokenDataIter);
      Fts5TokenDataIter *pNew = (Fts5TokenDataIter*)sqlite3_realloc(pIn, nByte);

      if( pNew==0 ){
        p->rc = SQLITE_NOMEM;
      }else{
        /* Only a brand-new allocation is zeroed; a grown one keeps its
        ** existing counters and contents. */
        if( pIn==0 ) memset(pNew, 0, nByte);
        pRet = pNew;
        pNew->nIterAlloc = nAlloc;
      }
    }
  }
  if( p->rc ){
    /* On error, take ownership of pAppend and close it. */
    sqlite3Fts5IterClose((Fts5IndexIter*)pAppend);
  }else{
    pRet->apIter[pRet->nIter++] = pAppend;
  }
  assert( pRet==0 || pRet->nIter<=pRet->nIterAlloc );

  return pRet;
}
|
|
|
|
/*
|
|
** Delete an Fts5TokenDataIter structure and its contents.
|
|
*/
|
|
static void fts5TokendataIterDelete(Fts5TokenDataIter *pSet){
|
|
if( pSet ){
|
|
int ii;
|
|
for(ii=0; ii<pSet->nIter; ii++){
|
|
fts5MultiIterFree(pSet->apIter[ii]);
|
|
}
|
|
sqlite3_free(pSet->aPoslistReader);
|
|
sqlite3_free(pSet->aMap);
|
|
sqlite3_free(pSet);
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Append a mapping to the token-map belonging to object pT.
|
|
*/
|
|
static void fts5TokendataIterAppendMap(
|
|
Fts5Index *p,
|
|
Fts5TokenDataIter *pT,
|
|
int iIter,
|
|
i64 iRowid,
|
|
i64 iPos
|
|
){
|
|
if( p->rc==SQLITE_OK ){
|
|
if( pT->nMap==pT->nMapAlloc ){
|
|
int nNew = pT->nMapAlloc ? pT->nMapAlloc*2 : 64;
|
|
int nByte = nNew * sizeof(Fts5TokenDataMap);
|
|
Fts5TokenDataMap *aNew;
|
|
|
|
aNew = (Fts5TokenDataMap*)sqlite3_realloc(pT->aMap, nByte);
|
|
if( aNew==0 ){
|
|
p->rc = SQLITE_NOMEM;
|
|
return;
|
|
}
|
|
|
|
pT->aMap = aNew;
|
|
pT->nMapAlloc = nNew;
|
|
}
|
|
|
|
pT->aMap[pT->nMap].iRowid = iRowid;
|
|
pT->aMap[pT->nMap].iPos = iPos;
|
|
pT->aMap[pT->nMap].iIter = iIter;
|
|
pT->nMap++;
|
|
}
|
|
}
|
|
|
|
/*
** The iterator passed as the only argument must be a tokendata=1 iterator
** (pIter->pTokenDataIter!=0). This function sets the iterator output
** variables (pIter->base.*) according to the contents of the current
** row.
*/
static void fts5IterSetOutputsTokendata(Fts5Iter *pIter){
  int ii;
  int nHit = 0;                   /* Sub-iterators positioned on iRowid */
  i64 iRowid = SMALLEST_INT64;    /* Smallest rowid of any sub-iterator */
  int iMin = 0;                   /* Index of the iterator that set iRowid */

  Fts5TokenDataIter *pT = pIter->pTokenDataIter;

  pIter->base.nData = 0;
  pIter->base.pData = 0;

  /* Find the smallest rowid any non-EOF sub-iterator is positioned on,
  ** and count how many sub-iterators share that rowid. */
  for(ii=0; ii<pT->nIter; ii++){
    Fts5Iter *p = pT->apIter[ii];
    if( p->base.bEof==0 ){
      if( nHit==0 || p->base.iRowid<iRowid ){
        iRowid = p->base.iRowid;
        nHit = 1;
        pIter->base.pData = p->base.pData;
        pIter->base.nData = p->base.nData;
        iMin = ii;
      }else if( p->base.iRowid==iRowid ){
        nHit++;
      }
    }
  }

  if( nHit==0 ){
    pIter->base.bEof = 1;         /* All sub-iterators at EOF */
  }else{
    int eDetail = pIter->pIndex->pConfig->eDetail;
    pIter->base.bEof = 0;
    pIter->base.iRowid = iRowid;

    if( nHit==1 && eDetail==FTS5_DETAIL_FULL ){
      /* Exactly one sub-iterator matched: record a single whole-row
      ** mapping (iPos==-1) for the xInstToken() machinery. */
      fts5TokendataIterAppendMap(pIter->pIndex, pT, iMin, iRowid, -1);
    }else
    if( nHit>1 && eDetail!=FTS5_DETAIL_NONE ){
      /* Multiple sub-iterators share the rowid: merge their position
      ** lists into pIter->poslist in ascending position order. */
      int nReader = 0;            /* Number of poslist readers in use */
      int nByte = 0;              /* Total size of input poslists */
      i64 iPrev = 0;              /* Last position appended to output */

      /* Allocate array of iterators if they are not already allocated. */
      if( pT->aPoslistReader==0 ){
        pT->aPoslistReader = (Fts5PoslistReader*)sqlite3Fts5MallocZero(
            &pIter->pIndex->rc,
            pT->nIter * (sizeof(Fts5PoslistReader) + sizeof(int))
        );
        if( pT->aPoslistReader==0 ) return;
        pT->aPoslistToIter = (int*)&pT->aPoslistReader[pT->nIter];
      }

      /* Populate an iterator for each poslist that will be merged */
      for(ii=0; ii<pT->nIter; ii++){
        Fts5Iter *p = pT->apIter[ii];
        if( iRowid==p->base.iRowid ){
          pT->aPoslistToIter[nReader] = ii;
          sqlite3Fts5PoslistReaderInit(
              p->base.pData, p->base.nData, &pT->aPoslistReader[nReader++]
          );
          nByte += p->base.nData;
        }
      }

      /* Ensure the output buffer is large enough */
      if( fts5BufferGrow(&pIter->pIndex->rc, &pIter->poslist, nByte+nHit*10) ){
        return;
      }

      /* Ensure the token-mapping is large enough */
      if( eDetail==FTS5_DETAIL_FULL && pT->nMapAlloc<(pT->nMap + nByte) ){
        int nNew = (pT->nMapAlloc + nByte) * 2;
        Fts5TokenDataMap *aNew = (Fts5TokenDataMap*)sqlite3_realloc(
            pT->aMap, nNew*sizeof(Fts5TokenDataMap)
        );
        if( aNew==0 ){
          pIter->pIndex->rc = SQLITE_NOMEM;
          return;
        }
        pT->aMap = aNew;
        pT->nMapAlloc = nNew;
      }

      pIter->poslist.n = 0;

      /* Repeatedly pull the smallest remaining position from the readers
      ** and append it (plus, for detail=full, a token mapping). */
      while( 1 ){
        i64 iMinPos = LARGEST_INT64;

        /* Find smallest position */
        iMin = 0;
        for(ii=0; ii<nReader; ii++){
          Fts5PoslistReader *pReader = &pT->aPoslistReader[ii];
          if( pReader->bEof==0 ){
            if( pReader->iPos<iMinPos ){
              iMinPos = pReader->iPos;
              iMin = ii;
            }
          }
        }

        /* If all readers were at EOF, break out of the loop. */
        if( iMinPos==LARGEST_INT64 ) break;

        sqlite3Fts5PoslistSafeAppend(&pIter->poslist, &iPrev, iMinPos);
        sqlite3Fts5PoslistReaderNext(&pT->aPoslistReader[iMin]);

        if( eDetail==FTS5_DETAIL_FULL ){
          pT->aMap[pT->nMap].iPos = iMinPos;
          pT->aMap[pT->nMap].iIter = pT->aPoslistToIter[iMin];
          pT->aMap[pT->nMap].iRowid = iRowid;
          pT->nMap++;
        }
      }

      /* Expose the merged poslist as this iterator's output. */
      pIter->base.pData = pIter->poslist.p;
      pIter->base.nData = pIter->poslist.n;
    }
  }
}
|
|
|
|
/*
|
|
** The iterator passed as the only argument must be a tokendata=1 iterator
|
|
** (pIter->pTokenDataIter!=0). This function advances the iterator. If
|
|
** argument bFrom is false, then the iterator is advanced to the next
|
|
** entry. Or, if bFrom is true, it is advanced to the first entry with
|
|
** a rowid of iFrom or greater.
|
|
*/
|
|
static void fts5TokendataIterNext(Fts5Iter *pIter, int bFrom, i64 iFrom){
  int ii;
  Fts5TokenDataIter *pT = pIter->pTokenDataIter;

  for(ii=0; ii<pT->nIter; ii++){
    Fts5Iter *p = pT->apIter[ii];
    /* Only advance sub-iterators that currently contribute to the composite
    ** position (those at the current rowid), or - for a seek (bFrom) - those
    ** still behind the target rowid. */
    if( p->base.bEof==0
     && (p->base.iRowid==pIter->base.iRowid || (bFrom && p->base.iRowid<iFrom))
    ){
      fts5MultiIterNext(p->pIndex, p, bFrom, iFrom);
      /* The seek above may stop short of iFrom; keep stepping this
      ** sub-iterator until it reaches iFrom, EOF, or an error occurs. */
      while( bFrom && p->base.bEof==0
          && p->base.iRowid<iFrom
          && p->pIndex->rc==SQLITE_OK
      ){
        fts5MultiIterNext(p->pIndex, p, 0, 0);
      }
    }
  }

  /* Recompute the composite rowid/poslist from the advanced sub-iterators. */
  fts5IterSetOutputsTokendata(pIter);
}
|
|
|
|
/*
|
|
** If the segment-iterator passed as the first argument is at EOF, then
|
|
** set pIter->term to a copy of buffer pTerm.
|
|
*/
|
|
static void fts5TokendataSetTermIfEof(Fts5Iter *pIter, Fts5Buffer *pTerm){
  /* If iterator pIter's first segment-iterator is at EOF (no leaf page
  ** loaded), store a copy of buffer pTerm as its current term. */
  if( pIter==0 ) return;
  if( pIter->aSeg[0].pLeaf!=0 ) return;
  fts5BufferSet(&pIter->pIndex->rc, &pIter->aSeg[0].term, pTerm->n, pTerm->p);
}
|
|
|
|
/*
|
|
** This function sets up an iterator to use for a non-prefix query on a
|
|
** tokendata=1 table.
|
|
*/
|
|
static Fts5Iter *fts5SetupTokendataIter(
  Fts5Index *p,                   /* FTS index to query */
  const u8 *pToken,               /* Buffer containing query term */
  int nToken,                     /* Size of buffer pToken in bytes */
  Fts5Colset *pColset             /* Colset to filter on */
){
  Fts5Iter *pRet = 0;             /* Composite iterator to return */
  Fts5TokenDataIter *pSet = 0;    /* One sub-iterator per distinct term */
  Fts5Structure *pStruct = 0;
  const int flags = FTS5INDEX_QUERY_SCANONETERM | FTS5INDEX_QUERY_SCAN;

  Fts5Buffer bSeek = {0, 0, 0};   /* Key to seek each segment-iterator to */
  Fts5Buffer *pSmall = 0;         /* Smallest matching term seen so far */

  fts5IndexFlush(p);
  pStruct = fts5StructureRead(p);

  /* Each pass of this loop builds one Fts5Iter positioned at the next
  ** distinct term that has pToken as a tokendata-prefix. The loop ends
  ** when no further matching term exists (pSmall==0) or on error. */
  while( p->rc==SQLITE_OK ){
    Fts5Iter *pPrev = pSet ? pSet->apIter[pSet->nIter-1] : 0;
    Fts5Iter *pNew = 0;
    Fts5SegIter *pNewIter = 0;
    Fts5SegIter *pPrevIter = 0;

    int iLvl, iSeg, ii;

    pNew = fts5MultiIterAlloc(p, pStruct->nSegment);
    if( pSmall ){
      /* Seek past the previous term: append a 0x00 byte so the seek key
      ** sorts immediately after pSmall itself. */
      fts5BufferSet(&p->rc, &bSeek, pSmall->n, pSmall->p);
      fts5BufferAppendBlob(&p->rc, &bSeek, 1, (const u8*)"\0");
    }else{
      /* First pass: seek directly to the query token. */
      fts5BufferSet(&p->rc, &bSeek, nToken, pToken);
    }
    if( p->rc ){
      sqlite3Fts5IterClose((Fts5IndexIter*)pNew);
      break;
    }

    pNewIter = &pNew->aSeg[0];
    pPrevIter = (pPrev ? &pPrev->aSeg[0] : 0);
    for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
      for(iSeg=pStruct->aLevel[iLvl].nSeg-1; iSeg>=0; iSeg--){
        Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
        int bDone = 0;

        if( pPrevIter ){
          if( fts5BufferCompare(pSmall, &pPrevIter->term) ){
            /* The previous iterator's segment-cursor is already past
            ** pSmall - steal it wholesale rather than re-seeking. */
            memcpy(pNewIter, pPrevIter, sizeof(Fts5SegIter));
            memset(pPrevIter, 0, sizeof(Fts5SegIter));
            bDone = 1;
          }else if( pPrevIter->iEndofDoclist>pPrevIter->pLeaf->szLeaf ){
            /* Doclist continues onto later pages - advance to next term. */
            fts5SegIterNextInit(p,(const char*)bSeek.p,bSeek.n-1,pSeg,pNewIter);
            bDone = 1;
          }
        }

        if( bDone==0 ){
          fts5SegIterSeekInit(p, bSeek.p, bSeek.n, flags, pSeg, pNewIter);
        }

        if( pPrevIter ){
          /* Share the tombstone-page array with the previous iterator. */
          if( pPrevIter->pTombArray ){
            pNewIter->pTombArray = pPrevIter->pTombArray;
            pNewIter->pTombArray->nRef++;
          }
        }else{
          fts5SegIterAllocTombstone(p, pNewIter);
        }

        pNewIter++;
        if( pPrevIter ) pPrevIter++;
        if( p->rc ) break;
      }
    }
    fts5TokendataSetTermIfEof(pPrev, pSmall);

    pNew->bSkipEmpty = 1;
    pNew->pColset = pColset;
    fts5IterSetOutputCb(&p->rc, pNew);

    /* Loop through all segments in the new iterator. Find the smallest
    ** term that any segment-iterator points to. Iterator pNew will be
    ** used for this term. Also, set any iterator that points to a term that
    ** does not match pToken/nToken to point to EOF */
    pSmall = 0;
    for(ii=0; ii<pNew->nSeg; ii++){
      Fts5SegIter *pII = &pNew->aSeg[ii];
      if( 0==fts5IsTokendataPrefix(&pII->term, pToken, nToken) ){
        fts5SegIterSetEOF(pII);
      }
      if( pII->pLeaf && (!pSmall || fts5BufferCompare(pSmall, &pII->term)>0) ){
        pSmall = &pII->term;
      }
    }

    /* If pSmall is still NULL at this point, then the new iterator does
    ** not point to any terms that match the query. So delete it and break
    ** out of the loop - all required iterators have been collected. */
    if( pSmall==0 ){
      sqlite3Fts5IterClose((Fts5IndexIter*)pNew);
      break;
    }

    /* Append this iterator to the set and continue. */
    pSet = fts5AppendTokendataIter(p, pSet, pNew);
  }

  /* Mark every segment-iterator as single-term and finish merge setup. */
  if( p->rc==SQLITE_OK && pSet ){
    int ii;
    for(ii=0; ii<pSet->nIter; ii++){
      Fts5Iter *pIter = pSet->apIter[ii];
      int iSeg;
      for(iSeg=0; iSeg<pIter->nSeg; iSeg++){
        pIter->aSeg[iSeg].flags |= FTS5_SEGITER_ONETERM;
      }
      fts5MultiIterFinishSetup(p, pIter);
    }
  }

  /* Wrap the set in a composite iterator. On OOM, free the set instead. */
  if( p->rc==SQLITE_OK ){
    pRet = fts5MultiIterAlloc(p, 0);
  }
  if( pRet ){
    pRet->pTokenDataIter = pSet;
    if( pSet ){
      fts5IterSetOutputsTokendata(pRet);
    }else{
      pRet->base.bEof = 1;
    }
  }else{
    fts5TokendataIterDelete(pSet);
  }

  fts5StructureRelease(pStruct);
  fts5BufferFree(&bSeek);
  return pRet;
}
|
|
|
|
|
|
/*
|
|
** Open a new iterator to iterate though all rowid that match the
|
|
** specified token or token prefix.
|
|
*/
|
|
int sqlite3Fts5IndexQuery(
  Fts5Index *p,                   /* FTS index to query */
  const char *pToken, int nToken, /* Token (or prefix) to query for */
  int flags,                      /* Mask of FTS5INDEX_QUERY_X flags */
  Fts5Colset *pColset,            /* Match these columns only */
  Fts5IndexIter **ppIter          /* OUT: New iterator object */
){
  Fts5Config *pConfig = p->pConfig;
  Fts5Iter *pRet = 0;
  Fts5Buffer buf = {0, 0, 0};     /* Holds prefix byte + copy of pToken */

  /* If the QUERY_SCAN flag is set, all other flags must be clear. */
  assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN );

  if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){
    int iIdx = 0;                 /* Index to search */
    int iPrefixIdx = 0;           /* +1 prefix index */
    int bTokendata = pConfig->bTokendata;
    /* buf.p[0] is reserved for the index-prefix byte; the token follows. */
    if( nToken>0 ) memcpy(&buf.p[1], pToken, nToken);

    if( flags & (FTS5INDEX_QUERY_NOTOKENDATA|FTS5INDEX_QUERY_SCAN) ){
      bTokendata = 0;
    }

    /* Figure out which index to search and set iIdx accordingly. If this
    ** is a prefix query for which there is no prefix index, set iIdx to
    ** greater than pConfig->nPrefix to indicate that the query will be
    ** satisfied by scanning multiple terms in the main index.
    **
    ** If the QUERY_TEST_NOIDX flag was specified, then this must be a
    ** prefix-query. Instead of using a prefix-index (if one exists),
    ** evaluate the prefix query using the main FTS index. This is used
    ** for internal sanity checking by the integrity-check in debug
    ** mode only. */
#ifdef SQLITE_DEBUG
    if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){
      assert( flags & FTS5INDEX_QUERY_PREFIX );
      iIdx = 1+pConfig->nPrefix;
    }else
#endif
    if( flags & FTS5INDEX_QUERY_PREFIX ){
      int nChar = fts5IndexCharlen(pToken, nToken);
      for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){
        int nIdxChar = pConfig->aPrefix[iIdx-1];
        if( nIdxChar==nChar ) break;
        /* Remember any prefix-index exactly one character longer than the
        ** query - it can narrow a main-index scan if no exact index exists. */
        if( nIdxChar==nChar+1 ) iPrefixIdx = iIdx;
      }
    }

    if( bTokendata && iIdx==0 ){
      /* tokendata=1, non-prefix query: use the special multi-term setup. */
      buf.p[0] = '0';
      pRet = fts5SetupTokendataIter(p, buf.p, nToken+1, pColset);
    }else if( iIdx<=pConfig->nPrefix ){
      /* Straight index lookup */
      Fts5Structure *pStruct = fts5StructureRead(p);
      buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx);
      if( pStruct ){
        fts5MultiIterNew(p, pStruct, flags | FTS5INDEX_QUERY_SKIPEMPTY,
            pColset, buf.p, nToken+1, -1, 0, &pRet
        );
        fts5StructureRelease(pStruct);
      }
    }else{
      /* Scan multiple terms in the main index */
      int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0;
      fts5SetupPrefixIter(p, bDesc, iPrefixIdx, buf.p, nToken+1, pColset,&pRet);
      if( pRet==0 ){
        assert( p->rc!=SQLITE_OK );
      }else{
        assert( pRet->pColset==0 );
        fts5IterSetOutputCb(&p->rc, pRet);
        if( p->rc==SQLITE_OK ){
          Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst];
          if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg);
        }
      }
    }

    /* On error, release the partially-constructed iterator. */
    if( p->rc ){
      sqlite3Fts5IterClose((Fts5IndexIter*)pRet);
      pRet = 0;
      sqlite3Fts5IndexCloseReader(p);
    }

    *ppIter = (Fts5IndexIter*)pRet;
    sqlite3Fts5BufferFree(&buf);
  }
  return fts5IndexReturn(p);
}
|
|
|
|
/*
|
|
** Return true if the iterator passed as the only argument is at EOF.
|
|
*/
|
|
/*
|
|
** Move to the next matching rowid.
|
|
*/
|
|
int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){
  Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
  Fts5Index *pIdx = pIter->pIndex;

  assert( pIdx->rc==SQLITE_OK );

  /* tokendata=1 iterators advance via the composite sub-iterator set;
  ** all others advance the multi-segment iterator directly. */
  if( pIter->pTokenDataIter==0 ){
    fts5MultiIterNext(pIdx, pIter, 0, 0);
  }else{
    fts5TokendataIterNext(pIter, 0, 0);
  }
  return fts5IndexReturn(pIdx);
}
|
|
|
|
/*
|
|
** Move to the next matching term/rowid. Used by the fts5vocab module.
|
|
*/
|
|
int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){
  Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
  Fts5Index *p = pIter->pIndex;

  assert( pIter->pIndex->rc==SQLITE_OK );

  fts5MultiIterNext(p, pIter, 0, 0);
  if( p->rc==SQLITE_OK ){
    Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
    /* A term whose first byte is not FTS5_MAIN_PREFIX belongs to a prefix
    ** index, not the main index - the scan is finished. Release the leaf
    ** and flag EOF. */
    if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){
      fts5DataRelease(pSeg->pLeaf);
      pSeg->pLeaf = 0;
      pIter->base.bEof = 1;
    }
  }

  return fts5IndexReturn(pIter->pIndex);
}
|
|
|
|
/*
|
|
** Move to the next matching rowid that occurs at or after iMatch. The
|
|
** definition of "at or after" depends on whether this iterator iterates
|
|
** in ascending or descending rowid order.
|
|
*/
|
|
int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){
  Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
  Fts5Index *pIdx = pIter->pIndex;

  /* Dispatch: tokendata=1 iterators seek through the sub-iterator set,
  ** others seek the multi-segment iterator directly. */
  if( pIter->pTokenDataIter==0 ){
    fts5MultiIterNextFrom(pIdx, pIter, iMatch);
  }else{
    fts5TokendataIterNext(pIter, 1, iMatch);
  }
  return fts5IndexReturn(pIdx);
}
|
|
|
|
/*
|
|
** Return the current term.
|
|
*/
|
|
const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){
  int nTerm;
  const char *zTerm;

  /* The stored term carries a one-byte index prefix; strip it before
  ** returning the term to the caller. */
  zTerm = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &nTerm);
  assert_nc( zTerm || nTerm<=1 );
  *pn = nTerm-1;
  if( zTerm==0 ) return 0;
  return &zTerm[1];
}
|
|
|
|
/*
|
|
** This is used by xInstToken() to access the token at offset iOff, column
|
|
** iCol of row iRowid. The token is returned via output variables *ppOut
|
|
** and *pnOut. The iterator passed as the first argument must be a tokendata=1
|
|
** iterator (pIter->pTokenDataIter!=0).
|
|
*/
|
|
int sqlite3Fts5IterToken(
  Fts5IndexIter *pIndexIter,
  i64 iRowid,
  int iCol,
  int iOff,
  const char **ppOut, int *pnOut
){
  Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
  Fts5TokenDataIter *pT = pIter->pTokenDataIter;
  Fts5TokenDataMap *aMap = pT->aMap;
  /* Positions are encoded with the column in the upper 32 bits and the
  ** token offset in the lower 32 bits. */
  i64 iPos = (((i64)iCol)<<32) + iOff;

  int i1 = 0;                     /* Binary search: lower bound */
  int i2 = pT->nMap;              /* Binary search: upper bound (exclusive) */
  int iTest = 0;                  /* Midpoint probed most recently */

  /* Binary search aMap[] - sorted first by rowid, then by position -
  ** for the (iRowid, iPos) entry. */
  while( i2>i1 ){
    iTest = (i1 + i2) / 2;

    if( aMap[iTest].iRowid<iRowid ){
      i1 = iTest+1;
    }else if( aMap[iTest].iRowid>iRowid ){
      i2 = iTest;
    }else{
      if( aMap[iTest].iPos<iPos ){
        if( aMap[iTest].iPos<0 ){
          /* A negative position is a whole-row mapping (set when a single
          ** sub-iterator matched the row) - it answers every position. */
          break;
        }
        i1 = iTest+1;
      }else if( aMap[iTest].iPos>iPos ){
        i2 = iTest;
      }else{
        break;
      }
    }
  }

  /* If found, return the matched sub-iterator's term (minus the one-byte
  ** index prefix). If not found, *ppOut/*pnOut are left unmodified. */
  if( i2>i1 ){
    Fts5Iter *pMap = pT->apIter[aMap[iTest].iIter];
    *ppOut = (const char*)pMap->aSeg[0].term.p+1;
    *pnOut = pMap->aSeg[0].term.n-1;
  }

  return SQLITE_OK;
}
|
|
|
|
/*
|
|
** Clear any existing entries from the token-map associated with the
|
|
** iterator passed as the only argument.
|
|
*/
|
|
void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter *pIndexIter){
  Fts5Iter *pIter = (Fts5Iter*)pIndexIter;

  /* Discard all accumulated token-map entries, if any exist. */
  if( pIter==0 ) return;
  if( pIter->pTokenDataIter ){
    pIter->pTokenDataIter->nMap = 0;
  }
}
|
|
|
|
/*
|
|
** Set a token-mapping for the iterator passed as the first argument. This
|
|
** is used in detail=column or detail=none mode when a token is requested
|
|
** using the xInstToken() API. In this case the caller tokenizers the
|
|
** current row and configures the token-mapping via multiple calls to this
|
|
** function.
|
|
*/
|
|
int sqlite3Fts5IndexIterWriteTokendata(
  Fts5IndexIter *pIndexIter,
  const char *pToken, int nToken,
  i64 iRowid, int iCol, int iOff
){
  Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
  Fts5TokenDataIter *pT = pIter->pTokenDataIter;
  Fts5Index *p = pIter->pIndex;
  int ii;

  assert( p->pConfig->eDetail!=FTS5_DETAIL_FULL );
  assert( pIter->pTokenDataIter );

  /* Find the sub-iterator whose term (minus the one-byte index prefix)
  ** matches pToken/nToken exactly. */
  for(ii=0; ii<pT->nIter; ii++){
    Fts5Buffer *pTerm = &pT->apIter[ii]->aSeg[0].term;
    if( nToken==pTerm->n-1 && memcmp(pToken, pTerm->p+1, nToken)==0 ) break;
  }
  /* If a matching sub-iterator was found, record the (rowid, col<<32|off)
  ** mapping against it. Tokens with no matching sub-iterator are ignored. */
  if( ii<pT->nIter ){
    fts5TokendataIterAppendMap(p, pT, ii, iRowid, (((i64)iCol)<<32) + iOff);
  }
  return fts5IndexReturn(p);
}
|
|
|
|
/*
|
|
** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
|
|
*/
|
|
void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){
  Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
  Fts5Index *pIdx;

  if( pIter==0 ) return;

  /* Save the index handle first - pIter itself is freed below. */
  pIdx = pIter->pIndex;
  fts5TokendataIterDelete(pIter->pTokenDataIter);
  fts5MultiIterFree(pIter);
  sqlite3Fts5IndexCloseReader(pIdx);
}
|
|
|
|
/*
|
|
** Read and decode the "averages" record from the database.
|
|
**
|
|
** Parameter anSize must point to an array of size nCol, where nCol is
|
|
** the number of user defined columns in the FTS table.
|
|
*/
|
|
int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){
  int nCol = p->pConfig->nCol;
  Fts5Data *pData;

  /* Default outputs to zero in case the record is absent or empty. */
  *pnRow = 0;
  memset(anSize, 0, sizeof(i64) * nCol);
  pData = fts5DataRead(p, FTS5_AVERAGES_ROWID);
  if( p->rc==SQLITE_OK && pData->nn ){
    int i = 0;                    /* Read offset within pData->p[] */
    int iCol;
    /* Record format: row-count varint followed by one size varint per
    ** column. A short record leaves trailing columns at zero. */
    i += fts5GetVarint(&pData->p[i], (u64*)pnRow);
    for(iCol=0; i<pData->nn && iCol<nCol; iCol++){
      i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]);
    }
  }

  fts5DataRelease(pData);
  return fts5IndexReturn(p);
}
|
|
|
|
/*
|
|
** Replace the current "averages" record with the contents of the buffer
|
|
** supplied as the second argument.
|
|
*/
|
|
int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){
  assert( p->rc==SQLITE_OK );
  /* Overwrite the averages record; any error is reported via p->rc. */
  fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData);
  return fts5IndexReturn(p);
}
|
|
|
|
/*
|
|
** Return the total number of blocks this module has read from the %_data
|
|
** table since it was created.
|
|
*/
|
|
int sqlite3Fts5IndexReads(Fts5Index *p){
  /* Simple accessor for the cumulative block-read counter. */
  return p->nRead;
}
|
|
|
|
/*
|
|
** Set the 32-bit cookie value stored at the start of all structure
|
|
** records to the value passed as the second argument.
|
|
**
|
|
** Return SQLITE_OK if successful, or an SQLite error code if an error
|
|
** occurs.
|
|
*/
|
|
int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){
  int rc;                              /* Return code */
  Fts5Config *pConfig = p->pConfig;    /* Configuration object */
  u8 aCookie[4];                       /* Binary representation of iNew */
  sqlite3_blob *pBlob = 0;

  assert( p->rc==SQLITE_OK );
  sqlite3Fts5Put32(aCookie, iNew);

  /* Open the structure record's "block" column for incremental writing
  ** and overwrite just its first four bytes (the cookie). */
  rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl,
      "block", FTS5_STRUCTURE_ROWID, 1, &pBlob
  );
  if( rc==SQLITE_OK ){
    sqlite3_blob_write(pBlob, aCookie, 4, 0);
    /* blob_close() reports any deferred write error. */
    rc = sqlite3_blob_close(pBlob);
  }

  return rc;
}
|
|
|
|
int sqlite3Fts5IndexLoadConfig(Fts5Index *p){
  /* Reading the structure record has the side-effect of loading the
  ** current configuration; the record itself is not needed here. */
  Fts5Structure *pStruct = fts5StructureRead(p);
  fts5StructureRelease(pStruct);
  return fts5IndexReturn(p);
}
|
|
|
|
/*
|
|
** Retrieve the origin value that will be used for the segment currently
|
|
** being accumulated in the in-memory hash table when it is flushed to
|
|
** disk. If successful, SQLITE_OK is returned and (*piOrigin) set to
|
|
** the queried value. Or, if an error occurs, an error code is returned
|
|
** and the final value of (*piOrigin) is undefined.
|
|
*/
|
|
int sqlite3Fts5IndexGetOrigin(Fts5Index *p, i64 *piOrigin){
  Fts5Structure *pStruct = fts5StructureRead(p);

  /* On a read failure pStruct is NULL and (*piOrigin) is left undefined,
  ** as documented in the header comment. */
  if( pStruct!=0 ){
    *piOrigin = pStruct->nOriginCntr;
    fts5StructureRelease(pStruct);
  }
  return fts5IndexReturn(p);
}
|
|
|
|
/*
|
|
** Buffer pPg contains a page of a tombstone hash table - one of nPg pages
|
|
** associated with the same segment. This function adds rowid iRowid to
|
|
** the hash table. The caller is required to guarantee that there is at
|
|
** least one free slot on the page.
|
|
**
|
|
** If parameter bForce is false and the hash table is deemed to be full
|
|
** (more than half of the slots are occupied), then non-zero is returned
|
|
** and iRowid not inserted. Or, if bForce is true or if the hash table page
|
|
** is not full, iRowid is inserted and zero returned.
|
|
*/
|
|
static int fts5IndexTombstoneAddToPage(
  Fts5Data *pPg,                  /* Hash-table page to insert into */
  int bForce,                     /* True to insert even if page is "full" */
  int nPg,                        /* Total pages in this hash table */
  u64 iRowid                      /* Rowid to insert */
){
  const int szKey = TOMBSTONE_KEYSIZE(pPg);   /* 4 or 8 byte slots */
  const int nSlot = TOMBSTONE_NSLOT(pPg);     /* Slots on this page */
  const int nElem = fts5GetU32(&pPg->p[4]);   /* Current entry count */
  int iSlot = (iRowid / nPg) % nSlot;         /* Initial probe slot */
  int nCollide = nSlot;                       /* Probe limit */

  /* A 4-byte-key page cannot store a rowid wider than 32 bits - signal
  ** the caller to rebuild with 8-byte keys. */
  if( szKey==4 && iRowid>0xFFFFFFFF ) return 2;
  /* Rowid 0 cannot occupy a slot (0 means "empty") - it is recorded via
  ** a dedicated flag byte in the page header instead. */
  if( iRowid==0 ){
    pPg->p[1] = 0x01;
    return 0;
  }

  /* Refuse insertion once the page is more than half full, unless forced. */
  if( bForce==0 && nElem>=(nSlot/2) ){
    return 1;
  }

  fts5PutU32(&pPg->p[4], nElem+1);
  /* Linear-probe open addressing in either the 32-bit or 64-bit slot array. */
  if( szKey==4 ){
    u32 *aSlot = (u32*)&pPg->p[8];
    while( aSlot[iSlot] ){
      iSlot = (iSlot + 1) % nSlot;
      if( nCollide--==0 ) return 0;
    }
    fts5PutU32((u8*)&aSlot[iSlot], (u32)iRowid);
  }else{
    u64 *aSlot = (u64*)&pPg->p[8];
    while( aSlot[iSlot] ){
      iSlot = (iSlot + 1) % nSlot;
      if( nCollide--==0 ) return 0;
    }
    fts5PutU64((u8*)&aSlot[iSlot], iRowid);
  }

  return 0;
}
|
|
|
|
/*
|
|
** This function attempts to build a new hash containing all the keys
|
|
** currently in the tombstone hash table for segment pSeg. The new
|
|
** hash will be stored in the nOut buffers passed in array apOut[].
|
|
** All pages of the new hash use key-size szKey (4 or 8).
|
|
**
|
|
** Return 0 if the hash is successfully rebuilt into the nOut pages.
|
|
** Or non-zero if it is not (because one page became overfull). In this
|
|
** case the caller should retry with a larger nOut parameter.
|
|
**
|
|
** Parameter pData1 is page iPg1 of the hash table being rebuilt.
|
|
*/
|
|
static int fts5IndexTombstoneRehash(
  Fts5Index *p,
  Fts5StructureSegment *pSeg,     /* Segment to rebuild hash of */
  Fts5Data *pData1,               /* One page of current hash - or NULL */
  int iPg1,                       /* Which page of the current hash is pData1 */
  int szKey,                      /* 4 or 8, the keysize */
  int nOut,                       /* Number of output pages */
  Fts5Data **apOut                /* Array of output hash pages */
){
  int ii;
  int res = 0;                    /* Non-zero once an output page overflows */

  /* Initialize the headers of all the output pages */
  for(ii=0; ii<nOut; ii++){
    apOut[ii]->p[0] = szKey;
    fts5PutU32(&apOut[ii]->p[4], 0);
  }

  /* Loop through the current pages of the hash table. */
  for(ii=0; res==0 && ii<pSeg->nPgTombstone; ii++){
    Fts5Data *pData = 0;          /* Page ii of the current hash table */
    Fts5Data *pFree = 0;          /* Free this at the end of the loop */

    /* Page iPg1 was already loaded by the caller; read all others. */
    if( iPg1==ii ){
      pData = pData1;
    }else{
      pFree = pData = fts5DataRead(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid, ii));
    }

    if( pData ){
      int szKeyIn = TOMBSTONE_KEYSIZE(pData);
      int nSlotIn = (pData->nn - 8) / szKeyIn;
      int iIn;
      for(iIn=0; iIn<nSlotIn; iIn++){
        u64 iVal = 0;

        /* Read the value from slot iIn of the input page into iVal. */
        if( szKeyIn==4 ){
          u32 *aSlot = (u32*)&pData->p[8];
          if( aSlot[iIn] ) iVal = fts5GetU32((u8*)&aSlot[iIn]);
        }else{
          u64 *aSlot = (u64*)&pData->p[8];
          if( aSlot[iIn] ) iVal = fts5GetU64((u8*)&aSlot[iIn]);
        }

        /* If iVal is not 0 at this point, insert it into the new hash table */
        if( iVal ){
          Fts5Data *pPg = apOut[(iVal % nOut)];
          res = fts5IndexTombstoneAddToPage(pPg, 0, nOut, iVal);
          if( res ) break;
        }
      }

      /* If this is page 0 of the old hash, copy the rowid-0-flag from the
      ** old hash to the new. */
      if( ii==0 ){
        apOut[0]->p[1] = pData->p[1];
      }
    }
    fts5DataRelease(pFree);
  }

  return res;
}
|
|
|
|
/*
|
|
** This is called to rebuild the hash table belonging to segment pSeg.
|
|
** If parameter pData1 is not NULL, then one page of the existing hash table
|
|
** has already been loaded - pData1, which is page iPg1. The key-size for
|
|
** the new hash table is szKey (4 or 8).
|
|
**
|
|
** If successful, the new hash table is not written to disk. Instead,
|
|
** output parameter (*pnOut) is set to the number of pages in the new
|
|
** hash table, and (*papOut) to point to an array of buffers containing
|
|
** the new page data.
|
|
**
|
|
** If an error occurs, an error code is left in the Fts5Index object and
|
|
** both output parameters set to 0 before returning.
|
|
*/
|
|
static void fts5IndexTombstoneRebuild(
  Fts5Index *p,
  Fts5StructureSegment *pSeg,     /* Segment to rebuild hash of */
  Fts5Data *pData1,               /* One page of current hash - or NULL */
  int iPg1,                       /* Which page of the current hash is pData1 */
  int szKey,                      /* 4 or 8, the keysize */
  int *pnOut,                     /* OUT: Number of output pages */
  Fts5Data ***papOut              /* OUT: Output hash pages */
){
  const int MINSLOT = 32;
  int nSlotPerPage = MAX(MINSLOT, (p->pConfig->pgsz - 8) / szKey);
  int nSlot = 0;                  /* Number of slots in each output page */
  int nOut = 0;

  /* Figure out how many output pages (nOut) and how many slots per
  ** page (nSlot). There are three possibilities:
  **
  **   1. The hash table does not yet exist. In this case the new hash
  **      table will consist of a single page with MINSLOT slots.
  **
  **   2. The hash table exists but is currently a single page. In this
  **      case an attempt is made to grow the page to accommodate the new
  **      entry. The page is allowed to grow up to nSlotPerPage (see above)
  **      slots.
  **
  **   3. The hash table already consists of more than one page, or of
  **      a single page already so large that it cannot be grown. In this
  **      case the new hash consists of (nPg*2+1) pages of nSlotPerPage
  **      slots each, where nPg is the current number of pages in the
  **      hash table.
  */
  if( pSeg->nPgTombstone==0 ){
    /* Case 1. */
    nOut = 1;
    nSlot = MINSLOT;
  }else if( pSeg->nPgTombstone==1 ){
    /* Case 2. */
    int nElem = (int)fts5GetU32(&pData1->p[4]);
    assert( pData1 && iPg1==0 );
    nOut = 1;
    nSlot = MAX(nElem*4, MINSLOT);
    /* Too large to remain a single page - fall through to case 3. */
    if( nSlot>nSlotPerPage ) nOut = 0;
  }
  if( nOut==0 ){
    /* Case 3. */
    nOut = (pSeg->nPgTombstone * 2 + 1);
    nSlot = nSlotPerPage;
  }

  /* Allocate the required array and output pages. If the rehash overflows
  ** a page, free everything and retry with roughly twice as many pages. */
  while( 1 ){
    int res = 0;
    int ii = 0;
    int szPage = 0;
    Fts5Data **apOut = 0;

    /* Allocate space for the new hash table */
    assert( nSlot>=MINSLOT );
    apOut = (Fts5Data**)sqlite3Fts5MallocZero(&p->rc, sizeof(Fts5Data*) * nOut);
    szPage = 8 + nSlot*szKey;
    for(ii=0; ii<nOut; ii++){
      /* Each page is allocated with its data area immediately following
      ** the Fts5Data header in the same block. */
      Fts5Data *pNew = (Fts5Data*)sqlite3Fts5MallocZero(&p->rc,
          sizeof(Fts5Data)+szPage
      );
      if( pNew ){
        pNew->nn = szPage;
        pNew->p = (u8*)&pNew[1];
        apOut[ii] = pNew;
      }
    }

    /* Rebuild the hash table. */
    if( p->rc==SQLITE_OK ){
      res = fts5IndexTombstoneRehash(p, pSeg, pData1, iPg1, szKey, nOut, apOut);
    }
    if( res==0 ){
      /* Success - or an allocation error. On error both outputs are
      ** zeroed, as documented in the header comment. */
      if( p->rc ){
        fts5IndexFreeArray(apOut, nOut);
        apOut = 0;
        nOut = 0;
      }
      *pnOut = nOut;
      *papOut = apOut;
      break;
    }

    /* If control flows to here, it was not possible to rebuild the hash
    ** table. Free all buffers and then try again with more pages. */
    assert( p->rc==SQLITE_OK );
    fts5IndexFreeArray(apOut, nOut);
    nSlot = nSlotPerPage;
    nOut = nOut*2 + 1;
  }
}
|
|
|
|
|
|
/*
|
|
** Add a tombstone for rowid iRowid to segment pSeg.
|
|
*/
|
|
static void fts5IndexTombstoneAdd(
  Fts5Index *p,
  Fts5StructureSegment *pSeg,     /* Segment to add the tombstone to */
  u64 iRowid                      /* Rowid of the deleted row */
){
  Fts5Data *pPg = 0;              /* Current hash page for iRowid, if any */
  int iPg = -1;                   /* Page number of pPg within the hash */
  int szKey = 0;                  /* Key size (4 or 8) for a rebuilt hash */
  int nHash = 0;                  /* Number of pages in rebuilt hash */
  Fts5Data **apHash = 0;          /* Pages of rebuilt hash */

  p->nContentlessDelete++;

  /* Fast path: try to insert into the existing hash page for iRowid. */
  if( pSeg->nPgTombstone>0 ){
    iPg = iRowid % pSeg->nPgTombstone;
    pPg = fts5DataRead(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid,iPg));
    if( pPg==0 ){
      assert( p->rc!=SQLITE_OK );
      return;
    }

    if( 0==fts5IndexTombstoneAddToPage(pPg, 0, pSeg->nPgTombstone, iRowid) ){
      fts5DataWrite(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid,iPg), pPg->p, pPg->nn);
      fts5DataRelease(pPg);
      return;
    }
  }

  /* Have to rebuild the hash table. First figure out the key-size (4 or 8). */
  szKey = pPg ? TOMBSTONE_KEYSIZE(pPg) : 4;
  if( iRowid>0xFFFFFFFF ) szKey = 8;

  /* Rebuild the hash table */
  fts5IndexTombstoneRebuild(p, pSeg, pPg, iPg, szKey, &nHash, &apHash);
  assert( p->rc==SQLITE_OK || (nHash==0 && apHash==0) );

  /* If all has succeeded, write the new rowid into one of the new hash
  ** table pages, then write them all out to disk. */
  if( nHash ){
    int ii = 0;
    /* bForce=1: the rebuilt table is guaranteed to have room. */
    fts5IndexTombstoneAddToPage(apHash[iRowid % nHash], 1, nHash, iRowid);
    for(ii=0; ii<nHash; ii++){
      i64 iTombstoneRowid = FTS5_TOMBSTONE_ROWID(pSeg->iSegid, ii);
      fts5DataWrite(p, iTombstoneRowid, apHash[ii]->p, apHash[ii]->nn);
    }
    pSeg->nPgTombstone = nHash;
    fts5StructureWrite(p, p->pStruct);
  }

  fts5DataRelease(pPg);
  fts5IndexFreeArray(apHash, nHash);
}
|
|
|
|
/*
|
|
** Add iRowid to the tombstone list of the segment or segments that contain
|
|
** rows from origin iOrigin. Return SQLITE_OK if successful, or an SQLite
|
|
** error code otherwise.
|
|
*/
|
|
int sqlite3Fts5IndexContentlessDelete(Fts5Index *p, i64 iOrigin, i64 iRowid){
  Fts5Structure *pStruct;
  pStruct = fts5StructureRead(p);
  if( pStruct ){
    int bFound = 0;               /* True after pSeg->nEntryTombstone incr. */
    int iLvl;
    /* Visit every segment whose origin range covers iOrigin. The rowid may
    ** appear in each of them, so a tombstone is added to all. */
    for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){
      int iSeg;
      for(iSeg=pStruct->aLevel[iLvl].nSeg-1; iSeg>=0; iSeg--){
        Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
        if( pSeg->iOrigin1<=(u64)iOrigin && pSeg->iOrigin2>=(u64)iOrigin ){
          /* Count the logical delete only once, against the first
          ** (newest) matching segment. */
          if( bFound==0 ){
            pSeg->nEntryTombstone++;
            bFound = 1;
          }
          fts5IndexTombstoneAdd(p, pSeg, iRowid);
        }
      }
    }
    fts5StructureRelease(pStruct);
  }
  return fts5IndexReturn(p);
}
|
|
|
|
/*************************************************************************
|
|
**************************************************************************
|
|
** Below this point is the implementation of the integrity-check
|
|
** functionality.
|
|
*/
|
|
|
|
/*
|
|
** Return a simple checksum value based on the arguments.
|
|
*/
|
|
u64 sqlite3Fts5IndexEntryCksum(
|
|
i64 iRowid,
|
|
int iCol,
|
|
int iPos,
|
|
int iIdx,
|
|
const char *pTerm,
|
|
int nTerm
|
|
){
|
|
int i;
|
|
u64 ret = iRowid;
|
|
ret += (ret<<3) + iCol;
|
|
ret += (ret<<3) + iPos;
|
|
if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX + iIdx);
|
|
for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i];
|
|
return ret;
|
|
}
|
|
|
|
#ifdef SQLITE_DEBUG
|
|
/*
|
|
** This function is purely an internal test. It does not contribute to
|
|
** FTS functionality, or even the integrity-check, in any way.
|
|
**
|
|
** Instead, it tests that the same set of pgno/rowid combinations are
|
|
** visited regardless of whether the doclist-index identified by parameters
|
|
** iSegid/iLeaf is iterated in forwards or reverse order.
|
|
*/
|
|
static void fts5TestDlidxReverse(
  Fts5Index *p,
  int iSegid,                     /* Segment id to load from */
  int iLeaf                       /* Load doclist-index for this leaf */
){
  Fts5DlidxIter *pDlidx = 0;
  u64 cksum1 = 13;                /* Checksum from forward iteration */
  u64 cksum2 = 13;                /* Checksum from reverse iteration */

  /* Forward pass: fold every (rowid, pgno) pair into cksum1. */
  for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf);
      fts5DlidxIterEof(p, pDlidx)==0;
      fts5DlidxIterNext(p, pDlidx)
  ){
    i64 iRowid = fts5DlidxIterRowid(pDlidx);
    int pgno = fts5DlidxIterPgno(pDlidx);
    assert( pgno>iLeaf );
    cksum1 += iRowid + ((i64)pgno<<32);
  }
  fts5DlidxIterFree(pDlidx);
  pDlidx = 0;

  /* Reverse pass: the same pairs must be visited, in the other order. */
  for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf);
      fts5DlidxIterEof(p, pDlidx)==0;
      fts5DlidxIterPrev(p, pDlidx)
  ){
    i64 iRowid = fts5DlidxIterRowid(pDlidx);
    int pgno = fts5DlidxIterPgno(pDlidx);
    assert( fts5DlidxIterPgno(pDlidx)>iLeaf );
    cksum2 += iRowid + ((i64)pgno<<32);
  }
  fts5DlidxIterFree(pDlidx);
  pDlidx = 0;

  /* Addition is order-independent, so matching checksums imply the two
  ** directions visited the same set of entries. */
  if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT;
}
|
|
|
|
/*
** Run a query for term z/n with the given flags and fold an entry checksum
** for every (rowid, col, offset) hit into (*pCksum). Used by the
** integrity-check to compare query results across configurations.
*/
static int fts5QueryCksum(
  Fts5Index *p,                   /* Fts5 index object */
  int iIdx,                       /* Index (0==main) the term belongs to */
  const char *z,                  /* Index key to query for */
  int n,                          /* Size of index key in bytes */
  int flags,                      /* Flags for Fts5IndexQuery */
  u64 *pCksum                     /* IN/OUT: Checksum value */
){
  int eDetail = p->pConfig->eDetail;
  u64 cksum = *pCksum;
  Fts5IndexIter *pIter = 0;
  /* NOTOKENDATA so tokendata=1 tables are queried like ordinary ones. */
  int rc = sqlite3Fts5IndexQuery(
      p, z, n, (flags | FTS5INDEX_QUERY_NOTOKENDATA), 0, &pIter
  );

  while( rc==SQLITE_OK && ALWAYS(pIter!=0) && 0==sqlite3Fts5IterEof(pIter) ){
    i64 rowid = pIter->iRowid;

    if( eDetail==FTS5_DETAIL_NONE ){
      /* No position data stored - one checksum entry per row. */
      cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n);
    }else{
      /* One checksum entry per position in the row's poslist. */
      Fts5PoslistReader sReader;
      for(sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader);
          sReader.bEof==0;
          sqlite3Fts5PoslistReaderNext(&sReader)
      ){
        int iCol = FTS5_POS2COLUMN(sReader.iPos);
        int iOff = FTS5_POS2OFFSET(sReader.iPos);
        cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
      }
    }
    if( rc==SQLITE_OK ){
      rc = sqlite3Fts5IterNext(pIter);
    }
  }
  sqlite3Fts5IterClose(pIter);

  *pCksum = cksum;
  return rc;
}
|
|
|
|
/*
|
|
** Check if buffer z[], size n bytes, contains as series of valid utf-8
|
|
** encoded codepoints. If so, return 0. Otherwise, if the buffer does not
|
|
** contain valid utf-8, return non-zero.
|
|
*/
|
|
static int fts5TestUtf8(const char *z, int n){
  int i = 0;
  assert_nc( n>0 );
  while( i<n ){
    /* ASCII: single byte with the high bit clear. */
    if( (z[i] & 0x80)==0x00 ){
      i++;
    }else
    /* 2-byte sequence: 110xxxxx followed by one continuation byte. */
    if( (z[i] & 0xE0)==0xC0 ){
      if( i+1>=n || (z[i+1] & 0xC0)!=0x80 ) return 1;
      i += 2;
    }else
    /* 3-byte sequence: 1110xxxx followed by two continuation bytes. */
    if( (z[i] & 0xF0)==0xE0 ){
      if( i+2>=n || (z[i+1] & 0xC0)!=0x80 || (z[i+2] & 0xC0)!=0x80 ) return 1;
      i += 3;
    }else
    /* 4-byte sequence: 11110xxx followed by three continuation bytes.
    ** Bug fix: the original code tested z[i+2] twice instead of testing
    ** z[i+3], and advanced by only 3 bytes - so every valid 4-byte
    ** sequence (all supplementary-plane codepoints) was reported as
    ** invalid. Test the fourth byte and advance by 4. */
    if( (z[i] & 0xF8)==0xF0 ){
      if( i+3>=n || (z[i+1] & 0xC0)!=0x80 || (z[i+2] & 0xC0)!=0x80 ) return 1;
      if( (z[i+3] & 0xC0)!=0x80 ) return 1;
      i += 4;
    }else{
      /* Invalid lead byte (e.g. a stray continuation byte or 0xF8-0xFF). */
      return 1;
    }
  }

  return 0;
}
|
|
|
|
/*
** This function is also purely an internal test. It does not contribute to
** FTS functionality, or even the integrity-check, in any way.
**
** If (z/n) differs from the buffered previous term in *pPrev, the previous
** term is queried in several ways (ASC, DESC, and with the prefix index
** disabled) and the resulting checksums compared. Any mismatch, or a
** mismatch between the accumulated query checksum and the "expected"
** linear-scan checksum, sets p->rc to FTS5_CORRUPT. *pCksum accumulates
** the per-query checksum (ck1) across calls.
*/
static void fts5TestTerm(
  Fts5Index *p,
  Fts5Buffer *pPrev,              /* Previous term */
  const char *z, int n,           /* Possibly new term to test */
  u64 expected,                   /* Expected linear-scan checksum so far */
  u64 *pCksum                     /* IN/OUT: accumulated query checksum */
){
  int rc = p->rc;
  if( pPrev->n==0 ){
    /* First call - just buffer the term for later comparison */
    fts5BufferSet(&rc, pPrev, n, (const u8*)z);
  }else
  if( rc==SQLITE_OK && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){
    u64 cksum3 = *pCksum;
    const char *zTerm = (const char*)&pPrev->p[1];  /* term sans prefix-byte */
    int nTerm = pPrev->n-1;            /* Size of zTerm in bytes */
    int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX);
    int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX);
    u64 ck1 = 0;
    u64 ck2 = 0;

    /* Check that the results returned for ASC and DESC queries are
    ** the same. If not, call this corruption.  */
    rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1);
    if( rc==SQLITE_OK ){
      int f = flags|FTS5INDEX_QUERY_DESC;
      rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
    }
    if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;

    /* If this is a prefix query, check that the results returned if the
    ** the index is disabled are the same. In both ASC and DESC order.
    **
    ** This check may only be performed if the hash table is empty. This
    ** is because the hash table only supports a single scan query at
    ** a time, and the multi-iter loop from which this function is called
    ** is already performing such a scan.
    **
    ** Also only do this if buffer zTerm contains nTerm bytes of valid
    ** utf-8. Otherwise, the last part of the buffer contents might contain
    ** a non-utf-8 sequence that happens to be a prefix of a valid utf-8
    ** character stored in the main fts index, which will cause the
    ** test to fail.  */
    if( p->nPendingData==0 && 0==fts5TestUtf8(zTerm, nTerm) ){
      if( iIdx>0 && rc==SQLITE_OK ){
        int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
        ck2 = 0;
        rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
        if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
      }
      if( iIdx>0 && rc==SQLITE_OK ){
        int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC;
        ck2 = 0;
        rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
        if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
      }
    }

    cksum3 ^= ck1;
    /* Buffer the new term for the next invocation */
    fts5BufferSet(&rc, pPrev, n, (const u8*)z);

    if( rc==SQLITE_OK && cksum3!=expected ){
      rc = FTS5_CORRUPT;
    }
    *pCksum = cksum3;
  }
  p->rc = rc;
}
|
|
|
|
#else
|
|
# define fts5TestDlidxReverse(x,y,z)
|
|
# define fts5TestTerm(u,v,w,x,y,z)
|
|
#endif
|
|
|
|
/*
|
|
** Check that:
|
|
**
|
|
** 1) All leaves of pSeg between iFirst and iLast (inclusive) exist and
|
|
** contain zero terms.
|
|
** 2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and
|
|
** contain zero rowids.
|
|
*/
|
|
static void fts5IndexIntegrityCheckEmpty(
|
|
Fts5Index *p,
|
|
Fts5StructureSegment *pSeg, /* Segment to check internal consistency */
|
|
int iFirst,
|
|
int iNoRowid,
|
|
int iLast
|
|
){
|
|
int i;
|
|
|
|
/* Now check that the iter.nEmpty leaves following the current leaf
|
|
** (a) exist and (b) contain no terms. */
|
|
for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){
|
|
Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i));
|
|
if( pLeaf ){
|
|
if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT;
|
|
if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT;
|
|
}
|
|
fts5DataRelease(pLeaf);
|
|
}
|
|
}
|
|
|
|
/*
** Check the page-index (pgidx) area of leaf page pLeaf. The pgidx occupies
** bytes szLeaf..nn of the page and stores varint deltas to the offset of
** each term on the page. For each term, verify that its offset lies within
** the leaf, that its prefix-compression "keep" count does not exceed the
** previous term, and that terms appear in strictly ascending order.
** Any failure sets p->rc to FTS5_CORRUPT.
*/
static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){
  i64 iTermOff = 0;               /* Absolute offset of current term */
  int ii;                         /* Read offset within the pgidx area */

  Fts5Buffer buf1 = {0,0,0};      /* Current reassembled term */
  Fts5Buffer buf2 = {0,0,0};      /* Previous reassembled term */

  ii = pLeaf->szLeaf;
  while( ii<pLeaf->nn && p->rc==SQLITE_OK ){
    int res;
    i64 iOff;
    int nIncr;

    /* Each pgidx entry is a delta from the previous term offset */
    ii += fts5GetVarint32(&pLeaf->p[ii], nIncr);
    iTermOff += nIncr;
    iOff = iTermOff;

    if( iOff>=pLeaf->szLeaf ){
      p->rc = FTS5_CORRUPT;
    }else if( iTermOff==nIncr ){
      /* First term on the page - stored in full (no prefix compression) */
      int nByte;
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
      if( (iOff+nByte)>pLeaf->szLeaf ){
        p->rc = FTS5_CORRUPT;
      }else{
        fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
      }
    }else{
      /* Subsequent term - nKeep bytes shared with the previous term,
      ** followed by nByte new bytes. */
      int nKeep, nByte;
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep);
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
      if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){
        p->rc = FTS5_CORRUPT;
      }else{
        buf1.n = nKeep;
        fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
      }

      if( p->rc==SQLITE_OK ){
        /* Terms must be strictly ascending within the page */
        res = fts5BufferCompare(&buf1, &buf2);
        if( res<=0 ) p->rc = FTS5_CORRUPT;
      }
    }
    fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p);
  }

  fts5BufferFree(&buf1);
  fts5BufferFree(&buf2);
}
|
|
|
|
/*
** Run internal consistency checks on segment pSeg. For each entry in the
** %_idx table belonging to this segment, verify that the indicated leaf
** page exists, that its first term is >= the %_idx split-key, that the
** intervening "empty" leaves contain no terms or rowids, and - if the
** entry has a doclist-index - that the dlidx agrees with the leaves it
** points at. Any failure is recorded in p->rc (FTS5_CORRUPT or an SQLite
** error code).
*/
static void fts5IndexIntegrityCheckSegment(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5StructureSegment *pSeg      /* Segment to check internal consistency */
){
  Fts5Config *pConfig = p->pConfig;
  int bSecureDelete = (pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE);
  sqlite3_stmt *pStmt = 0;
  int rc2;
  int iIdxPrevLeaf = pSeg->pgnoFirst-1;   /* Leaf visited by previous %_idx row */
  int iDlidxPrevLeaf = pSeg->pgnoLast;    /* Last leaf covered by previous dlidx */

  /* A segment with pgnoFirst==0 contains no data - nothing to check */
  if( pSeg->pgnoFirst==0 ) return;

  /* %_idx stores (pgno<<1)|bDlidx in its "pgno" column - split it out here */
  fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintf(
      "SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d "
      "ORDER BY 1, 2",
      pConfig->zDb, pConfig->zName, pSeg->iSegid
  ));

  /* Iterate through the b-tree hierarchy.  */
  while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
    i64 iRow;                     /* Rowid for this leaf */
    Fts5Data *pLeaf;              /* Data for this leaf */

    const char *zIdxTerm = (const char*)sqlite3_column_blob(pStmt, 1);
    int nIdxTerm = sqlite3_column_bytes(pStmt, 1);
    int iIdxLeaf = sqlite3_column_int(pStmt, 2);
    int bIdxDlidx = sqlite3_column_int(pStmt, 3);

    /* If the leaf in question has already been trimmed from the segment,
    ** ignore this b-tree entry. Otherwise, load it into memory. */
    if( iIdxLeaf<pSeg->pgnoFirst ) continue;
    iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf);
    pLeaf = fts5LeafRead(p, iRow);
    if( pLeaf==0 ) break;

    /* Check that the leaf contains at least one term, and that it is equal
    ** to or larger than the split-key in zIdxTerm.  Also check that if there
    ** is also a rowid pointer within the leaf page header, it points to a
    ** location before the term. */
    if( pLeaf->nn<=pLeaf->szLeaf ){
      /* Leaf has no pgidx area - i.e. it contains no terms at all */
      if( nIdxTerm==0
       && pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE
       && pLeaf->nn==pLeaf->szLeaf
       && pLeaf->nn==4
      ){
        /* special case - the very first page in a segment keeps its %_idx
        ** entry even if all the terms are removed from it by secure-delete
        ** operations. */
      }else{
        p->rc = FTS5_CORRUPT;
      }

    }else{
      int iOff;                   /* Offset of first term on leaf */
      int iRowidOff;              /* Offset of first rowid on leaf */
      int nTerm;                  /* Size of term on leaf in bytes */
      int res;                    /* Comparison of term and split-key */

      iOff = fts5LeafFirstTermOff(pLeaf);
      iRowidOff = fts5LeafFirstRowidOff(pLeaf);
      if( iRowidOff>=iOff || iOff>=pLeaf->szLeaf ){
        p->rc = FTS5_CORRUPT;
      }else{
        /* First term must be >= the %_idx split-key */
        iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm);
        res = fts5Memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm));
        if( res==0 ) res = nTerm - nIdxTerm;
        if( res<0 ) p->rc = FTS5_CORRUPT;
      }

      fts5IntegrityCheckPgidx(p, pLeaf);
    }
    fts5DataRelease(pLeaf);
    if( p->rc ) break;

    /* Now check that the iter.nEmpty leaves following the current leaf
    ** (a) exist and (b) contain no terms. */
    fts5IndexIntegrityCheckEmpty(
        p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1
    );
    if( p->rc ) break;

    /* If there is a doclist-index, check that it looks right. */
    if( bIdxDlidx ){
      Fts5DlidxIter *pDlidx = 0;  /* For iterating through doclist index */
      int iPrevLeaf = iIdxLeaf;
      int iSegid = pSeg->iSegid;
      int iPg = 0;
      i64 iKey;

      for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf);
          fts5DlidxIterEof(p, pDlidx)==0;
          fts5DlidxIterNext(p, pDlidx)
      ){

        /* Check any rowid-less pages that occur before the current leaf. */
        for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){
          iKey = FTS5_SEGMENT_ROWID(iSegid, iPg);
          pLeaf = fts5DataRead(p, iKey);
          if( pLeaf ){
            if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT;
            fts5DataRelease(pLeaf);
          }
        }
        iPrevLeaf = fts5DlidxIterPgno(pDlidx);

        /* Check that the leaf page indicated by the iterator really does
        ** contain the rowid suggested by the same. */
        iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf);
        pLeaf = fts5DataRead(p, iKey);
        if( pLeaf ){
          i64 iRowid;
          int iRowidOff = fts5LeafFirstRowidOff(pLeaf);
          ASSERT_SZLEAF_OK(pLeaf);
          if( iRowidOff>=pLeaf->szLeaf ){
            p->rc = FTS5_CORRUPT;
          }else if( bSecureDelete==0 || iRowidOff>0 ){
            /* In secure-delete mode the leaf rowid may be larger than the
            ** dlidx rowid; otherwise they must match exactly. */
            i64 iDlRowid = fts5DlidxIterRowid(pDlidx);
            fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
            if( iRowid<iDlRowid || (bSecureDelete==0 && iRowid!=iDlRowid) ){
              p->rc = FTS5_CORRUPT;
            }
          }
          fts5DataRelease(pLeaf);
        }
      }

      iDlidxPrevLeaf = iPg;
      fts5DlidxIterFree(pDlidx);
      fts5TestDlidxReverse(p, iSegid, iIdxLeaf);
    }else{
      iDlidxPrevLeaf = pSeg->pgnoLast;
      /* TODO: Check there is no doclist index */
    }

    iIdxPrevLeaf = iIdxLeaf;
  }

  rc2 = sqlite3_finalize(pStmt);
  if( p->rc==SQLITE_OK ) p->rc = rc2;

  /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */
#if 0
  if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
    p->rc = FTS5_CORRUPT;
  }
#endif
}
|
|
|
|
|
|
/*
** Run internal checks to ensure that the FTS index (a) is internally
** consistent and (b) contains entries for which the XOR of the checksums
** as calculated by sqlite3Fts5IndexEntryCksum() is cksum.
**
** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
** checksum does not match. Return SQLITE_OK if all checks pass without
** error, or some other SQLite error code if another error (e.g. OOM)
** occurs.
*/
int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum, int bUseCksum){
  int eDetail = p->pConfig->eDetail;
  u64 cksum2 = 0;                 /* Checksum based on contents of indexes */
  Fts5Buffer poslist = {0,0,0};   /* Buffer used to hold a poslist */
  Fts5Iter *pIter;                /* Used to iterate through entire index */
  Fts5Structure *pStruct;         /* Index structure */
  int iLvl, iSeg;

#ifdef SQLITE_DEBUG
  /* Used by extra internal tests only run if NDEBUG is not defined */
  u64 cksum3 = 0;                 /* Checksum based on contents of indexes */
  Fts5Buffer term = {0,0,0};      /* Buffer used to hold most recent term */
#endif
  const int flags = FTS5INDEX_QUERY_NOOUTPUT;

  /* Load the FTS index structure */
  pStruct = fts5StructureRead(p);
  if( pStruct==0 ){
    assert( p->rc!=SQLITE_OK );
    return fts5IndexReturn(p);
  }

  /* Check that the internal nodes of each segment match the leaves */
  for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
    for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
      Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
      fts5IndexIntegrityCheckSegment(p, pSeg);
    }
  }

  /* The cksum argument passed to this function is a checksum calculated
  ** based on all expected entries in the FTS index (including prefix index
  ** entries). This block checks that a checksum calculated based on the
  ** actual contents of FTS index is identical.
  **
  ** Two versions of the same checksum are calculated. The first (stack
  ** variable cksum2) based on entries extracted from the full-text index
  ** while doing a linear scan of each individual index in turn.
  **
  ** As each term visited by the linear scans, a separate query for the
  ** same term is performed. cksum3 is calculated based on the entries
  ** extracted by these queries.
  */
  for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter);
      fts5MultiIterEof(p, pIter)==0;
      fts5MultiIterNext(p, pIter, 0, 0)
  ){
    int n;                      /* Size of term in bytes */
    i64 iPos = 0;               /* Position read from poslist */
    int iOff = 0;               /* Offset within poslist */
    i64 iRowid = fts5MultiIterRowid(pIter);
    char *z = (char*)fts5MultiIterTerm(pIter, &n);

    /* If this is a new term, query for it. Update cksum3 with the results. */
    fts5TestTerm(p, &term, z, n, cksum2, &cksum3);
    if( p->rc ) break;

    if( eDetail==FTS5_DETAIL_NONE ){
      if( 0==fts5MultiIterIsEmpty(p, pIter) ){
        cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n);
      }
    }else{
      /* Read the poslist for the current entry and fold an entry checksum
      ** into cksum2 for each position. The 4-byte 0x00 pad guards the
      ** varint reader against running off the end of the buffer. */
      poslist.n = 0;
      fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist);
      fts5BufferAppendBlob(&p->rc, &poslist, 4, (const u8*)"\0\0\0\0");
      while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
        int iCol = FTS5_POS2COLUMN(iPos);
        int iTokOff = FTS5_POS2OFFSET(iPos);
        cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
      }
    }
  }
  /* Flush the final buffered term through fts5TestTerm() */
  fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3);

  fts5MultiIterFree(pIter);
  if( p->rc==SQLITE_OK && bUseCksum && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;

  fts5StructureRelease(pStruct);
#ifdef SQLITE_DEBUG
  fts5BufferFree(&term);
#endif
  fts5BufferFree(&poslist);
  return fts5IndexReturn(p);
}
|
|
|
|
/*************************************************************************
|
|
**************************************************************************
|
|
** Below this point is the implementation of the fts5_decode() scalar
|
|
** function only.
|
|
*/
|
|
|
|
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** Decode a segment-data rowid from the %_data table. This function is
** the opposite of macro FTS5_SEGMENT_ROWID(). Fields are unpacked from
** the least-significant bits upward.
*/
static void fts5DecodeRowid(
  i64 iRowid,                     /* Rowid from %_data table */
  int *pbTombstone,               /* OUT: Tombstone hash flag */
  int *piSegid,                   /* OUT: Segment id */
  int *pbDlidx,                   /* OUT: Dlidx flag */
  int *piHeight,                  /* OUT: Height */
  int *piPgno                     /* OUT: Page number */
){
  i64 iVal = iRowid;

  *piPgno = (int)(iVal & (((i64)1 << FTS5_DATA_PAGE_B) - 1));
  iVal >>= FTS5_DATA_PAGE_B;

  *piHeight = (int)(iVal & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1));
  iVal >>= FTS5_DATA_HEIGHT_B;

  *pbDlidx = (int)(iVal & 0x0001);
  iVal >>= FTS5_DATA_DLI_B;

  *piSegid = (int)(iVal & (((i64)1 << FTS5_DATA_ID_B) - 1));
  iVal >>= FTS5_DATA_ID_B;

  *pbTombstone = (int)(iVal & 0x0001);
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
|
|
|
|
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** Append a human-readable decoding of %_data rowid iKey to buffer pBuf.
*/
static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){
  int iSegid, iHeight, iPgno, bDlidx, bTomb;   /* Rowid compenents */
  fts5DecodeRowid(iKey, &bTomb, &iSegid, &bDlidx, &iHeight, &iPgno);

  if( iSegid!=0 ){
    /* A segment page (or dlidx/tombstone page belonging to a segment) */
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%s%ssegid=%d h=%d pgno=%d}",
        bDlidx ? "dlidx " : "",
        bTomb ? "tombstone " : "",
        iSegid, iHeight, iPgno
    );
  }else if( iKey==FTS5_AVERAGES_ROWID ){
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} ");
  }else{
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}");
  }
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
|
|
|
|
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** Append a human-readable rendering of structure object p - one "{lvl=...}"
** group per level, each containing one "{id=...}" group per segment - to
** buffer pBuf.
*/
static void fts5DebugStructure(
  int *pRc,                       /* IN/OUT: error code */
  Fts5Buffer *pBuf,
  Fts5Structure *p
){
  int ii, jj;                     /* Iterate through levels, segments */

  for(ii=0; ii<p->nLevel; ii++){
    Fts5StructureLevel *pLvl = &p->aLevel[ii];
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf,
        " {lvl=%d nMerge=%d nSeg=%d", ii, pLvl->nMerge, pLvl->nSeg
    );
    for(jj=0; jj<pLvl->nSeg; jj++){
      Fts5StructureSegment *pSeg = &pLvl->aSeg[jj];
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d",
          pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast
      );
      if( pSeg->iOrigin1>0 ){
        sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " origin=%lld..%lld",
            pSeg->iOrigin1, pSeg->iOrigin2
        );
      }
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
    }
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
  }
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
|
|
|
|
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** This is part of the fts5_decode() debugging aid.
**
** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This
** function appends a human-readable representation of the same object
** to the buffer passed as the second argument.
*/
static void fts5DecodeStructure(
  int *pRc,                       /* IN/OUT: error code */
  Fts5Buffer *pBuf,
  const u8 *pBlob, int nBlob
){
  Fts5Structure *p = 0;           /* Decoded structure object */
  int rc = fts5StructureDecode(pBlob, nBlob, 0, &p);

  if( rc==SQLITE_OK ){
    fts5DebugStructure(pRc, pBuf, p);
    fts5StructureRelease(p);
  }else{
    *pRc = rc;
  }
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
|
|
|
|
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** This is part of the fts5_decode() debugging aid.
**
** Arguments pBlob/nBlob contain an "averages" record. This function
** appends a human-readable representation of record to the buffer passed
** as the second argument.
*/
static void fts5DecodeAverages(
  int *pRc,                       /* IN/OUT: error code */
  Fts5Buffer *pBuf,
  const u8 *pBlob, int nBlob
){
  int iOff = 0;                   /* Read offset within pBlob[] */
  const char *zSep = "";          /* Separator - "" before first value */

  while( iOff<nBlob ){
    u64 iVal;
    iOff += sqlite3Fts5GetVarint(&pBlob[iOff], &iVal);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%s%d", zSep, (int)iVal);
    zSep = " ";
  }
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
|
|
|
|
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** Buffer (a/n) is assumed to contain a list of serialized varints. Read
** each varint until the input buffer is exhausted, appending its string
** representation to buffer pBuf.
**
** The return value is the number of bytes read from the input buffer.
*/
static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
  int i;
  for(i=0; i<n; /* advanced in body */){
    int iVal;
    i += fts5GetVarint32(&a[i], iVal);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal);
  }
  return i;
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
|
|
|
|
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
|
|
/*
|
|
** The start of buffer (a/n) contains the start of a doclist. The doclist
|
|
** may or may not finish within the buffer. This function appends a text
|
|
** representation of the part of the doclist that is present to buffer
|
|
** pBuf.
|
|
**
|
|
** The return value is the number of bytes read from the input buffer.
|
|
*/
|
|
static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
|
|
i64 iDocid = 0;
|
|
int iOff = 0;
|
|
|
|
if( n>0 ){
|
|
iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid);
|
|
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
|
|
}
|
|
while( iOff<n ){
|
|
int nPos;
|
|
int bDel;
|
|
iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDel);
|
|
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " nPos=%d%s", nPos, bDel?"*":"");
|
|
iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos));
|
|
if( iOff<n ){
|
|
i64 iDelta;
|
|
iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta);
|
|
iDocid += iDelta;
|
|
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
|
|
}
|
|
}
|
|
|
|
return iOff;
|
|
}
|
|
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
|
|
|
|
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
|
|
/*
|
|
** This function is part of the fts5_decode() debugging function. It is
|
|
** only ever used with detail=none tables.
|
|
**
|
|
** Buffer (pData/nData) contains a doclist in the format used by detail=none
|
|
** tables. This function appends a human-readable version of that list to
|
|
** buffer pBuf.
|
|
**
|
|
** If *pRc is other than SQLITE_OK when this function is called, it is a
|
|
** no-op. If an OOM or other error occurs within this function, *pRc is
|
|
** set to an SQLite error code before returning. The final state of buffer
|
|
** pBuf is undefined in this case.
|
|
*/
|
|
static void fts5DecodeRowidList(
|
|
int *pRc, /* IN/OUT: Error code */
|
|
Fts5Buffer *pBuf, /* Buffer to append text to */
|
|
const u8 *pData, int nData /* Data to decode list-of-rowids from */
|
|
){
|
|
int i = 0;
|
|
i64 iRowid = 0;
|
|
|
|
while( i<nData ){
|
|
const char *zApp = "";
|
|
u64 iVal;
|
|
i += sqlite3Fts5GetVarint(&pData[i], &iVal);
|
|
iRowid += iVal;
|
|
|
|
if( i<nData && pData[i]==0x00 ){
|
|
i++;
|
|
if( i<nData && pData[i]==0x00 ){
|
|
i++;
|
|
zApp = "+";
|
|
}else{
|
|
zApp = "*";
|
|
}
|
|
}
|
|
|
|
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %lld%s", iRowid, zApp);
|
|
}
|
|
}
|
|
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
|
|
|
|
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** Append the contents of term buffer pTerm to output buffer pBuf,
** rendering each embedded 0x00 byte as the two characters "\0" so the
** result is printable. The output is nul-terminated.
*/
static void fts5BufferAppendTerm(int *pRc, Fts5Buffer *pBuf, Fts5Buffer *pTerm){
  int i;
  /* Worst case: every byte expands to two characters, plus terminator */
  fts5BufferGrow(pRc, pBuf, pTerm->n*2 + 1);
  if( *pRc==SQLITE_OK ){
    for(i=0; i<pTerm->n; i++){
      u8 c = pTerm->p[i];
      if( c==0x00 ){
        pBuf->p[pBuf->n++] = '\\';
        pBuf->p[pBuf->n++] = '0';
      }else{
        pBuf->p[pBuf->n++] = c;
      }
    }
    pBuf->p[pBuf->n] = 0x00;
  }
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
|
|
|
|
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
|
|
/*
|
|
** The implementation of user-defined scalar function fts5_decode().
|
|
*/
|
|
static void fts5DecodeFunction(
|
|
sqlite3_context *pCtx, /* Function call context */
|
|
int nArg, /* Number of args (always 2) */
|
|
sqlite3_value **apVal /* Function arguments */
|
|
){
|
|
i64 iRowid; /* Rowid for record being decoded */
|
|
int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */
|
|
int bTomb;
|
|
const u8 *aBlob; int n; /* Record to decode */
|
|
u8 *a = 0;
|
|
Fts5Buffer s; /* Build up text to return here */
|
|
int rc = SQLITE_OK; /* Return code */
|
|
sqlite3_int64 nSpace = 0;
|
|
int eDetailNone = (sqlite3_user_data(pCtx)!=0);
|
|
|
|
assert( nArg==2 );
|
|
UNUSED_PARAM(nArg);
|
|
memset(&s, 0, sizeof(Fts5Buffer));
|
|
iRowid = sqlite3_value_int64(apVal[0]);
|
|
|
|
/* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[]
|
|
** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
|
|
** buffer overreads even if the record is corrupt. */
|
|
n = sqlite3_value_bytes(apVal[1]);
|
|
aBlob = sqlite3_value_blob(apVal[1]);
|
|
nSpace = n + FTS5_DATA_ZERO_PADDING;
|
|
a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
|
|
if( a==0 ) goto decode_out;
|
|
if( n>0 ) memcpy(a, aBlob, n);
|
|
|
|
fts5DecodeRowid(iRowid, &bTomb, &iSegid, &bDlidx, &iHeight, &iPgno);
|
|
|
|
fts5DebugRowid(&rc, &s, iRowid);
|
|
if( bDlidx ){
|
|
Fts5Data dlidx;
|
|
Fts5DlidxLvl lvl;
|
|
|
|
dlidx.p = a;
|
|
dlidx.nn = n;
|
|
|
|
memset(&lvl, 0, sizeof(Fts5DlidxLvl));
|
|
lvl.pData = &dlidx;
|
|
lvl.iLeafPgno = iPgno;
|
|
|
|
for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){
|
|
sqlite3Fts5BufferAppendPrintf(&rc, &s,
|
|
" %d(%lld)", lvl.iLeafPgno, lvl.iRowid
|
|
);
|
|
}
|
|
}else if( bTomb ){
|
|
u32 nElem = fts5GetU32(&a[4]);
|
|
int szKey = (aBlob[0]==4 || aBlob[0]==8) ? aBlob[0] : 8;
|
|
int nSlot = (n - 8) / szKey;
|
|
int ii;
|
|
sqlite3Fts5BufferAppendPrintf(&rc, &s, " nElem=%d", (int)nElem);
|
|
if( aBlob[1] ){
|
|
sqlite3Fts5BufferAppendPrintf(&rc, &s, " 0");
|
|
}
|
|
for(ii=0; ii<nSlot; ii++){
|
|
u64 iVal = 0;
|
|
if( szKey==4 ){
|
|
u32 *aSlot = (u32*)&aBlob[8];
|
|
if( aSlot[ii] ) iVal = fts5GetU32((u8*)&aSlot[ii]);
|
|
}else{
|
|
u64 *aSlot = (u64*)&aBlob[8];
|
|
if( aSlot[ii] ) iVal = fts5GetU64((u8*)&aSlot[ii]);
|
|
}
|
|
if( iVal!=0 ){
|
|
sqlite3Fts5BufferAppendPrintf(&rc, &s, " %lld", (i64)iVal);
|
|
}
|
|
}
|
|
}else if( iSegid==0 ){
|
|
if( iRowid==FTS5_AVERAGES_ROWID ){
|
|
fts5DecodeAverages(&rc, &s, a, n);
|
|
}else{
|
|
fts5DecodeStructure(&rc, &s, a, n);
|
|
}
|
|
}else if( eDetailNone ){
|
|
Fts5Buffer term; /* Current term read from page */
|
|
int szLeaf;
|
|
int iPgidxOff = szLeaf = fts5GetU16(&a[2]);
|
|
int iTermOff;
|
|
int nKeep = 0;
|
|
int iOff;
|
|
|
|
memset(&term, 0, sizeof(Fts5Buffer));
|
|
|
|
/* Decode any entries that occur before the first term. */
|
|
if( szLeaf<n ){
|
|
iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff);
|
|
}else{
|
|
iTermOff = szLeaf;
|
|
}
|
|
fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4);
|
|
|
|
iOff = iTermOff;
|
|
while( iOff<szLeaf && rc==SQLITE_OK ){
|
|
int nAppend;
|
|
|
|
/* Read the term data for the next term*/
|
|
iOff += fts5GetVarint32(&a[iOff], nAppend);
|
|
term.n = nKeep;
|
|
fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff]);
|
|
sqlite3Fts5BufferAppendPrintf(&rc, &s, " term=");
|
|
fts5BufferAppendTerm(&rc, &s, &term);
|
|
iOff += nAppend;
|
|
|
|
/* Figure out where the doclist for this term ends */
|
|
if( iPgidxOff<n ){
|
|
int nIncr;
|
|
iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr);
|
|
iTermOff += nIncr;
|
|
}else{
|
|
iTermOff = szLeaf;
|
|
}
|
|
if( iTermOff>szLeaf ){
|
|
rc = FTS5_CORRUPT;
|
|
}else{
|
|
fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff);
|
|
}
|
|
iOff = iTermOff;
|
|
if( iOff<szLeaf ){
|
|
iOff += fts5GetVarint32(&a[iOff], nKeep);
|
|
}
|
|
}
|
|
|
|
fts5BufferFree(&term);
|
|
}else{
|
|
Fts5Buffer term; /* Current term read from page */
|
|
int szLeaf; /* Offset of pgidx in a[] */
|
|
int iPgidxOff;
|
|
int iPgidxPrev = 0; /* Previous value read from pgidx */
|
|
int iTermOff = 0;
|
|
int iRowidOff = 0;
|
|
int iOff;
|
|
int nDoclist;
|
|
|
|
memset(&term, 0, sizeof(Fts5Buffer));
|
|
|
|
if( n<4 ){
|
|
sqlite3Fts5BufferSet(&rc, &s, 7, (const u8*)"corrupt");
|
|
goto decode_out;
|
|
}else{
|
|
iRowidOff = fts5GetU16(&a[0]);
|
|
iPgidxOff = szLeaf = fts5GetU16(&a[2]);
|
|
if( iPgidxOff<n ){
|
|
fts5GetVarint32(&a[iPgidxOff], iTermOff);
|
|
}else if( iPgidxOff>n ){
|
|
rc = FTS5_CORRUPT;
|
|
goto decode_out;
|
|
}
|
|
}
|
|
|
|
/* Decode the position list tail at the start of the page */
|
|
if( iRowidOff!=0 ){
|
|
iOff = iRowidOff;
|
|
}else if( iTermOff!=0 ){
|
|
iOff = iTermOff;
|
|
}else{
|
|
iOff = szLeaf;
|
|
}
|
|
if( iOff>n ){
|
|
rc = FTS5_CORRUPT;
|
|
goto decode_out;
|
|
}
|
|
fts5DecodePoslist(&rc, &s, &a[4], iOff-4);
|
|
|
|
/* Decode any more doclist data that appears on the page before the
|
|
** first term. */
|
|
nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
|
|
if( nDoclist+iOff>n ){
|
|
rc = FTS5_CORRUPT;
|
|
goto decode_out;
|
|
}
|
|
fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);
|
|
|
|
while( iPgidxOff<n && rc==SQLITE_OK ){
|
|
int bFirst = (iPgidxOff==szLeaf); /* True for first term on page */
|
|
int nByte; /* Bytes of data */
|
|
int iEnd;
|
|
|
|
iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte);
|
|
iPgidxPrev += nByte;
|
|
iOff = iPgidxPrev;
|
|
|
|
if( iPgidxOff<n ){
|
|
fts5GetVarint32(&a[iPgidxOff], nByte);
|
|
iEnd = iPgidxPrev + nByte;
|
|
}else{
|
|
iEnd = szLeaf;
|
|
}
|
|
if( iEnd>szLeaf ){
|
|
rc = FTS5_CORRUPT;
|
|
break;
|
|
}
|
|
|
|
if( bFirst==0 ){
|
|
iOff += fts5GetVarint32(&a[iOff], nByte);
|
|
if( nByte>term.n ){
|
|
rc = FTS5_CORRUPT;
|
|
break;
|
|
}
|
|
term.n = nByte;
|
|
}
|
|
iOff += fts5GetVarint32(&a[iOff], nByte);
|
|
if( iOff+nByte>n ){
|
|
rc = FTS5_CORRUPT;
|
|
break;
|
|
}
|
|
fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
|
|
iOff += nByte;
|
|
|
|
sqlite3Fts5BufferAppendPrintf(&rc, &s, " term=");
|
|
fts5BufferAppendTerm(&rc, &s, &term);
|
|
iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);
|
|
}
|
|
|
|
fts5BufferFree(&term);
|
|
}
|
|
|
|
decode_out:
|
|
sqlite3_free(a);
|
|
if( rc==SQLITE_OK ){
|
|
sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT);
|
|
}else{
|
|
sqlite3_result_error_code(pCtx, rc);
|
|
}
|
|
fts5BufferFree(&s);
|
|
}
|
|
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
|
|
|
|
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** The implementation of user-defined scalar function fts5_rowid().
*/
static void fts5RowidFunction(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args (always 2) */
  sqlite3_value **apVal           /* Function arguments */
){
  const char *zArg;

  if( nArg==0 ){
    sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1);
    return;
  }

  zArg = (const char*)sqlite3_value_text(apVal[0]);
  if( sqlite3_stricmp(zArg, "segment")!=0 ){
    /* Only the 'segment' subject is currently supported */
    sqlite3_result_error(pCtx,
        "first arg to fts5_rowid() must be 'segment'" , -1
    );
  }else if( nArg!=3 ){
    sqlite3_result_error(pCtx,
        "should be: fts5_rowid('segment', segid, pgno))", -1
    );
  }else{
    int segid = sqlite3_value_int(apVal[1]);
    int pgno = sqlite3_value_int(apVal[2]);
    i64 iRowid = FTS5_SEGMENT_ROWID(segid, pgno);
    sqlite3_result_int64(pCtx, iRowid);
  }
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
|
|
|
|
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)

/*
** Virtual table object for the fts5_structure() table-valued function.
*/
typedef struct Fts5StructVtab Fts5StructVtab;
struct Fts5StructVtab {
  sqlite3_vtab base;              /* Base class - must be first */
};

/*
** Cursor used to iterate through the segments of a decoded Fts5Structure,
** one row of output per segment.
*/
typedef struct Fts5StructVcsr Fts5StructVcsr;
struct Fts5StructVcsr {
  sqlite3_vtab_cursor base;       /* Base class - must be first */
  Fts5Structure *pStruct;         /* Structure being iterated; 0 at EOF */
  int iLevel;                     /* Current level in pStruct->aLevel[] */
  int iSeg;                       /* Current segment within current level */
  int iRowid;                     /* Rowid of current row of output */
};
|
|
|
|
/*
|
|
** Create a new fts5_structure() table-valued function.
|
|
*/
|
|
static int fts5structConnectMethod(
|
|
sqlite3 *db,
|
|
void *pAux,
|
|
int argc, const char *const*argv,
|
|
sqlite3_vtab **ppVtab,
|
|
char **pzErr
|
|
){
|
|
Fts5StructVtab *pNew = 0;
|
|
int rc = SQLITE_OK;
|
|
|
|
rc = sqlite3_declare_vtab(db,
|
|
"CREATE TABLE xyz("
|
|
"level, segment, merge, segid, leaf1, leaf2, loc1, loc2, "
|
|
"npgtombstone, nentrytombstone, nentry, struct HIDDEN);"
|
|
);
|
|
if( rc==SQLITE_OK ){
|
|
pNew = sqlite3Fts5MallocZero(&rc, sizeof(*pNew));
|
|
}
|
|
|
|
*ppVtab = (sqlite3_vtab*)pNew;
|
|
return rc;
|
|
}
|
|
|
|
/*
** We must have a single struct=? constraint that will be passed through
** into the xFilter method. If there is no valid struct=? constraint,
** then return an SQLITE_CONSTRAINT error.
*/
static int fts5structBestIndexMethod(
  sqlite3_vtab *tab,
  sqlite3_index_info *pIdxInfo
){
  int i;
  int rc = SQLITE_CONSTRAINT;
  struct sqlite3_index_constraint *p;
  pIdxInfo->estimatedCost = (double)100;
  pIdxInfo->estimatedRows = 100;
  pIdxInfo->idxNum = 0;
  for(i=0, p=pIdxInfo->aConstraint; i<pIdxInfo->nConstraint; i++, p++){
    if( p->usable==0 ) continue;
    /* Column 11 is the hidden "struct" column declared in xConnect */
    if( p->op==SQLITE_INDEX_CONSTRAINT_EQ && p->iColumn==11 ){
      rc = SQLITE_OK;
      pIdxInfo->aConstraintUsage[i].omit = 1;
      pIdxInfo->aConstraintUsage[i].argvIndex = 1;
      break;
    }
  }
  return rc;
}
|
|
|
|
/*
** This method is the destructor for Fts5StructVtab objects.
*/
static int fts5structDisconnectMethod(sqlite3_vtab *pVtab){
  Fts5StructVtab *p = (Fts5StructVtab*)pVtab;
  sqlite3_free(p);
  return SQLITE_OK;
}
|
|
|
|
/*
|
|
** Constructor for a new bytecodevtab_cursor object.
|
|
*/
|
|
static int fts5structOpenMethod(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCsr){
|
|
int rc = SQLITE_OK;
|
|
Fts5StructVcsr *pNew = 0;
|
|
|
|
pNew = sqlite3Fts5MallocZero(&rc, sizeof(*pNew));
|
|
*ppCsr = (sqlite3_vtab_cursor*)pNew;
|
|
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** Destructor for a bytecodevtab_cursor.
|
|
*/
|
|
static int fts5structCloseMethod(sqlite3_vtab_cursor *cur){
|
|
Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur;
|
|
fts5StructureRelease(pCsr->pStruct);
|
|
sqlite3_free(pCsr);
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
|
|
/*
|
|
** Advance a bytecodevtab_cursor to its next row of output.
|
|
*/
|
|
static int fts5structNextMethod(sqlite3_vtab_cursor *cur){
|
|
Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur;
|
|
Fts5Structure *p = pCsr->pStruct;
|
|
|
|
assert( pCsr->pStruct );
|
|
pCsr->iSeg++;
|
|
pCsr->iRowid++;
|
|
while( pCsr->iLevel<p->nLevel && pCsr->iSeg>=p->aLevel[pCsr->iLevel].nSeg ){
|
|
pCsr->iLevel++;
|
|
pCsr->iSeg = 0;
|
|
}
|
|
if( pCsr->iLevel>=p->nLevel ){
|
|
fts5StructureRelease(pCsr->pStruct);
|
|
pCsr->pStruct = 0;
|
|
}
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** Return TRUE if the cursor has been moved off of the last
|
|
** row of output.
|
|
*/
|
|
static int fts5structEofMethod(sqlite3_vtab_cursor *cur){
|
|
Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur;
|
|
return pCsr->pStruct==0;
|
|
}
|
|
|
|
static int fts5structRowidMethod(
|
|
sqlite3_vtab_cursor *cur,
|
|
sqlite_int64 *piRowid
|
|
){
|
|
Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur;
|
|
*piRowid = pCsr->iRowid;
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** Return values of columns for the row at which the bytecodevtab_cursor
|
|
** is currently pointing.
|
|
*/
|
|
static int fts5structColumnMethod(
|
|
sqlite3_vtab_cursor *cur, /* The cursor */
|
|
sqlite3_context *ctx, /* First argument to sqlite3_result_...() */
|
|
int i /* Which column to return */
|
|
){
|
|
Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur;
|
|
Fts5Structure *p = pCsr->pStruct;
|
|
Fts5StructureSegment *pSeg = &p->aLevel[pCsr->iLevel].aSeg[pCsr->iSeg];
|
|
|
|
switch( i ){
|
|
case 0: /* level */
|
|
sqlite3_result_int(ctx, pCsr->iLevel);
|
|
break;
|
|
case 1: /* segment */
|
|
sqlite3_result_int(ctx, pCsr->iSeg);
|
|
break;
|
|
case 2: /* merge */
|
|
sqlite3_result_int(ctx, pCsr->iSeg < p->aLevel[pCsr->iLevel].nMerge);
|
|
break;
|
|
case 3: /* segid */
|
|
sqlite3_result_int(ctx, pSeg->iSegid);
|
|
break;
|
|
case 4: /* leaf1 */
|
|
sqlite3_result_int(ctx, pSeg->pgnoFirst);
|
|
break;
|
|
case 5: /* leaf2 */
|
|
sqlite3_result_int(ctx, pSeg->pgnoLast);
|
|
break;
|
|
case 6: /* origin1 */
|
|
sqlite3_result_int64(ctx, pSeg->iOrigin1);
|
|
break;
|
|
case 7: /* origin2 */
|
|
sqlite3_result_int64(ctx, pSeg->iOrigin2);
|
|
break;
|
|
case 8: /* npgtombstone */
|
|
sqlite3_result_int(ctx, pSeg->nPgTombstone);
|
|
break;
|
|
case 9: /* nentrytombstone */
|
|
sqlite3_result_int64(ctx, pSeg->nEntryTombstone);
|
|
break;
|
|
case 10: /* nentry */
|
|
sqlite3_result_int64(ctx, pSeg->nEntry);
|
|
break;
|
|
}
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** Initialize a cursor.
|
|
**
|
|
** idxNum==0 means show all subprograms
|
|
** idxNum==1 means show only the main bytecode and omit subprograms.
|
|
*/
|
|
static int fts5structFilterMethod(
|
|
sqlite3_vtab_cursor *pVtabCursor,
|
|
int idxNum, const char *idxStr,
|
|
int argc, sqlite3_value **argv
|
|
){
|
|
Fts5StructVcsr *pCsr = (Fts5StructVcsr *)pVtabCursor;
|
|
int rc = SQLITE_OK;
|
|
|
|
const u8 *aBlob = 0;
|
|
int nBlob = 0;
|
|
|
|
assert( argc==1 );
|
|
fts5StructureRelease(pCsr->pStruct);
|
|
pCsr->pStruct = 0;
|
|
|
|
nBlob = sqlite3_value_bytes(argv[0]);
|
|
aBlob = (const u8*)sqlite3_value_blob(argv[0]);
|
|
rc = fts5StructureDecode(aBlob, nBlob, 0, &pCsr->pStruct);
|
|
if( rc==SQLITE_OK ){
|
|
pCsr->iLevel = 0;
|
|
pCsr->iRowid = 0;
|
|
pCsr->iSeg = -1;
|
|
rc = fts5structNextMethod(pVtabCursor);
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
|
|
|
|
/*
** This is called as part of registering the FTS5 module with database
** connection db. It registers several user-defined scalar functions useful
** with FTS5.
**
** If successful, SQLITE_OK is returned. If an error occurs, some other
** SQLite error code is returned instead.
**
** In non-debug builds (neither SQLITE_TEST nor SQLITE_FTS5_DEBUG defined)
** this is a no-op that always returns SQLITE_OK.
*/
int sqlite3Fts5IndexInit(sqlite3 *db){
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
  /* fts5_decode(rowid, blob): debugging decoder for index records. */
  int rc = sqlite3_create_function(
      db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0
  );

  if( rc==SQLITE_OK ){
    /* Same implementation as fts5_decode; the non-NULL pApp pointer
    ** ((void*)db) is how fts5DecodeFunction distinguishes the variant. */
    rc = sqlite3_create_function(
        db, "fts5_decode_none", 2,
        SQLITE_UTF8, (void*)db, fts5DecodeFunction, 0, 0
    );
  }

  if( rc==SQLITE_OK ){
    /* fts5_rowid(...): compute index rowids (variable argument count). */
    rc = sqlite3_create_function(
        db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0
    );
  }

  if( rc==SQLITE_OK ){
    /* Register the fts5_structure() table-valued function, implemented
    ** by the fts5struct* methods above. Read-only: all write-related
    ** methods are 0. */
    static const sqlite3_module fts5structure_module = {
      0,                           /* iVersion */
      0,                           /* xCreate */
      fts5structConnectMethod,     /* xConnect */
      fts5structBestIndexMethod,   /* xBestIndex */
      fts5structDisconnectMethod,  /* xDisconnect */
      0,                           /* xDestroy */
      fts5structOpenMethod,        /* xOpen */
      fts5structCloseMethod,       /* xClose */
      fts5structFilterMethod,      /* xFilter */
      fts5structNextMethod,        /* xNext */
      fts5structEofMethod,         /* xEof */
      fts5structColumnMethod,      /* xColumn */
      fts5structRowidMethod,       /* xRowid */
      0,                           /* xUpdate */
      0,                           /* xBegin */
      0,                           /* xSync */
      0,                           /* xCommit */
      0,                           /* xRollback */
      0,                           /* xFindFunction */
      0,                           /* xRename */
      0,                           /* xSavepoint */
      0,                           /* xRelease */
      0,                           /* xRollbackTo */
      0,                           /* xShadowName */
      0                            /* xIntegrity */
    };
    rc = sqlite3_create_module(db, "fts5_structure", &fts5structure_module, 0);
  }
  return rc;
#else
  return SQLITE_OK;
  /* Unreachable, but the syntactic reference to db suppresses the
  ** unused-parameter warning in non-debug builds. */
  UNUSED_PARAM(db);
#endif
}
|
|
|
|
|
|
/*
** Discard the cached Fts5Structure if the database has been modified
** since it was loaded (detected by comparing the saved structure version
** against the current data-version), then hand back the result of
** fts5IndexReturn() on the Fts5Index object.
**
** The assert() records the invariant that a cached structure always has
** a non-zero saved version.
*/
int sqlite3Fts5IndexReset(Fts5Index *p){
  assert( p->pStruct==0 || p->iStructVersion!=0 );
  if( p->iStructVersion!=fts5IndexDataVersion(p) ){
    /* Database changed under us - cached structure is stale. */
    fts5StructureInvalidate(p);
  }
  return fts5IndexReturn(p);
}
|