0
0
mirror of https://github.com/tursodatabase/libsql.git synced 2025-05-12 17:03:11 +00:00
Files
libsql/libsql-sqlite3/ext/fts5/fts5_expr.c
Pekka Enberg f996bf9f18 Merge upstream SQLite 3.45.1 (#1054)
* Remove unused elements from the json_tree() cursor.

FossilOrigin-Name: 914a50117d477b2cd30d58388fb8d1b71ff7ff6842ba025f38efc6e9647d06d0

* Same results as the legacy JsonNode implementation on a small set of test cases.

FossilOrigin-Name: c3da4b079a1a15a4c0b1a6e71f876648b1d9eb32eddc67b9946c2475c7b6d085

* Fix corner-case error conditions.

FossilOrigin-Name: ec23d34ab75e1d7e9366e59c633e0d30def8759f6d4717583ebeb4c90aeccf0d

* All tests passing.

FossilOrigin-Name: b5a5660ca22437640c9bf32c44d92c76a7293dafcbaf4fa6a4c171128d64871d

* Give the json_valid() function an optional second argument that determines
what is meant by "valid".

FossilOrigin-Name: a4e19ad43dac81e7655ec03ff69bb99d1d02b0c227034c90fb41415fd4793fe3

* Enhance the (SQLITE_DEBUG-only) json_parse() routine so that it shows a 
decoding of JSONB when given a BLOB argument.

FossilOrigin-Name: af267868562e0799ad691dccad05f17afbc34d609eede8c55f57d209290246ef

* In SQLITE_ENABLE_SETLK_TIMEOUT builds, use blocking locks in place of sleep() when opening a read-transaction.

FossilOrigin-Name: a51ef39998e25e86bd0600e71d15011b12e05f4319608018293bdaecb09e8c97

* Have SQLITE_ENABLE_SETLK_TIMEOUT builds block when locking a read-lock slot.

FossilOrigin-Name: f797baf47cf7859cfd8ce248f4f3087af4551a7040af990333426e5a7c269504

* Add untested (#ifdefed-out) code for the MergePatch algorithm against JSONB.
Add (and test) the jsonBlobEdit() routine that is needed by the new MergePatch.

FossilOrigin-Name: 4d353387fc10e1038cfdd86e66007bf728c231a928e588897bbee0fbfe76f225

* More aggressive use of jsonBlobEdit().  Improvements to the MergePatch
implementation sketch.

FossilOrigin-Name: fbca9570fd2e1465739e4d3a8d9bb40fad594fd78ab49b2cb34efa27ebdd8361

* The json_patch() code for JSONB compiles and works sometimes, but there are
still issues.  Incremental check-in.

FossilOrigin-Name: e0099464a0045a04f4ccf29bc2b8325fc8c7f39ccf4847e74818f928c9153588

* All legacy tests are passing.

FossilOrigin-Name: 2c436806b8d5f57de99c00f6154b038454fb9ae427d00d7b4a46ab9c7c69bcb9

* Handle an SQLITE_BUSY_TIMEOUT error if one occurs while attempting a shared lock on a read-lock slot.

FossilOrigin-Name: 5fbf3906d272df3eb981f67455eb35f649ad2774cba9fc3f077b28d9bef3f0cb

* The json_remove() function now uses only JSONB, never JsonNodes, internally.

FossilOrigin-Name: b69786e746ae2b927b64d9871fd120b7f8f06cc53739fd46a4da51aa16cf8576

* Attempt to get json_extract() working with pure JSONB only, and without
the use of JsonNode.  Mostly working, but there are some differences from
legacy in corner cases.

FossilOrigin-Name: 8c324af1eca27e86adc45622af4f3b06a67a3f968596ac58aa7434b1f6f05f3c

* Preserve flexibility in the format of the RHS of -> and ->> operators found
in legacy.

FossilOrigin-Name: 6231ec43adb7436195eb1497de39a6c13c6b4f1c5032e6ea52515d214e61fdbc

* Do not set the J subtype when the output is JSONB.

FossilOrigin-Name: 4f106b64fe8988435872806bd0a6c223b61f53af0dd1c47c847bb4eec4e03e27

* Convert the json_array_length() function to use JSONB instead of JsonNodes.

FossilOrigin-Name: 5ab790736d943e08f097efcee5cfbf0d83c65b0a53f273060330ba719affa5e5

* The assertion change at check-in [7946c79567b0ccd3] is insufficient to fix
the problem of a Table object being deleted out from under the OP_VCheck
opcode.  We need to reference count the Table, which is accomplished here.

FossilOrigin-Name: cad269d5e274443c39203a56603b991accc0399135d436996fc039d1d28ec9db

* In the recovery extension, if a payload size is unreasonably large, it is
probably corrupt, so truncate it.

FossilOrigin-Name: 988c3179e978a3a6d42541e9c7a2ab98150383671810926503376ed808f150ff

* Fix signed integer overflow in fts5.

FossilOrigin-Name: 60e46c7ec68fd8caaed960ca06d98fb06855b2d0bb860dd2fb7b5e89a5e9c7b4

* The json_patch() function now operates exclusively on JSONB.  This patch
also includes improvements to JSONB debug printing routines.

FossilOrigin-Name: fee19d0098242110d2c44ec7b9620c1210ef3f87913305f66ec85d277dd96ab6

* Convert the json_error_position() routine to use only JSONB internally.

FossilOrigin-Name: e7a8ba35bff6fde55827f978de5b343b6c134c7fa53827f5c63915a9dc2598ad

* Convert json_insert(), json_replace(), json_set() to use JSONB internally.
Mostly working, but some corner cases are still not quite right.

FossilOrigin-Name: 99c8f6bd5c9a31b6d00f92e383bec8a8235ed553916ad59adbb1b7663f6ebff1

* Update some OPFS-related help text in WASM tests. Minor cleanups in speedtest1-worker.js.

FossilOrigin-Name: 263f6d3a7784ef7d032dbf7a3265aca8dd70bf50797f28f6b2e8ddb6a301f83a

* New test cases for insert/set/replace with paths that indicate substructure
that does not yet exist.

FossilOrigin-Name: 146c717c51940b2139befc45ac74e7a1c36ef3c32fd3cfe35b334488eebe6298

* New JSON test cases showing insert or set with missing substructure.

FossilOrigin-Name: 6802b6459d0d16c961ff41d240a6c88287f197d8f609090f79308707490a49c2

* Simplification of the new JSON insert/set test cases.

FossilOrigin-Name: 04c0d5644372446c924a2e31a26edf51ddc563a1990d170b0ed4739e3e8b239b

* Enhance json_set() and json_insert() so that they create missing
substructure.

FossilOrigin-Name: cc7a641ab5ae739d31c24f0ad0caeb15a481a63fa8f13720718ea922c25862ff

* Convert json_type() to use JSONB internally.

FossilOrigin-Name: 83074835b900ce85cf67059e674ce959801505c37592671af25ca0af7ed483f1

* Add a basic batch-mode SQL runner for the SAH Pool VFS, for use in comparing it against WebSQL. Bring the WebSQL batch runner up to date, noting that it cannot run without addition of an "origin trial" activation key from Google because that's now the only way to enable WebSQL in Chrome (that part is not checked in because that key is private). Minor code-adjacent cleanups.

FossilOrigin-Name: 883990e7938c1f63906300a6113f0fadce143913b7c384e8aeb5f886f0be7c62

* Convert json_valid() over to using only JSONB as its internal format.

FossilOrigin-Name: 7b5756fa6d00b093bf083a8d7a5ef5485f7a09e4eac473785c8380688f861a1b

* Remove all trace of JsonNode from the JSON implementation.  The JSONB format
is used as the internal binary encoding for searching and editing.

FossilOrigin-Name: 11ebb5f712cc7a515e2e0f2be8c1d71de20c97fe5b74c4f4d72c84fd21182d35

* First attempt to get the JSON text-to-binary cache working.  All test cases
pass, but the cache seems not to help much.

FossilOrigin-Name: 25ed295f300fea6185104a73721076bccd2b2a6e411c78564266fa6dca4ff70c

* Cache is working better, but does not preserve the hasJson5 flag.

FossilOrigin-Name: a12add7ab9f5aee5bb2ede0c4d22e599dd28f7a107dce72b2ea48ef92d233e8a

* Fix up the JSON cache to work better.

FossilOrigin-Name: 1fdbc39521f63aedc6f08ecaafa54ea467b8c6316a692a18ad01eecbf22a0977

* Different approach to querying a tokendata=1 table. Saves cpu and memory.

FossilOrigin-Name: c523f40895866e6fc979a26483dbea8206126b4bbdf4b73b77263c09e13c855e

* Remove old code for tokendata=1 queries.

FossilOrigin-Name: b0a489e8e1bf0290c2117ab32d78b1cc7d67bcb226b55ec044c8367ebde3815b

* Performance optimization in the JSON parser.

FossilOrigin-Name: 68d191f40e708962ec88e0c245b4496bc4a671300484b1cc0f3fc7e6d199a6e6

* Fix harmless compiler warnings and enhance performance the parser.

FossilOrigin-Name: 285633da6d188547e52f07779e209c9e5f3dc33ce0668e14858f3337889ef4b8

* Unroll a loop in the parser for a performance increase.

FossilOrigin-Name: a6dc29e4d5e13949e0fcd9d5dde575c2670eb10a230ab9df3806fc8c3016c540

* Remove a NEVER that can be true if a virtual table column is declared to have
a DEFAULT.  See
[forum:/forumpost/3d4de8917627d058|forum post 3d4de8917627d058].

FossilOrigin-Name: 8abc2ccaf8106f20243568cd7fa74174386eb85d7ea381201e97e2fd527033e0

* Simplification and optimization of the JSON parser.

FossilOrigin-Name: f5ec9485119a2a6cb33eb864c7ca9b41d4a2ed08ab6ad9a6b0dd9358ab253576

* Performance optimization in jsonAppendString().

FossilOrigin-Name: fdf00e96239c73fb67e2acecc5b95f55a1fc51c3deed4512613c0d6070ce5805

* Minor fix to the header comment on jsonXlateTextToBlob().

FossilOrigin-Name: c3677ba410208c07b711f5f526eb5cf039a8eee49f632c7ae04fa55cdfbb9058

* Fix potential unsigned integer underflow in jsonAppendString().

FossilOrigin-Name: d2fba2cbdc3870d34228c1a9446eced884325acc183900d7dd0b96132570fb4a

* Do not allow a JsonParse object to be considered "editable" after an OOM.

FossilOrigin-Name: c6bacf57bd6fe0fee00c9d41163a270b60997c20659949971bbf5c6c62622bfe

* Protect a memcpy() against OOM conditions.

FossilOrigin-Name: 26144d1c25ae0435db568009ba05e485d23d146f2b1f29f3a426c87860316aed

* Ensure that tokendata=1 queries avoid loading large doclists for queries like "common AND uncommon", just as tokendata=0 queries do.

FossilOrigin-Name: 7bda09ab404a110d57449e149a3281fca8dc4cacf7bd9832ea2a1356ad20fe8e

* Take extra care to ensure that JSONB values that are in cache are actually
owned by the JSON subsystem, and that ownership of such values is not handed
back to the bytecode engine.

FossilOrigin-Name: 1304534001e9ef66c6b12752b69d790bfa3427cc803f87cc48ca22ae12df0fdf

* When tokendata=1 queries require multiple segment-cursors, allow those cursors to share a single array of in-memory tombstone pages.

FossilOrigin-Name: e0175d07e4094db5ea4b0378a5ff480dafb6ba9da86a113fa767c4c89c3c866f

* Fix harmless compiler warnings.  Refactor some identifier names for
clearer presentation.

FossilOrigin-Name: 7e3941502789c5afaf19b08112f464abf5e3cba7f92fc9290af2a0f96127ad9a

* Code and comment cleanup.  Everything should work the same.

FossilOrigin-Name: c640754df0d3ffdad994745f0d0e10c8f19f424b87f6a6e6e269491a0350b950

* Fix various compiler warnings and other problems with the new code on this branch.

FossilOrigin-Name: 3a623cfa173b4035c759cb84985d11d8727053beb383648503987d6ab15c0ef0

* Fix harmless compiler warnings reported by MSVC.

FossilOrigin-Name: 419652c0c82980bd043584dcd2976f91dfff7b926b216d597698299850b855c0

* Implement strict JSONB checking in the json_valid() function.

FossilOrigin-Name: 0f26d38880fcbc207abcc94dbc170a7428bab1b4f0b7731aaf5bee0224000994

* Minor code changes for consistency and to simplify testing.

FossilOrigin-Name: df272bd837910ad9e03e222716a1201a601399664365f1dcf73d5932372518ed

* Do not let bad hexadecimal digits in malformed JSONB cause an assertion fault.

FossilOrigin-Name: 8dec1ba1e5076ff596756e00c1e2ada0245f168a503dd1cadadf848331acfac3

* Enable incorrect JSONB to be rendered into text without hitting an
assertion for a bad whitespace escape in a string.

FossilOrigin-Name: 4d6a9a217df6792b41766b774fb0c0553b45f9104c26a0955bf4a30862d7d7bf

* Ensure that OOM conditions in the generation of the "bad JSON path" error
message result in an SQLITE_NOMEM error.

FossilOrigin-Name: aa0e02b5c26a2ef3d6216a0ed8bc01382be43173485f898cb63f2a8c559f2e74

* Avoid problems when the path argument to json_tree() contains embedded U+0000
characters.

FossilOrigin-Name: 9f055091af01a5dddba1a7e9868ad030c8f206237e1569215cb161e53e54aa71

* Remove dead code.  Improved reporting of errors in JSON inputs.

FossilOrigin-Name: 2eaa738e6b5c1b67b3e57c868d9c3a30eea38a0b3b8b02482f06d57a45b10921

* Back off on the use of strlen() for situations where sqlite3_value_bytes()
will work as well, for performance.

FossilOrigin-Name: 79fb54fbb8b9c30f47cdbd437d24a21542716241e822749e5e28c9fbc449bfa8

* Better pre-scan size estimations for objects in the JSON parser resulting
in fewer reallocations and memmove operations.

FossilOrigin-Name: 526b27f90897f5e35dfff7257daf6c4ce4798d649b09b8aecfb02df0449e3c51

* Repair issues and inefficiencies found during testing.

FossilOrigin-Name: ae973cb1515f9d76409c92a2ca2ffd6b71f32b0b490a4886770e7c1b90f12611

* Add tests for using tokendata=1 and contentless_delete=1 together.

FossilOrigin-Name: a2506b8c9718054912270055638204753c4156bbc115e55194e6df9d7e76cb10

* Two new NEVER macros.

FossilOrigin-Name: 52632c92cb06faf0e804654b3490fd6c199521107bd30c8fcbc3a2a5a488098f

* Remove reachable ALWAYS and NEVER macros.

FossilOrigin-Name: f601de3eeabd85993c1f5ee96b62de6fdabbeae2fe8950e00d08feb48d42c498

* Fix bug in xInstToken() causing the wrong token to be returned.

FossilOrigin-Name: da78d07e77cbc783fbc725758911c230fd6a1c1885d9576125de955dcc2bd37f

* Continuing simplifications and code cleanup.

FossilOrigin-Name: ddf92b5059a9106753fd18b82ba8daa269a62af947561c460790107b83416f0b

* Fix a problem with the xInstCount() API and "ORDER BY rank" queries.

FossilOrigin-Name: 317a50563d9e8586fda136e513727241b414e7267d50a06571c8ebd0eae710bc

* Fix memory leak in new code on this branch.

FossilOrigin-Name: ebc160b9a05568df66f86e30804399ee29d34b44a60c57e062f98cb92826353f

* Fixes for xInstToken() with tokendata=0 tables. And with prefix queries.

FossilOrigin-Name: 78fbb71598b1ca756acc078253880a1d0f7983a5a26b9efc683e6488122505a1

* Fix errors in rendering JSON5 escape sequences embedded in JSONB.

FossilOrigin-Name: f1a51ae3863557526a51c6e98e71fcdf4f1ed14a36212b3c90f7408f926345e4

* Do not make the input JSONB editable in json_remove() if there are no PATH
argument.

FossilOrigin-Name: 66594544f3ba9977475a3e3f74404eb2b2fb845053b28bd24c2b52c7df94e9d7

* Fixes to error handling in json_array_length().

FossilOrigin-Name: aa85df2d26b74c171c55bde19ef17c4f11f40b8af7181bbf7162f87cdea7e88b

* Add further tests for xInstToken().

FossilOrigin-Name: 8582707f16133f003a6687f68cbea03d4eb6c2a0e2e07746b7cace0c44e84fa4

* Rename the internal routine jsonMergePatchBlob() to just jsonMergePatch().

FossilOrigin-Name: ebf667b616235bb64b83832008342ba5e7b10b2c170d7cebc431f040fef7ecfb

* Fix OOM and corrupt JSONB handling in json_patch().

FossilOrigin-Name: 1910feb0b7d5cc2b810c3322f6cca281d8730182d30d162bd7bb56800979ea91

* Use an assert() to fix a harmless static analyzer warning.

FossilOrigin-Name: a249ca657e624028bc6b3d2c2bcedd7162d118addb7d62ce519920cecebf1860

* Clean up the JSONB performance test script.

FossilOrigin-Name: 905301075a7fc1010ee7e754867b1b698c9b8576d50e98125def32a5dfb7ee9d

* Small performance gain by unwinding the string literal delimiter search
loop in the JSON parser by one more level.

FossilOrigin-Name: 4c587feac153e8ebe526559ec3d254f545f81e8d1ed3126f91a5ff25ec4aa72e

* Use strspn() to accelerate whitespace bypass in the JSON parser.

FossilOrigin-Name: 843197df08352bdff4b87be91d160e574572aded0d0c66142fd960000c0b4701

* Miscellaneous comment cleanup and typo fixes.

FossilOrigin-Name: 59446dc0bd0091572122a3c8b4653d7a2dc867d16c4a5919f79b81bc3a673ce3

* Further tests for the new code on this branch.

FossilOrigin-Name: 59d008b6c23ab900377bc696ee19381feb7614bac80546eae361e401c3620c4e

* Use extra assert() statement to silence harmless static analyzer warnings.

FossilOrigin-Name: 174c2b2eef5fecd96a5fc89b81032fe81f7801f12097cea10e7e7f0a02114813

* README.md typo fix reported in the forum and update all links from http: to https:.

FossilOrigin-Name: 5c48acdbb44185b352b54911a57a6986d6c7e624bdeba2af48b985d29f0292bf

* Increased rigor in comparisons between object labels in JSON.

FossilOrigin-Name: 2bc86d145fccc07107b7753cb1a69122676d4096fe59c454497bd81a6142d45e

* The rule for the RHS of the ->> and -> operators when the RHS does not begin
with $ is that it must be (1) all digits, or (2) all alphanumerics, or
(3) contained within [..] or else it will become a quoted label.

FossilOrigin-Name: 0e059a546ec11fa5c6d007bd65c249ee2422f1facbdb2792c53e0bc0ccc97e14

* Test cases for object label matching with escape sequences.

FossilOrigin-Name: c6f2aa38e95b7888650cfa7bb773b18a28e01d883033ac77be6d504ffe417d18

* In CLI, move -interactive flag handling back to arg-loop pass 2.

FossilOrigin-Name: 63cb05a862532d2d56e9e81fe32ced09bf58f03146587a118f11c2a84e195e69

* Fix the routine that determines the json_tree.path value for the first row
so that it correctly takes into account escape sequences in the path
argument.

FossilOrigin-Name: b9243ee8a37c62eb8848e765bd4af83bc1b3d3eb24fb4268a1357ad1f8b2e1fb

* Correctly handle 8-byte sizes in the JSONB format.
[forum:/forumpost/283daf08e91183fc|Forum post 283daf08e91183fc].

FossilOrigin-Name: 73d390f39c0bbbc017e01544e4d43c76761f2599bd57f900131c706270dfd202

* Update documentation comments in fts5.h.

FossilOrigin-Name: 38c50e22c98607e6c1fd78d7615cda534773b6d4fd85c712b54749fcd7af0c83

* Work around LLVM's newfound hatred of function pointer casts.
[forum:/forumpost/1a7d257346636292|Forum post 1a7d257346636292].

FossilOrigin-Name: ec0ae4030968c782af48d1c776351c14b2ada21d40aeb97915f33df30706e18f

* Fix compiler warning about shadowed variable in fts5_index.c.

FossilOrigin-Name: ee70e4c1c9c41617850228e48d8df44f105cf2fbbe789340ceca6f27ad6ce5eb

* Improved detection of corrupt JSONB in the jsonReturnFromBlob() function.

FossilOrigin-Name: b014736c1f80ccc46fb4b24ac04310a6ce5cb5b6653665efff366cb3bc742257

* Add ALWAYS() on branches added in [ec0ae4030968c782] that are always true.

FossilOrigin-Name: 451cef8609e96dd9244818adc5c6f240544694bcb4ae620e88f90e403e59d70f

* Rework the jsonEachPathLength() routine in json_tree() so that it is
less susceptible to problems due to goofy object labels.

FossilOrigin-Name: 858b76a00e8ff55215f7a2e6a4cd77fc4d4f98dea7224cd90488744f5ce246a4

* Different fix for the fts5 COMMIT-following-OOM problem first fixed by [fba3129d]. This one does not cause problems if an fts5 table is renamed and then dropped within the same transaction.

FossilOrigin-Name: d8c6b246944934a7a6e027b3f5b986fd64a19dd5c5c5175f4ea8586da59a6764

* Fix a problem with handling OOM and other errors in fts5 when querying tokendata=1 tables.

FossilOrigin-Name: bc911ab5953532956510c199be72b1d3c556f2d0ddbd7fc0ae6f5f917b337b48

* Fix a null-pointer dereference in fts5 tokendata=1 code.

FossilOrigin-Name: d69fa8f0504887f968d9a190ecb889ddb40bb1b56d0d4479f9819c106aec719b

* Avoid an assert() failure when querying an fts5vocab table that accesses a tokendata=1 fts5 table with corrupt %_data records.

FossilOrigin-Name: 386ba9e20423fb2f623d6adc9d3c310fb1b135f54a1dad15ef3b593d97886926

* Ensure an fts5vocab table never uses a special tokendata=1 merge cursor.

FossilOrigin-Name: 1e26510e83b40c9bd2e8bfa2a0e81f2cb915e78fed773204ef537683e48b61dc

* Avoid dropping an error code in new fts5 tokendata=1 code.

FossilOrigin-Name: a66596e33dc9aa4bab2ec3ff45546e1321d0a11bdc764f8381b315292ca92423

* Fix a harmless compiler warning about "confusing indentation".

FossilOrigin-Name: 34f9e9a8c4bea13f60f43062e25cd7d9422f2e7f5b371ed0ddadc9abeb3ca256

* Fix a potential problem RCStr access on a JsonString object that is not
really and RCStr.  Fuzzer/UBSAN find.

FossilOrigin-Name: d2f2174ce2cc89606034e158149a2d05fc3627ec4d5cdb772add7a2250f29d78

* Fix a harmless UBSAN warning.

FossilOrigin-Name: 1503cba6d17e9bade7a5c103ddd23241ff4741f9a2e3032ffe2987af243dae65

* Fix a potential use of uninitialized value in json_valid() with 2nd
argument of 8.

FossilOrigin-Name: fa102036fe46eeb71b7df3e265be1935ae5c78e0b939b08841bcfb8abadbc77a

* Work toward enhanced functionality for json_valid() with deep checking
of the JSONB (second argument has bit 0x08).

FossilOrigin-Name: c370d573198b151767f04e91bf8baa4ae0076751ae468c5709742a0b0ed16770

* Add SQLITE_TESTCTRL_VALIDATE_JSONB, which if enabled under SQLITE_DEBUG causes
cross-checking of generate JSONB.

FossilOrigin-Name: b410a4db74a650003539ffaaea18519d5159b504daac47db6a4874b730f40ac8

* Rename the new test-control to SQLITE_TESTCTRL_JSON_SELFCHECK.  Make it so
that the current value of the setting can be interrogated.

FossilOrigin-Name: 7aff1d9a4cb17ecd5abab21ab032f35a78741dd641ddd8cbcc85fc4a81a0707d

* Activate JSON_SELFCHECK within fuzzcheck.

FossilOrigin-Name: 4d14e733bb521aed65e98533969d2303738232ae87dab70fdf7962e6513195f5

* json_valid(*,8) allows minus-signs on hexadecimal literals.

FossilOrigin-Name: c0d7f4520d839a268b3fd2474d0897a9832aa608bd6238b3e287fabecf07a350

* json_error_position() now uses jsonValidityCheck() to find the approximate
position of an error in a JSONB blob.

FossilOrigin-Name: c3d60cf7028a333b825d5b89516945a73e0c158ac81d8bcc117d21bfd98602c8

* The json_error_position() function now reports an approximate byte offset
to the problem in a JSONB if there is a problem.

FossilOrigin-Name: 80d5d94dff6a2d2557039be3d7d47c1a6003c4b98defe0bd411acfeb963ad5dd

* Validity checking of text nodes in JSONB.

FossilOrigin-Name: fa5160687c2f970d407e8af73c246f7cd806bb4ce35f29a79ac534a8646a6c8e

* Improvements to JSONB validation - catch more cases where the input does
not conform to spec.

FossilOrigin-Name: be1864eac4eb75cc30bf98f73092c8608467f4bd956240df6a0cbea9f1e09e85

* Add NEVER to two unreachable branches in JSON.

FossilOrigin-Name: c96ebb086feb89341565cc52b970ae7799ce1327fe1ad4fc790f1b0dcaa6e229

* Worker1 Promiser API: when multiple db connections are active then use the requested connection instead of always the first-opened connection. Bug reported in [forum:894c330e7f23b177|forum post 894c330e7f23b177].

FossilOrigin-Name: 194276e18e0268829061c09317e7f9f527a703eb45f1755ff1dd30bd99dc1b68

* Fix the JSON object label comparison object so that it works correctly even
if the label ends with escaped whitespace.

FossilOrigin-Name: 4d5353cadd7b7c5f105bc197f3ec739e2d041472d6b3e939654c9f9cfc2749ae

* Improvements to UTF8 handling, and especially the handling of invalid UTF8,
in the JSON routines.

FossilOrigin-Name: 1b229c1101d6c384a30f343c5e47b471ab084b2d8e81170eb8f642afc1c67e3b

* Bug fix in the JSONB validator.
dbsqlfuzz ac6fa521a08609a642198e7decf64180e750b3c4

FossilOrigin-Name: 3e940a6a08b0a0434650cd3d8dd286e09ad8ab805b0a4d515e57bba5d3608577

* Avoid invoking sqlite3ExprColUsage() on an unresolve column reference.
dbsqlfuzz fc34aa62df4de103705d11b807074687ffafbda5.

FossilOrigin-Name: ac9314c0e335694b48c613145f5397247bb88c51806cd0dc3ed4ec306db4bbad

* In CLI, fix .read inability to open 2GB+ files on WIN32.

FossilOrigin-Name: 56c80a62d2e033d64ba5d545ae9cbe3ed7c9d046c0a3fafb6cfa2f0b562d1ef0

* Pass subtype information through the aggregate ORDER BY sorter for
aggregate functions that use subtype information.

FossilOrigin-Name: 3536f4030eab6d650b7ed729d2f71eb6cc3b5fbe16b4e96b99008d66522aaccb

* Improve the error message returned by an fts5 'rebuild' command on an external content table if there is a problem with the content table or view.

FossilOrigin-Name: 0fbf4b8a58fde1c187908934da6f59999b146f32e07ac255cc531c5c4d7007fd

* Fix harmless compiler warnings in JSON and FTS5.

FossilOrigin-Name: 90135efccfeb1046f002bfcbd8dfec9a1a3b40cbe1b5c714ae065b06368e354f

* Add assert()s to FTS5 to fix static analyzer warnings.

FossilOrigin-Name: 27d4a89a5ff96b7b7fc5dc9650e1269f7c7edf91de9b9aafce40be9ecc8b95e9

* Use SQLITE_STRICT_SUBTYPE=1 by default for the JNI and WASM builds unless they're explicitly built with SQLITE_STRICT_SUBTYPE=0.

FossilOrigin-Name: 990211357badf0ab08bd34cf6d25b58849d0fd8503e289c1839fc837a74e1909

* Correct --enable-sab flag in ext/wasm/GNUmakefile to fix a silent alhttpd args-parsing error.

FossilOrigin-Name: 7b9b757d872a31395b0f6454e2309a6a4664b8bdd8749f6a15371cbe72c05b60

* Avoid running the "no_mutex_try" tests with SQLITE_ENABLE_SETLK_TIMEOUT builds as part of the release test.

FossilOrigin-Name: 6b4e1344a28c213cbe8fb97f7f3f6688de93fb73ed96bf460ff74c959da1a712

* Do not run test script fts5origintest4.test with either "memsubsys1" or "mmap" permutations.

FossilOrigin-Name: 05a63d9603ef42cbee6dadff72d97583a9c78e549f70e9a808534d5c1ae7c28a

* Fix a new JSON test case so that it works even if SQLITE_OMIT_VIRTUALTABLE
is defined.

FossilOrigin-Name: b995aae510888a9746b46545d176a0885d4738e1f1bc0b7ad7937ed023efd7d6

* Add mention of --buildonly and --dryrun to the testrunner.tcl usage screen.

FossilOrigin-Name: 23b92d915c12ee768857e2c3c961832f390cad9b53b8bcfc2b97664baab25bb7

* Avoid expiring prepared statements in the middle of an integrity-check.

FossilOrigin-Name: 88beb48472da4667c0727c8ebabe046ea526450ff837fe789d041ed3f1ff105e

* In the count-of-view optimization, deferring freeing obsolete parts of the
parse tree, on the off-chance that some other part of the code might be
holding a pointer to those parts.

FossilOrigin-Name: da442578856c87137eb1677d9b13b7c1cf15828cc41d4756572b278060f69bae

* New test case based on Chromium bug report 1511689.

FossilOrigin-Name: 2c7ef4b4d215f99f8d6787adb64e2037ae96e5dd6cb49c8b81634249f5e1b328

* Enable SQLITE_STRICT_SUBTYPE for default builds of the shell, fuzzcheck,
and testfixture.

FossilOrigin-Name: 5a0c517ed7e46c0f8a3db752cf5b9f8010c60f35084606abe9e7c1c4f993b4a7

* Enhancements to the "randomjson.c" extension.  Automatically load that extension
into fuzzcheck.

FossilOrigin-Name: 70620405ab01d6a5d38bafa9ae175fd6e4eabaf2efb7854734278dafd7b05c99

* Enhancements to ext/misc/randomjson.c.

FossilOrigin-Name: a4e6d1f86f3a502e4170f5a90031e269e48363e95114a66b84d373e3ce0b2704

* Bug fix in the randomjson.c extension.

FossilOrigin-Name: 1f3a33df530dbe330ea8b14a69369b807b413b25a167d1a3938f8f0faf97cc91

* Ensure that all object labels for individual objects generated by
randomjson.c are unique.

FossilOrigin-Name: 29c46aca231b3f1e997ef306a5a651408185bf3ad09ab9fc1fe21ed18caa4d02

* Add randomjson.c to testfixture.  Use it for a new set of invariant tests
against JSON functions.

FossilOrigin-Name: f1c040606bfe784804134d8f3ca130908fad5212b47e3c32792baab977470943

* Ensure that the insert/delete size delta on JSONB objects in the JSON cache
are always set to zero.

FossilOrigin-Name: 4b4581668a908473dbf1322a3e98bc7cca122998c44518ea183af7f0d1ba9f95

* Fix JSON to JSONB translation so that it deals correctly with Infinity
and NaN.

FossilOrigin-Name: 178cb84f36bdb45ba17511900d6d8ea8dfa14912fc5bf7094a20348174a36c95

* Add NEVER() to an unfalsifiable branch.

FossilOrigin-Name: 9a0c67db366d38a0b0741f6a1ae333cf27cfe6f6b7c6eed94bdec9686f9f9f8a

* New JSON invariant test cases.

FossilOrigin-Name: a6a1367b0bf364b1a2e20e153c5f4a578624b8846f9ec0b7c9c3cba0ea2ec346

* Remove a stray comment in the JSON code.

FossilOrigin-Name: 6618bdf0679405b43911ea8cd94050b12a5dc469f3dfe4759ee3ff850a55229e

* Extra ALWAYS() macros to verify state in the sqlite3ExprCanBeNull() routine.

FossilOrigin-Name: be19b84c9f3fe127165809908add148dbe9a827a55608b0490de7e69b7f7f191

* Always make the sqlite_dbdata virtual table available in the CLI.

FossilOrigin-Name: e5fd3b32ad87586a7413570e568c9c1859a37a4f836cca074126471b125fb682

* When unable to resolve an identifier, change the Expr node into TK_NULL
rather than TK_COLUMN, to prevent any downstream misuse of the non-existent
column.  dbsqlfuzz 71869261db80a95e4733afa10ff5724bf3c78592.

FossilOrigin-Name: d2e6117e4f97ab98b01deb5fcad5520f8181d00bed8d904d34963c01d73df857

* Test case for the previous check-in.

FossilOrigin-Name: df5a07e1a5122e08c2fa6076ac08adb2820f997ee11dd88b84863666899dfb57

* Ignore COLLATE operators when determining whether the result of a subexpression
should be shallow-copied or deep-copied.

FossilOrigin-Name: 34ae36a45e814bed7c8340412c7ef3fc849b82357656d0eb5f0f805e59d846d0

* Add ALWAYS() and NEVER() on branches made unreachable by recent changes.

FossilOrigin-Name: c50e6c2ace49d0928b05cbfd877c621e9a0f77dc4e056ccb1dbe5cf118a00d00

* More precise computation of the size of data structures in the query planner.
Response to [forum:/forumpost/7d8685d49d|Forum post 7d8685d49d].

FossilOrigin-Name: 0c8d88e41167ea92341dd1129be01b596a73f46bdcd5b0dd931441a979c013d0

* Fix harmless compiler warning in the randomjson.c extension.

FossilOrigin-Name: debe7060b16669ada7304ffb9bf7616c8fa30bd286d8be871ed17fd6d64a3d4c

* On second thought, we don't really need sqlite_dbdata accessible to the CLI.

FossilOrigin-Name: 36fe6a61ef8fb393281a5e15119d716521219c7b971fbfd63bdea07d27a78ac9

* Remove redundant conditional from sqlite3ExprCanBeNull().

FossilOrigin-Name: 257f96a2d22c605885fa66220c28cf7dc5941c330bccee3f132b9e7b70d89d30

* In JSON - minor code cleanup and refactoring with a small size reduction
and performance increase.

FossilOrigin-Name: 215fabda38daecdbd38b1eca5a6aafbc61b6a36a8303f1d7164d5a1138e63134

* Avoid harmless integer overflow in pager status statistics gathering.
Response to [forum:/forumpost/7f4cdf23f9|forum post 7f4cdf23f9].

FossilOrigin-Name: 206d8c650d937bc700946c40a82a62ea6bc4a80e5f3fb42d0ae2968de25f0644

* Fix SQLITE_ENABLE_SETLK_TIMEOUT assert() statements in os_unix.c to avoid reading past the end of the unixShmNode.aMutex[] array.

FossilOrigin-Name: 029a05cd2928d43d81e4549cce5388c432e2c9e75e3fa0b2fe6e91021b2fb9ac

* Add internal core-developer-only documentation of the JSONB format.

FossilOrigin-Name: 4d30478863b2a60512010de9ec6e3099bfaf75d4afee20acec536713fe94334d

* Add a new comment to debugging output routine sqlite3WhereLoopPrint() to
remind us of what the various fields of the debug output mean.  No changes
to code.

FossilOrigin-Name: da5f34fd4052432b1ae27bb12e56b358cdc5c1282653d60ed0f0fe62f727e4ee

* Fix a usan complaint about signed integer overflow.

FossilOrigin-Name: e65907e0279f4814ec957f0790777d8b94a86926cd27c52442b311b27efc0185

* Update #ifdef checks in pager.c and util.c to account for [0462a2612d1fc1d0] to resolve the build problem reported in [forum:9819032aac|forum post 9819032aac].

FossilOrigin-Name: 0f22d809a1c6c80e381f6bcd931fe4ec36dca0e28d07ab4f4f7f83c813424f60

* Add the -fno-sanitize-recover=undefined to the sanitizer builds used for sdevtest and release testing. To ensure that any test that provokes undefined behaviour fails.

FossilOrigin-Name: 89563311adb0ab7c7a3eadb11c2e27fbca50c56fce8ca616628facbc00d72b88

* Change parameters on a debugging function to include "const".

FossilOrigin-Name: 94c3e1110c6590261bd30ba317fba4dd94023d69b81a94f4b216cce748fe7489

* Add debugging output routines sqlite3ShowWhereLoop(X) and
sqlite3ShowWhereLoopList(X) that can be invoked from a debugger to show
a summary of the content of a single WhereLoop object or a list of WhereLoop
objects.  No change in release builds.

FossilOrigin-Name: 5db30bcc338aac1cf081de2deec7e60749ae012e2b6f95ccf745623adb4a31dc

* Improvements to the query planner to address the inefficiency described
by [forum/forumpost/2568d1f6e6|forum post 2568d1f6e6].

FossilOrigin-Name: 72fcc12cda910a0e3f7875eb3d117b2a5608705c97703985427a02960f1ab5c5

* Avoid signed integer overflow during integrity_check of FTS5.

FossilOrigin-Name: 5937df3b25799eceaadfb04d7226c9995d44c8d8edb5ac3ad02af9d7e3570726

* Fix harmless compiler warnings associated with [5db30bcc338aac1c]

FossilOrigin-Name: e55d1c2333f35fc20615aa83a7843d08cae7945710a2156d44eee0cc37d90ade

* Remove an ALWAYS() added in [c50e6c2ace49d092] because it is sometimes false.
dbsqlfuzz c393a4f783d42efd9552772110aff7e5d937f15e.

FossilOrigin-Name: b9daf37e57cde12c4de271a2b1995e8e91b6411f8c2e8882e536241929609b3a

* Improved handling of malformed unicode within JSON strings.

FossilOrigin-Name: e252bdf5f5de26ba8e2bcc6b0ad94121ed6fc4d86c02fe4a2a058ada93747beb

* Ensure that the xColumnText(), xQueryPhrase() and xPhraseFirstColumn() APIs all return SQLITE_RANGE if they are passed a bad column or phrase number.

FossilOrigin-Name: 1a8a9b1c89519d265869251e8b6d3c5db733f0d3a7dea6c7962811a8f1157dff

* Fix a problem in the shell tool (not library) causing an out-of-bounds write if an ".open" command failed, then the user pressed ctrl-c to interrupt a query running on the substitute in-memory database.

FossilOrigin-Name: 026618b9e321576f616a32e41329066ba629814170c6cfeef35430343f5003f3

* Enhance the (undocumented, debug-only) json_parse() SQL function so that it
returns the text rendering of the JSONB parse of the input, rather than printing
the rendering on stdout.

FossilOrigin-Name: 056de8d551dcbdf1d162e2db15ed418fa9c786f900cd3972ef8a1dea3f4f3aa1

* Fix harmless compiler warnings in FTS5.

FossilOrigin-Name: 3cd5ef44e40570c357f913a9483fa1cd72e7f2827a5ed5826bff99febae213b1

* Performance improvement by unwinding a loop in jsonAppendString().

FossilOrigin-Name: 190ab3c08431a0ba24d76392eab251f5c1792add05e4ec780998b299208eca95

* Update fts5origintext4.test to work with SQLITE_DIRECT_OVERFLOW_READ.

FossilOrigin-Name: 15ed002aed12556aeb9bbe537c4ba839f0c95bac65a69d03401b37cc3fd11b92

* Enable SQLITE_DIRECT_OVERFLOW_READ unless it is specifically disabled using
the -DSQLITE_DIRECT_OVERFLOW_READ=0 compile-time option.

FossilOrigin-Name: 630604a4e604bfb36c31602917bfa8d42c10c82966d0819932bf8f827b9158b8

* Minor doc touchup in the JS bits.

FossilOrigin-Name: 8d2120c35425081e2158d6a8a6b083c4adf8d694046b2d98f5fd235520920432

* Use SQLITE_ENABLE_STAT4 in both the WASM and JNI builds.

FossilOrigin-Name: 99d11e6d0ae687ff6bac5119027f7b04d5e7185214e79cf8c56289cfa809b0f9

* WASM: various build cleanups and add initial infrastructure for a build which elides the oo1 API and its dependents (worker1 and promiser). Sidebar: an attempt was made to move generation of the build rules to an external script, but the mixed-mode make/script was even less legible than the $(eval) indirection going on in the makefile.

FossilOrigin-Name: 563d313163c02b398ae85b7c2ed231019a14e006726f09a7c1f294a58bf4363f

* JNI: move the ByteBuffer-using APIs from public to package visibility for the time being because they have UB-inducing possibilities which need to be worked out. Update test code to account for a change in custom FTS5 columntext() impls.

FossilOrigin-Name: dc501275fcfab3ad9b6ebbadf7588b225a9dd07a0abac5be83d96f15bfba99e9

* Extra steps taken to avoid using low-quality indexes in a query plan.
This branch accomplishes the same end as the nearby enhanced-stat1 branch,
but with much less change and hence less risk.

FossilOrigin-Name: c030e646262fee43a59b45fdc1630d972f8bf88ac3c142b6bdaf4cbb36695a4f

* Remove some unnecessary computations from ANALYZE so that ANALYZE runs with
fewer CPU cycles.  These changes were spotted while working on the nearby
enhanced-stat1 branch.  So even if enhanced-stat1 is abandoned, that effort
put into it will not have been in vain.

FossilOrigin-Name: 5527e8c4abb904b1a438ec1c353d4a960bf82faaf3a2c742af1df7c613850441

* Back out [99d11e6d0ae6] (enabling of STAT4 in WASM/JNI), per /chat discussion.

FossilOrigin-Name: cd7929ee2e2c305475fa5a4dff2edaccf90067126ef04a1c2714cf464925453f

* Update and clean up the in-makefile docs for ext/wasm.

FossilOrigin-Name: 7a7b295e6d7e95ee4a46cc42761895d11700ab295870c5a4380072bb4a5b7099

* Elaborate on the various build flavors used by ext/wasm/. Doc changes only.

FossilOrigin-Name: d489232aa492618d4c8e5817addb2323d0ca067742d7140216914239a66fb221

* Increase the default "max_page_count" to its theoretical maximum of
4294967294.

FossilOrigin-Name: ffb35f1784a4305b979a850485f57f56938104a3a03f4a7aececde92864c4879

* Fix a problem in fts5 caused by a COMMIT involving fts5 data that immediately follows a ROLLBACK TO that does not.

FossilOrigin-Name: 55c61f6a8d6a1bc79497b05669beac5c5397b06382bf24b6bec54845962d219b

* Adjust the sqlite3PagerDirectReadOk() routine (part of the
SQLITE_DIRECT_OVERFLOW_READ optimization) to use less code and to be
more easily testable.

FossilOrigin-Name: eed670ea2a9424f7df4eeb01c152fc38f7190a5e39aa891651b28dc91fcdc019

* Back out [b517a52fa36df0a0] which is no longer reachable due to early
error detection enhancements in [166e82dd20efbfd3].

FossilOrigin-Name: 704943e96f2620b99260667ac9922c2f72bc3e92e2dfe1d9c2a91c7b704564d9

* Update the sqldiff.exe utility program so that it uses the sqlite3_str
string interface, and so that it does console output using the
ext/consio extension.

FossilOrigin-Name: 4443b7e592da97d1cb1b3b79ed0559452d8057a33aba4d184c2fffbf200e05f5

* Enhance sqlite3_analyzer.exe so that it uses the ext/consio extension.

FossilOrigin-Name: 769de0b98e136e4a0945b80216d0c9583c1ccd9de69cb0494875c2300e172646

* Change a constant from decimal to hex to avoid a compiler warning on Mac.

FossilOrigin-Name: e3acb8a43ad544fd5b5341058276bd3b61b6bdb6b719790476a90e0de4320f90

* Convert the JSON functions to use lookaside memory allocation whenever
feasible, to avoid hitting the global memory allocator mutex.

FossilOrigin-Name: a79a244954f728596da3c0e28fa3b887258d1bd831f53881970f418f3fba84c7

* Fix a #ifdef in sqlite3_test_control() that was preventing builds with
SQLITE_OMIT_WSD.

FossilOrigin-Name: d546a9c94caf7408cc6e4530ec190d3a13fae09dc15b71b03d6369e02ee62abd

* Restructure some code to fix what appears to be a false-positive UBSAN warning.

FossilOrigin-Name: fe952c12903ea2150880c8bb57cda2efc00ce9fa801568a68c619e0745f30567

* Avoid errors with SQLITE_OMIT_VIRTUALTABLE builds in json106.test and unionall.test.

FossilOrigin-Name: 90e8a233549a2d31e6959ce3fec927693b772ab3c0abce65e81d7350d2ca5cc6

* Update extension ext/misc/totext.c to avoid both ubsan warnings and dubious real->integer conversions.

FossilOrigin-Name: c626aa108a7a30cef54af8d93ac9e45749568ed38e4e06623a6bad6b4bf6e8ec

* Update JSON performance testing procedures for clarity and to describe how to
do performance testing of JSONB.

FossilOrigin-Name: b115b4f75bc7c4e6d9bab5edf13297f27a36f30083c80d2c502b01208da5dfc0

* Ensure that SQLITE_PROTOCOL is not returned too early when a SQLITE_ENABLE_SETLK_TIMEOUT build fails to open a transaction on a wal mode database in cases where blocking locks are not being used.

FossilOrigin-Name: b934a33671d8a0190082ad7e5e68c78fe0c558d102404eafc1de26e4e7d65b92

* Updates to RTREE to facility testing.

FossilOrigin-Name: 7a5b42ff74882c58493dc8b710fde73d4ff251f5d42271d84be73ceaabc01698

* Remove an ALWAYS() from RTREE.  Dbsqlfuzz found a way to make it false.

FossilOrigin-Name: 40f0a29e6dd90fcb969d7c0e49728ba0ee8f31d9e8f502b9a21469620a8ad283

* Minor change to os_unix.c to facilitate 100% MC/DC testing.

FossilOrigin-Name: 0dfa7b4da134db281c3c4eddb4569c53a450f955f0af2f410e13db801aff4ea2

* Automatically turn off DEFENSIVE mode in the shell tool when executing scripts generated by the ".dump" command against an empty database. Add a warning to the top of generated ".dump" scripts that populate virtual tables.

FossilOrigin-Name: 6e9e96b7e7afb9420110f4b93d10b945c9eadfde5e9c81e59ae9ee8167e75707

* Fix date on new file shell9.test.

FossilOrigin-Name: c82da712113d5dcd63b764dbc68842026989627abc840acb4a33f3a4972b832a

* Improved resolution of unqualified names in the REINDEX command.
[forum:/info/74cd0ceabd|Forum thread 74cd0ceabd].

FossilOrigin-Name: 97709ce2a1f5ae05495e412ca27108048e5b8a63a1e3bca4be13933f7527da7b

* Put an SQLITE_ENABLE_SETLK_TIMEOUT branch inside the appropriate ifdef with
an assert on the else since the condition is always false if SETLK_TIMEOUT
is not available.

FossilOrigin-Name: d81e7a036ac5d70b6a6ee6ab7d81e041c1f5fc04b70bcee47e203d521caf7e93

* In fts5, flush the contents of the in-memory hash table whenever the secure-delete option is toggled. This prevents spurious corruption reports under some circumstances.

FossilOrigin-Name: ccf552319a62bfb329820a3bc1f490bacbaa6e90694a257fc65a568a605542c3

* Fix a comment in sessions.  No functional changes.
[forum:/forumpost/8c20dc935b|Forum post 8c20dc935b].

FossilOrigin-Name: b0eb6d3628c1f70399a22d9fd3b79a796bc343adfeba50515440db609565961a

* Have the shell tool automatically enable SQLITE_CONFIG_DQS_DDL when executing a ".dump" script against an empty db.

FossilOrigin-Name: f47a5f4e0ce078e6cc1183e6cbb3c4013af379b496efae94863a42e5c39928ed

* Version 3.45.0

FossilOrigin-Name: 1066602b2b1976fe58b5150777cced894af17c803e068f5918390d6915b46e1d

* wasm build: reformulate an awk invocation to account for awks which do not support the -e flag. Problem reported on the forum via a docker-hosted build.

FossilOrigin-Name: 90dd51153fd0a6197e2ee49b5492ad120f0bfc324b60651f3d4f47c286887b46

* When backing out a character in a constructed string in JSON, first make sure
the string has not been reset by on OOM.

FossilOrigin-Name: 950bf9fe7829864e0abe6d71ca0495f346feb5d7943d76c95e55a6b86ea855da

* Ensure that the xIntegrity methods of fts3 and fts5 work on read-only databases.

FossilOrigin-Name: e79b97369fa740f62f695057d4a2cf8dae48a683982ec879f04a19039c9cb418

* When a JSON input is a blob, but it looks like valid JSON when cast to text,
then accept it as valid JSON.  This replicates a long-standing bug in the
behavior of JSON routines, and thus avoids breaking legacy apps.

FossilOrigin-Name: 4c2c1b97dce46a279846380c937ac6de5c367927c6843516641eead7ea6db472

* Bump the version number to 3.45.1

FossilOrigin-Name: 54d34edb89430b266221b7e6eea0afbd2c9dafbe774344469473abc8ad1e13fd

* Fix harmless "unused parameter" compiler warning in the new fts3IntegrityMethod
implementation.

FossilOrigin-Name: 9d459f6b50fb6f995e6284a0815c5e211cacac44aad0b96bf01ba68af97f51fc

* In os_unix.c and os_win.c, do not allow xFetch() to return a pointer to a page buffer that is right at the end of the mapped region - if the database is corrupted in a specific way such a page buffer might be overread by several bytes.

FossilOrigin-Name: d131cab652ac11795322af13d0b330e7e44ab91587a1a3e73fe7b9a14b2dd531

* Slight adjustment to test results for Windows in mmap1.test due to
the previous check-in.

FossilOrigin-Name: a8043eaed899285b5cf4aab0c23c3dabb8975910c353cb579fd1f1655db390f6

* Apply the same fix found in [99057383acc8f920] to descending scans.

FossilOrigin-Name: 593d6a1c2e9256d797f160e867278414e882a3d04d7fea269bea86965eaa7576

* Automatically disable the DISTINCT optimization during query planning if the
ORDER BY clause exceeds 63 terms.

FossilOrigin-Name: 6edbdcc02d18727f68f0236e15dde4ecfc77e6f452b522eb4e1e895929b1fb63

* When rendering JSONB back into text JSON, report an error if a zero-length
integer or floating-point node is encountered.  Otherwise, if the node occurs
at the very end of the JSONB, the rendering logic might read one byte past
the end of the initialized part of the BLOB byte array.  OSSFuzz 66284.

FossilOrigin-Name: 3ab08ac75d97ffd9920f5c924362a4819560b40faa8a4f9100068057f5fa420a

* Avoid a potential buffer overread when handling corrupt json blobs.

FossilOrigin-Name: ac402cc551b2cbe3f8fbbc9c711a04942eab5eeb9d2f4a394e9370d2380427b5

* Detect malformed nested JSONB earlier and stop rendering to avoid long
delays.

FossilOrigin-Name: ab40e282465c989bf249453d7c6f60072a38b691f579411cdf9aad234b20f0f7

* Version 3.45.1

FossilOrigin-Name: e876e51a0ed5c5b3126f52e532044363a014bc594cfefa87ffb5b82257cc467a

---------

Co-authored-by: drh <>
Co-authored-by: dan <Dan Kennedy>
Co-authored-by: stephan <stephan@noemail.net>
Co-authored-by: larrybr <larrybr@noemail.net>
2024-07-25 08:55:22 +00:00

3258 lines
91 KiB
C

/*
** 2014 May 31
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
*/
#include "fts5Int.h"
#include "fts5parse.h"
#ifndef SQLITE_FTS5_MAX_EXPR_DEPTH
# define SQLITE_FTS5_MAX_EXPR_DEPTH 256
#endif
/*
** All token types in the generated fts5parse.h file are greater than 0.
*/
#define FTS5_EOF 0
#define FTS5_LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32))
typedef struct Fts5ExprTerm Fts5ExprTerm;
/*
** Functions generated by lemon from fts5parse.y.
*/
void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(u64));
void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*));
void sqlite3Fts5Parser(void*, int, Fts5Token, Fts5Parse*);
#ifndef NDEBUG
#include <stdio.h>
void sqlite3Fts5ParserTrace(FILE*, char*);
#endif
int sqlite3Fts5ParserFallback(int);
struct Fts5Expr {
Fts5Index *pIndex;
Fts5Config *pConfig;
Fts5ExprNode *pRoot;
int bDesc; /* Iterate in descending rowid order */
int nPhrase; /* Number of phrases in expression */
Fts5ExprPhrase **apExprPhrase; /* Pointers to phrase objects */
};
/*
** eType:
** Expression node type. Always one of:
**
** FTS5_AND (nChild, apChild valid)
** FTS5_OR (nChild, apChild valid)
** FTS5_NOT (nChild, apChild valid)
** FTS5_STRING (pNear valid)
** FTS5_TERM (pNear valid)
**
** iHeight:
** Distance from this node to furthest leaf. This is always 0 for nodes
** of type FTS5_STRING and FTS5_TERM. For all other nodes it is one
** greater than the largest child value.
*/
struct Fts5ExprNode {
int eType; /* Node type */
int bEof; /* True at EOF */
int bNomatch; /* True if entry is not a match */
int iHeight; /* Distance to tree leaf nodes */
/* Next method for this node. */
int (*xNext)(Fts5Expr*, Fts5ExprNode*, int, i64);
i64 iRowid; /* Current rowid */
Fts5ExprNearset *pNear; /* For FTS5_STRING - cluster of phrases */
/* Child nodes. For a NOT node, this array always contains 2 entries. For
** AND or OR nodes, it contains 2 or more entries. */
int nChild; /* Number of child nodes */
Fts5ExprNode *apChild[1]; /* Array of child nodes */
};
#define Fts5NodeIsString(p) ((p)->eType==FTS5_TERM || (p)->eType==FTS5_STRING)
/*
** Invoke the xNext method of an Fts5ExprNode object. This macro should be
** used as if it has the same signature as the xNext() methods themselves.
*/
#define fts5ExprNodeNext(a,b,c,d) (b)->xNext((a), (b), (c), (d))
/*
** An instance of the following structure represents a single search term
** or term prefix.
*/
struct Fts5ExprTerm {
u8 bPrefix; /* True for a prefix term */
u8 bFirst; /* True if token must be first in column */
char *pTerm; /* Term data */
int nQueryTerm; /* Effective size of term in bytes */
int nFullTerm; /* Size of term in bytes incl. tokendata */
Fts5IndexIter *pIter; /* Iterator for this term */
Fts5ExprTerm *pSynonym; /* Pointer to first in list of synonyms */
};
/*
** A phrase. One or more terms that must appear in a contiguous sequence
** within a document for it to match.
*/
struct Fts5ExprPhrase {
Fts5ExprNode *pNode; /* FTS5_STRING node this phrase is part of */
Fts5Buffer poslist; /* Current position list */
int nTerm; /* Number of entries in aTerm[] */
Fts5ExprTerm aTerm[1]; /* Terms that make up this phrase */
};
/*
** One or more phrases that must appear within a certain token distance of
** each other within each matching document.
*/
struct Fts5ExprNearset {
int nNear; /* NEAR parameter */
Fts5Colset *pColset; /* Columns to search (NULL -> all columns) */
int nPhrase; /* Number of entries in aPhrase[] array */
Fts5ExprPhrase *apPhrase[1]; /* Array of phrase pointers */
};
/*
** Parse context.
*/
struct Fts5Parse {
Fts5Config *pConfig;
char *zErr;
int rc;
int nPhrase; /* Size of apPhrase array */
Fts5ExprPhrase **apPhrase; /* Array of all phrases */
Fts5ExprNode *pExpr; /* Result of a successful parse */
int bPhraseToAnd; /* Convert "a+b" to "a AND b" */
};
/*
** Check that the Fts5ExprNode.iHeight variables are set correctly in
** the expression tree passed as the only argument.
*/
#ifndef NDEBUG
static void assert_expr_depth_ok(int rc, Fts5ExprNode *p){
if( rc==SQLITE_OK ){
if( p->eType==FTS5_TERM || p->eType==FTS5_STRING || p->eType==0 ){
assert( p->iHeight==0 );
}else{
int ii;
int iMaxChild = 0;
for(ii=0; ii<p->nChild; ii++){
Fts5ExprNode *pChild = p->apChild[ii];
iMaxChild = MAX(iMaxChild, pChild->iHeight);
assert_expr_depth_ok(SQLITE_OK, pChild);
}
assert( p->iHeight==iMaxChild+1 );
}
}
}
#else
# define assert_expr_depth_ok(rc, p)
#endif
void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){
va_list ap;
va_start(ap, zFmt);
if( pParse->rc==SQLITE_OK ){
assert( pParse->zErr==0 );
pParse->zErr = sqlite3_vmprintf(zFmt, ap);
pParse->rc = SQLITE_ERROR;
}
va_end(ap);
}
static int fts5ExprIsspace(char t){
return t==' ' || t=='\t' || t=='\n' || t=='\r';
}
/*
** Read the first token from the nul-terminated string at *pz.
*/
static int fts5ExprGetToken(
Fts5Parse *pParse,
const char **pz, /* IN/OUT: Pointer into buffer */
Fts5Token *pToken
){
const char *z = *pz;
int tok;
/* Skip past any whitespace */
while( fts5ExprIsspace(*z) ) z++;
pToken->p = z;
pToken->n = 1;
switch( *z ){
case '(': tok = FTS5_LP; break;
case ')': tok = FTS5_RP; break;
case '{': tok = FTS5_LCP; break;
case '}': tok = FTS5_RCP; break;
case ':': tok = FTS5_COLON; break;
case ',': tok = FTS5_COMMA; break;
case '+': tok = FTS5_PLUS; break;
case '*': tok = FTS5_STAR; break;
case '-': tok = FTS5_MINUS; break;
case '^': tok = FTS5_CARET; break;
case '\0': tok = FTS5_EOF; break;
case '"': {
const char *z2;
tok = FTS5_STRING;
for(z2=&z[1]; 1; z2++){
if( z2[0]=='"' ){
z2++;
if( z2[0]!='"' ) break;
}
if( z2[0]=='\0' ){
sqlite3Fts5ParseError(pParse, "unterminated string");
return FTS5_EOF;
}
}
pToken->n = (z2 - z);
break;
}
default: {
const char *z2;
if( sqlite3Fts5IsBareword(z[0])==0 ){
sqlite3Fts5ParseError(pParse, "fts5: syntax error near \"%.1s\"", z);
return FTS5_EOF;
}
tok = FTS5_STRING;
for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++);
pToken->n = (z2 - z);
if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 ) tok = FTS5_OR;
if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT;
if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ) tok = FTS5_AND;
break;
}
}
*pz = &pToken->p[pToken->n];
return tok;
}
static void *fts5ParseAlloc(u64 t){ return sqlite3_malloc64((sqlite3_int64)t);}
static void fts5ParseFree(void *p){ sqlite3_free(p); }
int sqlite3Fts5ExprNew(
Fts5Config *pConfig, /* FTS5 Configuration */
int bPhraseToAnd,
int iCol,
const char *zExpr, /* Expression text */
Fts5Expr **ppNew,
char **pzErr
){
Fts5Parse sParse;
Fts5Token token;
const char *z = zExpr;
int t; /* Next token type */
void *pEngine;
Fts5Expr *pNew;
*ppNew = 0;
*pzErr = 0;
memset(&sParse, 0, sizeof(sParse));
sParse.bPhraseToAnd = bPhraseToAnd;
pEngine = sqlite3Fts5ParserAlloc(fts5ParseAlloc);
if( pEngine==0 ){ return SQLITE_NOMEM; }
sParse.pConfig = pConfig;
do {
t = fts5ExprGetToken(&sParse, &z, &token);
sqlite3Fts5Parser(pEngine, t, token, &sParse);
}while( sParse.rc==SQLITE_OK && t!=FTS5_EOF );
sqlite3Fts5ParserFree(pEngine, fts5ParseFree);
assert_expr_depth_ok(sParse.rc, sParse.pExpr);
/* If the LHS of the MATCH expression was a user column, apply the
** implicit column-filter. */
if( iCol<pConfig->nCol && sParse.pExpr && sParse.rc==SQLITE_OK ){
int n = sizeof(Fts5Colset);
Fts5Colset *pColset = (Fts5Colset*)sqlite3Fts5MallocZero(&sParse.rc, n);
if( pColset ){
pColset->nCol = 1;
pColset->aiCol[0] = iCol;
sqlite3Fts5ParseSetColset(&sParse, sParse.pExpr, pColset);
}
}
assert( sParse.rc!=SQLITE_OK || sParse.zErr==0 );
if( sParse.rc==SQLITE_OK ){
*ppNew = pNew = sqlite3_malloc(sizeof(Fts5Expr));
if( pNew==0 ){
sParse.rc = SQLITE_NOMEM;
sqlite3Fts5ParseNodeFree(sParse.pExpr);
}else{
if( !sParse.pExpr ){
const int nByte = sizeof(Fts5ExprNode);
pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&sParse.rc, nByte);
if( pNew->pRoot ){
pNew->pRoot->bEof = 1;
}
}else{
pNew->pRoot = sParse.pExpr;
}
pNew->pIndex = 0;
pNew->pConfig = pConfig;
pNew->apExprPhrase = sParse.apPhrase;
pNew->nPhrase = sParse.nPhrase;
pNew->bDesc = 0;
sParse.apPhrase = 0;
}
}else{
sqlite3Fts5ParseNodeFree(sParse.pExpr);
}
sqlite3_free(sParse.apPhrase);
*pzErr = sParse.zErr;
return sParse.rc;
}
/*
** Assuming that buffer z is at least nByte bytes in size and contains a
** valid utf-8 string, return the number of characters in the string.
*/
static int fts5ExprCountChar(const char *z, int nByte){
int nRet = 0;
int ii;
for(ii=0; ii<nByte; ii++){
if( (z[ii] & 0xC0)!=0x80 ) nRet++;
}
return nRet;
}
/*
** This function is only called when using the special 'trigram' tokenizer.
** Argument zText contains the text of a LIKE or GLOB pattern matched
** against column iCol. This function creates and compiles an FTS5 MATCH
** expression that will match a superset of the rows matched by the LIKE or
** GLOB. If successful, SQLITE_OK is returned. Otherwise, an SQLite error
** code.
*/
int sqlite3Fts5ExprPattern(
Fts5Config *pConfig, int bGlob, int iCol, const char *zText, Fts5Expr **pp
){
i64 nText = strlen(zText);
char *zExpr = (char*)sqlite3_malloc64(nText*4 + 1);
int rc = SQLITE_OK;
if( zExpr==0 ){
rc = SQLITE_NOMEM;
}else{
char aSpec[3];
int iOut = 0;
int i = 0;
int iFirst = 0;
if( bGlob==0 ){
aSpec[0] = '_';
aSpec[1] = '%';
aSpec[2] = 0;
}else{
aSpec[0] = '*';
aSpec[1] = '?';
aSpec[2] = '[';
}
while( i<=nText ){
if( i==nText
|| zText[i]==aSpec[0] || zText[i]==aSpec[1] || zText[i]==aSpec[2]
){
if( fts5ExprCountChar(&zText[iFirst], i-iFirst)>=3 ){
int jj;
zExpr[iOut++] = '"';
for(jj=iFirst; jj<i; jj++){
zExpr[iOut++] = zText[jj];
if( zText[jj]=='"' ) zExpr[iOut++] = '"';
}
zExpr[iOut++] = '"';
zExpr[iOut++] = ' ';
}
if( zText[i]==aSpec[2] ){
i += 2;
if( zText[i-1]=='^' ) i++;
while( i<nText && zText[i]!=']' ) i++;
}
iFirst = i+1;
}
i++;
}
if( iOut>0 ){
int bAnd = 0;
if( pConfig->eDetail!=FTS5_DETAIL_FULL ){
bAnd = 1;
if( pConfig->eDetail==FTS5_DETAIL_NONE ){
iCol = pConfig->nCol;
}
}
zExpr[iOut] = '\0';
rc = sqlite3Fts5ExprNew(pConfig, bAnd, iCol, zExpr, pp,pConfig->pzErrmsg);
}else{
*pp = 0;
}
sqlite3_free(zExpr);
}
return rc;
}
/*
** Free the expression node object passed as the only argument.
*/
void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){
if( p ){
int i;
for(i=0; i<p->nChild; i++){
sqlite3Fts5ParseNodeFree(p->apChild[i]);
}
sqlite3Fts5ParseNearsetFree(p->pNear);
sqlite3_free(p);
}
}
/*
** Free the expression object passed as the only argument.
*/
void sqlite3Fts5ExprFree(Fts5Expr *p){
if( p ){
sqlite3Fts5ParseNodeFree(p->pRoot);
sqlite3_free(p->apExprPhrase);
sqlite3_free(p);
}
}
int sqlite3Fts5ExprAnd(Fts5Expr **pp1, Fts5Expr *p2){
Fts5Parse sParse;
memset(&sParse, 0, sizeof(sParse));
if( *pp1 && p2 ){
Fts5Expr *p1 = *pp1;
int nPhrase = p1->nPhrase + p2->nPhrase;
p1->pRoot = sqlite3Fts5ParseNode(&sParse, FTS5_AND, p1->pRoot, p2->pRoot,0);
p2->pRoot = 0;
if( sParse.rc==SQLITE_OK ){
Fts5ExprPhrase **ap = (Fts5ExprPhrase**)sqlite3_realloc(
p1->apExprPhrase, nPhrase * sizeof(Fts5ExprPhrase*)
);
if( ap==0 ){
sParse.rc = SQLITE_NOMEM;
}else{
int i;
memmove(&ap[p2->nPhrase], ap, p1->nPhrase*sizeof(Fts5ExprPhrase*));
for(i=0; i<p2->nPhrase; i++){
ap[i] = p2->apExprPhrase[i];
}
p1->nPhrase = nPhrase;
p1->apExprPhrase = ap;
}
}
sqlite3_free(p2->apExprPhrase);
sqlite3_free(p2);
}else if( p2 ){
*pp1 = p2;
}
return sParse.rc;
}
/*
** Argument pTerm must be a synonym iterator. Return the current rowid
** that it points to.
*/
static i64 fts5ExprSynonymRowid(Fts5ExprTerm *pTerm, int bDesc, int *pbEof){
i64 iRet = 0;
int bRetValid = 0;
Fts5ExprTerm *p;
assert( pTerm );
assert( pTerm->pSynonym );
assert( bDesc==0 || bDesc==1 );
for(p=pTerm; p; p=p->pSynonym){
if( 0==sqlite3Fts5IterEof(p->pIter) ){
i64 iRowid = p->pIter->iRowid;
if( bRetValid==0 || (bDesc!=(iRowid<iRet)) ){
iRet = iRowid;
bRetValid = 1;
}
}
}
if( pbEof && bRetValid==0 ) *pbEof = 1;
return iRet;
}
/*
** Argument pTerm must be a synonym iterator.
*/
static int fts5ExprSynonymList(
Fts5ExprTerm *pTerm,
i64 iRowid,
Fts5Buffer *pBuf, /* Use this buffer for space if required */
u8 **pa, int *pn
){
Fts5PoslistReader aStatic[4];
Fts5PoslistReader *aIter = aStatic;
int nIter = 0;
int nAlloc = 4;
int rc = SQLITE_OK;
Fts5ExprTerm *p;
assert( pTerm->pSynonym );
for(p=pTerm; p; p=p->pSynonym){
Fts5IndexIter *pIter = p->pIter;
if( sqlite3Fts5IterEof(pIter)==0 && pIter->iRowid==iRowid ){
if( pIter->nData==0 ) continue;
if( nIter==nAlloc ){
sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * nAlloc * 2;
Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc64(nByte);
if( aNew==0 ){
rc = SQLITE_NOMEM;
goto synonym_poslist_out;
}
memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter);
nAlloc = nAlloc*2;
if( aIter!=aStatic ) sqlite3_free(aIter);
aIter = aNew;
}
sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &aIter[nIter]);
assert( aIter[nIter].bEof==0 );
nIter++;
}
}
if( nIter==1 ){
*pa = (u8*)aIter[0].a;
*pn = aIter[0].n;
}else{
Fts5PoslistWriter writer = {0};
i64 iPrev = -1;
fts5BufferZero(pBuf);
while( 1 ){
int i;
i64 iMin = FTS5_LARGEST_INT64;
for(i=0; i<nIter; i++){
if( aIter[i].bEof==0 ){
if( aIter[i].iPos==iPrev ){
if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) continue;
}
if( aIter[i].iPos<iMin ){
iMin = aIter[i].iPos;
}
}
}
if( iMin==FTS5_LARGEST_INT64 || rc!=SQLITE_OK ) break;
rc = sqlite3Fts5PoslistWriterAppend(pBuf, &writer, iMin);
iPrev = iMin;
}
if( rc==SQLITE_OK ){
*pa = pBuf->p;
*pn = pBuf->n;
}
}
synonym_poslist_out:
if( aIter!=aStatic ) sqlite3_free(aIter);
return rc;
}
/*
** All individual term iterators in pPhrase are guaranteed to be valid and
** pointing to the same rowid when this function is called. This function
** checks if the current rowid really is a match, and if so populates
** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch
** is set to true if this is really a match, or false otherwise.
**
** SQLITE_OK is returned if an error occurs, or an SQLite error code
** otherwise. It is not considered an error code if the current rowid is
** not a match.
*/
static int fts5ExprPhraseIsMatch(
Fts5ExprNode *pNode, /* Node pPhrase belongs to */
Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */
int *pbMatch /* OUT: Set to true if really a match */
){
Fts5PoslistWriter writer = {0};
Fts5PoslistReader aStatic[4];
Fts5PoslistReader *aIter = aStatic;
int i;
int rc = SQLITE_OK;
int bFirst = pPhrase->aTerm[0].bFirst;
fts5BufferZero(&pPhrase->poslist);
/* If the aStatic[] array is not large enough, allocate a large array
** using sqlite3_malloc(). This approach could be improved upon. */
if( pPhrase->nTerm>ArraySize(aStatic) ){
sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm;
aIter = (Fts5PoslistReader*)sqlite3_malloc64(nByte);
if( !aIter ) return SQLITE_NOMEM;
}
memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm);
/* Initialize a term iterator for each term in the phrase */
for(i=0; i<pPhrase->nTerm; i++){
Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
int n = 0;
int bFlag = 0;
u8 *a = 0;
if( pTerm->pSynonym ){
Fts5Buffer buf = {0, 0, 0};
rc = fts5ExprSynonymList(pTerm, pNode->iRowid, &buf, &a, &n);
if( rc ){
sqlite3_free(a);
goto ismatch_out;
}
if( a==buf.p ) bFlag = 1;
}else{
a = (u8*)pTerm->pIter->pData;
n = pTerm->pIter->nData;
}
sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]);
aIter[i].bFlag = (u8)bFlag;
if( aIter[i].bEof ) goto ismatch_out;
}
while( 1 ){
int bMatch;
i64 iPos = aIter[0].iPos;
do {
bMatch = 1;
for(i=0; i<pPhrase->nTerm; i++){
Fts5PoslistReader *pPos = &aIter[i];
i64 iAdj = iPos + i;
if( pPos->iPos!=iAdj ){
bMatch = 0;
while( pPos->iPos<iAdj ){
if( sqlite3Fts5PoslistReaderNext(pPos) ) goto ismatch_out;
}
if( pPos->iPos>iAdj ) iPos = pPos->iPos-i;
}
}
}while( bMatch==0 );
/* Append position iPos to the output */
if( bFirst==0 || FTS5_POS2OFFSET(iPos)==0 ){
rc = sqlite3Fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos);
if( rc!=SQLITE_OK ) goto ismatch_out;
}
for(i=0; i<pPhrase->nTerm; i++){
if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out;
}
}
ismatch_out:
*pbMatch = (pPhrase->poslist.n>0);
for(i=0; i<pPhrase->nTerm; i++){
if( aIter[i].bFlag ) sqlite3_free((u8*)aIter[i].a);
}
if( aIter!=aStatic ) sqlite3_free(aIter);
return rc;
}
typedef struct Fts5LookaheadReader Fts5LookaheadReader;
struct Fts5LookaheadReader {
const u8 *a; /* Buffer containing position list */
int n; /* Size of buffer a[] in bytes */
int i; /* Current offset in position list */
i64 iPos; /* Current position */
i64 iLookahead; /* Next position */
};
#define FTS5_LOOKAHEAD_EOF (((i64)1) << 62)
static int fts5LookaheadReaderNext(Fts5LookaheadReader *p){
p->iPos = p->iLookahead;
if( sqlite3Fts5PoslistNext64(p->a, p->n, &p->i, &p->iLookahead) ){
p->iLookahead = FTS5_LOOKAHEAD_EOF;
}
return (p->iPos==FTS5_LOOKAHEAD_EOF);
}
static int fts5LookaheadReaderInit(
const u8 *a, int n, /* Buffer to read position list from */
Fts5LookaheadReader *p /* Iterator object to initialize */
){
memset(p, 0, sizeof(Fts5LookaheadReader));
p->a = a;
p->n = n;
fts5LookaheadReaderNext(p);
return fts5LookaheadReaderNext(p);
}
typedef struct Fts5NearTrimmer Fts5NearTrimmer;
struct Fts5NearTrimmer {
Fts5LookaheadReader reader; /* Input iterator */
Fts5PoslistWriter writer; /* Writer context */
Fts5Buffer *pOut; /* Output poslist */
};
/*
** The near-set object passed as the first argument contains more than
** one phrase. All phrases currently point to the same row. The
** Fts5ExprPhrase.poslist buffers are populated accordingly. This function
** tests if the current row contains instances of each phrase sufficiently
** close together to meet the NEAR constraint. Non-zero is returned if it
** does, or zero otherwise.
**
** If in/out parameter (*pRc) is set to other than SQLITE_OK when this
** function is called, it is a no-op. Or, if an error (e.g. SQLITE_NOMEM)
** occurs within this function (*pRc) is set accordingly before returning.
** The return value is undefined in both these cases.
**
** If no error occurs and non-zero (a match) is returned, the position-list
** of each phrase object is edited to contain only those entries that
** meet the constraint before returning.
*/
static int fts5ExprNearIsMatch(int *pRc, Fts5ExprNearset *pNear){
Fts5NearTrimmer aStatic[4];
Fts5NearTrimmer *a = aStatic;
Fts5ExprPhrase **apPhrase = pNear->apPhrase;
int i;
int rc = *pRc;
int bMatch;
assert( pNear->nPhrase>1 );
/* If the aStatic[] array is not large enough, allocate a large array
** using sqlite3_malloc(). This approach could be improved upon. */
if( pNear->nPhrase>ArraySize(aStatic) ){
sqlite3_int64 nByte = sizeof(Fts5NearTrimmer) * pNear->nPhrase;
a = (Fts5NearTrimmer*)sqlite3Fts5MallocZero(&rc, nByte);
}else{
memset(aStatic, 0, sizeof(aStatic));
}
if( rc!=SQLITE_OK ){
*pRc = rc;
return 0;
}
/* Initialize a lookahead iterator for each phrase. After passing the
** buffer and buffer size to the lookaside-reader init function, zero
** the phrase poslist buffer. The new poslist for the phrase (containing
** the same entries as the original with some entries removed on account
** of the NEAR constraint) is written over the original even as it is
** being read. This is safe as the entries for the new poslist are a
** subset of the old, so it is not possible for data yet to be read to
** be overwritten. */
for(i=0; i<pNear->nPhrase; i++){
Fts5Buffer *pPoslist = &apPhrase[i]->poslist;
fts5LookaheadReaderInit(pPoslist->p, pPoslist->n, &a[i].reader);
pPoslist->n = 0;
a[i].pOut = pPoslist;
}
while( 1 ){
int iAdv;
i64 iMin;
i64 iMax;
/* This block advances the phrase iterators until they point to a set of
** entries that together comprise a match. */
iMax = a[0].reader.iPos;
do {
bMatch = 1;
for(i=0; i<pNear->nPhrase; i++){
Fts5LookaheadReader *pPos = &a[i].reader;
iMin = iMax - pNear->apPhrase[i]->nTerm - pNear->nNear;
if( pPos->iPos<iMin || pPos->iPos>iMax ){
bMatch = 0;
while( pPos->iPos<iMin ){
if( fts5LookaheadReaderNext(pPos) ) goto ismatch_out;
}
if( pPos->iPos>iMax ) iMax = pPos->iPos;
}
}
}while( bMatch==0 );
/* Add an entry to each output position list */
for(i=0; i<pNear->nPhrase; i++){
i64 iPos = a[i].reader.iPos;
Fts5PoslistWriter *pWriter = &a[i].writer;
if( a[i].pOut->n==0 || iPos!=pWriter->iPrev ){
sqlite3Fts5PoslistWriterAppend(a[i].pOut, pWriter, iPos);
}
}
iAdv = 0;
iMin = a[0].reader.iLookahead;
for(i=0; i<pNear->nPhrase; i++){
if( a[i].reader.iLookahead < iMin ){
iMin = a[i].reader.iLookahead;
iAdv = i;
}
}
if( fts5LookaheadReaderNext(&a[iAdv].reader) ) goto ismatch_out;
}
ismatch_out: {
int bRet = a[0].pOut->n>0;
*pRc = rc;
if( a!=aStatic ) sqlite3_free(a);
return bRet;
}
}
/*
** Advance iterator pIter until it points to a value equal to or laster
** than the initial value of *piLast. If this means the iterator points
** to a value laster than *piLast, update *piLast to the new lastest value.
**
** If the iterator reaches EOF, set *pbEof to true before returning. If
** an error occurs, set *pRc to an error code. If either *pbEof or *pRc
** are set, return a non-zero value. Otherwise, return zero.
*/
static int fts5ExprAdvanceto(
Fts5IndexIter *pIter, /* Iterator to advance */
int bDesc, /* True if iterator is "rowid DESC" */
i64 *piLast, /* IN/OUT: Lastest rowid seen so far */
int *pRc, /* OUT: Error code */
int *pbEof /* OUT: Set to true if EOF */
){
i64 iLast = *piLast;
i64 iRowid;
iRowid = pIter->iRowid;
if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){
int rc = sqlite3Fts5IterNextFrom(pIter, iLast);
if( rc || sqlite3Fts5IterEof(pIter) ){
*pRc = rc;
*pbEof = 1;
return 1;
}
iRowid = pIter->iRowid;
assert( (bDesc==0 && iRowid>=iLast) || (bDesc==1 && iRowid<=iLast) );
}
*piLast = iRowid;
return 0;
}
static int fts5ExprSynonymAdvanceto(
Fts5ExprTerm *pTerm, /* Term iterator to advance */
int bDesc, /* True if iterator is "rowid DESC" */
i64 *piLast, /* IN/OUT: Lastest rowid seen so far */
int *pRc /* OUT: Error code */
){
int rc = SQLITE_OK;
i64 iLast = *piLast;
Fts5ExprTerm *p;
int bEof = 0;
for(p=pTerm; rc==SQLITE_OK && p; p=p->pSynonym){
if( sqlite3Fts5IterEof(p->pIter)==0 ){
i64 iRowid = p->pIter->iRowid;
if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){
rc = sqlite3Fts5IterNextFrom(p->pIter, iLast);
}
}
}
if( rc!=SQLITE_OK ){
*pRc = rc;
bEof = 1;
}else{
*piLast = fts5ExprSynonymRowid(pTerm, bDesc, &bEof);
}
return bEof;
}
static int fts5ExprNearTest(
int *pRc,
Fts5Expr *pExpr, /* Expression that pNear is a part of */
Fts5ExprNode *pNode /* The "NEAR" node (FTS5_STRING) */
){
Fts5ExprNearset *pNear = pNode->pNear;
int rc = *pRc;
if( pExpr->pConfig->eDetail!=FTS5_DETAIL_FULL ){
Fts5ExprTerm *pTerm;
Fts5ExprPhrase *pPhrase = pNear->apPhrase[0];
pPhrase->poslist.n = 0;
for(pTerm=&pPhrase->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){
Fts5IndexIter *pIter = pTerm->pIter;
if( sqlite3Fts5IterEof(pIter)==0 ){
if( pIter->iRowid==pNode->iRowid && pIter->nData>0 ){
pPhrase->poslist.n = 1;
}
}
}
return pPhrase->poslist.n;
}else{
int i;
/* Check that each phrase in the nearset matches the current row.
** Populate the pPhrase->poslist buffers at the same time. If any
** phrase is not a match, break out of the loop early. */
for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym
|| pNear->pColset || pPhrase->aTerm[0].bFirst
){
int bMatch = 0;
rc = fts5ExprPhraseIsMatch(pNode, pPhrase, &bMatch);
if( bMatch==0 ) break;
}else{
Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
fts5BufferSet(&rc, &pPhrase->poslist, pIter->nData, pIter->pData);
}
}
*pRc = rc;
if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){
return 1;
}
return 0;
}
}
/*
** Initialize all term iterators in the pNear object. If any term is found
** to match no documents at all, return immediately without initializing any
** further iterators.
**
** If an error occurs, return an SQLite error code. Otherwise, return
** SQLITE_OK. It is not considered an error if some term matches zero
** documents.
*/
static int fts5ExprNearInitAll(
Fts5Expr *pExpr,
Fts5ExprNode *pNode
){
Fts5ExprNearset *pNear = pNode->pNear;
int i;
assert( pNode->bNomatch==0 );
for(i=0; i<pNear->nPhrase; i++){
Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
if( pPhrase->nTerm==0 ){
pNode->bEof = 1;
return SQLITE_OK;
}else{
int j;
for(j=0; j<pPhrase->nTerm; j++){
Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
Fts5ExprTerm *p;
int bHit = 0;
for(p=pTerm; p; p=p->pSynonym){
int rc;
if( p->pIter ){
sqlite3Fts5IterClose(p->pIter);
p->pIter = 0;
}
rc = sqlite3Fts5IndexQuery(
pExpr->pIndex, p->pTerm, p->nQueryTerm,
(pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
(pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),
pNear->pColset,
&p->pIter
);
assert( (rc==SQLITE_OK)==(p->pIter!=0) );
if( rc!=SQLITE_OK ) return rc;
if( 0==sqlite3Fts5IterEof(p->pIter) ){
bHit = 1;
}
}
if( bHit==0 ){
pNode->bEof = 1;
return SQLITE_OK;
}
}
}
}
pNode->bEof = 0;
return SQLITE_OK;
}
/*
** If pExpr is an ASC iterator, this function returns a value with the
** same sign as:
**
** (iLhs - iRhs)
**
** Otherwise, if this is a DESC iterator, the opposite is returned:
**
** (iRhs - iLhs)
*/
static int fts5RowidCmp(
Fts5Expr *pExpr,
i64 iLhs,
i64 iRhs
){
assert( pExpr->bDesc==0 || pExpr->bDesc==1 );
if( pExpr->bDesc==0 ){
if( iLhs<iRhs ) return -1;
return (iLhs > iRhs);
}else{
if( iLhs>iRhs ) return -1;
return (iLhs < iRhs);
}
}
static void fts5ExprSetEof(Fts5ExprNode *pNode){
int i;
pNode->bEof = 1;
pNode->bNomatch = 0;
for(i=0; i<pNode->nChild; i++){
fts5ExprSetEof(pNode->apChild[i]);
}
}
static void fts5ExprNodeZeroPoslist(Fts5ExprNode *pNode){
if( pNode->eType==FTS5_STRING || pNode->eType==FTS5_TERM ){
Fts5ExprNearset *pNear = pNode->pNear;
int i;
for(i=0; i<pNear->nPhrase; i++){
Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
pPhrase->poslist.n = 0;
}
}else{
int i;
for(i=0; i<pNode->nChild; i++){
fts5ExprNodeZeroPoslist(pNode->apChild[i]);
}
}
}
/*
** Compare the values currently indicated by the two nodes as follows:
**
** res = (*p1) - (*p2)
**
** Nodes that point to values that come later in the iteration order are
** considered to be larger. Nodes at EOF are the largest of all.
**
** This means that if the iteration order is ASC, then numerically larger
** rowids are considered larger. Or if it is the default DESC, numerically
** smaller rowids are larger.
*/
static int fts5NodeCompare(
Fts5Expr *pExpr,
Fts5ExprNode *p1,
Fts5ExprNode *p2
){
if( p2->bEof ) return -1;
if( p1->bEof ) return +1;
return fts5RowidCmp(pExpr, p1->iRowid, p2->iRowid);
}
/*
** All individual term iterators in pNear are guaranteed to be valid when
** this function is called. This function checks if all term iterators
** point to the same rowid, and if not, advances them until they do.
** If an EOF is reached before this happens, *pbEof is set to true before
** returning.
**
** SQLITE_OK is returned if an error occurs, or an SQLite error code
** otherwise. It is not considered an error code if an iterator reaches
** EOF.
*/
static int fts5ExprNodeTest_STRING(
Fts5Expr *pExpr, /* Expression pPhrase belongs to */
Fts5ExprNode *pNode
){
Fts5ExprNearset *pNear = pNode->pNear;
Fts5ExprPhrase *pLeft = pNear->apPhrase[0];
int rc = SQLITE_OK;
i64 iLast; /* Lastest rowid any iterator points to */
int i, j; /* Phrase and token index, respectively */
int bMatch; /* True if all terms are at the same rowid */
const int bDesc = pExpr->bDesc;
/* Check that this node should not be FTS5_TERM */
assert( pNear->nPhrase>1
|| pNear->apPhrase[0]->nTerm>1
|| pNear->apPhrase[0]->aTerm[0].pSynonym
|| pNear->apPhrase[0]->aTerm[0].bFirst
);
/* Initialize iLast, the "lastest" rowid any iterator points to. If the
** iterator skips through rowids in the default ascending order, this means
** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it
** means the minimum rowid. */
if( pLeft->aTerm[0].pSynonym ){
iLast = fts5ExprSynonymRowid(&pLeft->aTerm[0], bDesc, 0);
}else{
iLast = pLeft->aTerm[0].pIter->iRowid;
}
do {
bMatch = 1;
for(i=0; i<pNear->nPhrase; i++){
Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
for(j=0; j<pPhrase->nTerm; j++){
Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
if( pTerm->pSynonym ){
i64 iRowid = fts5ExprSynonymRowid(pTerm, bDesc, 0);
if( iRowid==iLast ) continue;
bMatch = 0;
if( fts5ExprSynonymAdvanceto(pTerm, bDesc, &iLast, &rc) ){
pNode->bNomatch = 0;
pNode->bEof = 1;
return rc;
}
}else{
Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter;
if( pIter->iRowid==iLast || pIter->bEof ) continue;
bMatch = 0;
if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){
return rc;
}
}
}
}
}while( bMatch==0 );
pNode->iRowid = iLast;
pNode->bNomatch = ((0==fts5ExprNearTest(&rc, pExpr, pNode)) && rc==SQLITE_OK);
assert( pNode->bEof==0 || pNode->bNomatch==0 );
return rc;
}
/*
** Advance the first term iterator in the first phrase of pNear. Set output
** variable *pbEof to true if it reaches EOF or if an error occurs.
**
** Return SQLITE_OK if successful, or an SQLite error code if an error
** occurs.
*/
static int fts5ExprNodeNext_STRING(
Fts5Expr *pExpr, /* Expression pPhrase belongs to */
Fts5ExprNode *pNode, /* FTS5_STRING or FTS5_TERM node */
int bFromValid,
i64 iFrom
){
Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0];
int rc = SQLITE_OK;
pNode->bNomatch = 0;
if( pTerm->pSynonym ){
int bEof = 1;
Fts5ExprTerm *p;
/* Find the firstest rowid any synonym points to. */
i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc, 0);
/* Advance each iterator that currently points to iRowid. Or, if iFrom
** is valid - each iterator that points to a rowid before iFrom. */
for(p=pTerm; p; p=p->pSynonym){
if( sqlite3Fts5IterEof(p->pIter)==0 ){
i64 ii = p->pIter->iRowid;
if( ii==iRowid
|| (bFromValid && ii!=iFrom && (ii>iFrom)==pExpr->bDesc)
){
if( bFromValid ){
rc = sqlite3Fts5IterNextFrom(p->pIter, iFrom);
}else{
rc = sqlite3Fts5IterNext(p->pIter);
}
if( rc!=SQLITE_OK ) break;
if( sqlite3Fts5IterEof(p->pIter)==0 ){
bEof = 0;
}
}else{
bEof = 0;
}
}
}
/* Set the EOF flag if either all synonym iterators are at EOF or an
** error has occurred. */
pNode->bEof = (rc || bEof);
}else{
Fts5IndexIter *pIter = pTerm->pIter;
assert( Fts5NodeIsString(pNode) );
if( bFromValid ){
rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
}else{
rc = sqlite3Fts5IterNext(pIter);
}
pNode->bEof = (rc || sqlite3Fts5IterEof(pIter));
}
if( pNode->bEof==0 ){
assert( rc==SQLITE_OK );
rc = fts5ExprNodeTest_STRING(pExpr, pNode);
}
return rc;
}
static int fts5ExprNodeTest_TERM(
Fts5Expr *pExpr, /* Expression that pNear is a part of */
Fts5ExprNode *pNode /* The "NEAR" node (FTS5_TERM) */
){
/* As this "NEAR" object is actually a single phrase that consists
** of a single term only, grab pointers into the poslist managed by the
** fts5_index.c iterator object. This is much faster than synthesizing
** a new poslist the way we have to for more complicated phrase or NEAR
** expressions. */
Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0];
Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
assert( pNode->eType==FTS5_TERM );
assert( pNode->pNear->nPhrase==1 && pPhrase->nTerm==1 );
assert( pPhrase->aTerm[0].pSynonym==0 );
pPhrase->poslist.n = pIter->nData;
if( pExpr->pConfig->eDetail==FTS5_DETAIL_FULL ){
pPhrase->poslist.p = (u8*)pIter->pData;
}
pNode->iRowid = pIter->iRowid;
pNode->bNomatch = (pPhrase->poslist.n==0);
return SQLITE_OK;
}
/*
** xNext() method for a node of type FTS5_TERM.
*/
static int fts5ExprNodeNext_TERM(
Fts5Expr *pExpr,
Fts5ExprNode *pNode,
int bFromValid,
i64 iFrom
){
int rc;
Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter;
assert( pNode->bEof==0 );
if( bFromValid ){
rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
}else{
rc = sqlite3Fts5IterNext(pIter);
}
if( rc==SQLITE_OK && sqlite3Fts5IterEof(pIter)==0 ){
rc = fts5ExprNodeTest_TERM(pExpr, pNode);
}else{
pNode->bEof = 1;
pNode->bNomatch = 0;
}
return rc;
}
static void fts5ExprNodeTest_OR(
Fts5Expr *pExpr, /* Expression of which pNode is a part */
Fts5ExprNode *pNode /* Expression node to test */
){
Fts5ExprNode *pNext = pNode->apChild[0];
int i;
for(i=1; i<pNode->nChild; i++){
Fts5ExprNode *pChild = pNode->apChild[i];
int cmp = fts5NodeCompare(pExpr, pNext, pChild);
if( cmp>0 || (cmp==0 && pChild->bNomatch==0) ){
pNext = pChild;
}
}
pNode->iRowid = pNext->iRowid;
pNode->bEof = pNext->bEof;
pNode->bNomatch = pNext->bNomatch;
}
static int fts5ExprNodeNext_OR(
Fts5Expr *pExpr,
Fts5ExprNode *pNode,
int bFromValid,
i64 iFrom
){
int i;
i64 iLast = pNode->iRowid;
for(i=0; i<pNode->nChild; i++){
Fts5ExprNode *p1 = pNode->apChild[i];
assert( p1->bEof || fts5RowidCmp(pExpr, p1->iRowid, iLast)>=0 );
if( p1->bEof==0 ){
if( (p1->iRowid==iLast)
|| (bFromValid && fts5RowidCmp(pExpr, p1->iRowid, iFrom)<0)
){
int rc = fts5ExprNodeNext(pExpr, p1, bFromValid, iFrom);
if( rc!=SQLITE_OK ){
pNode->bNomatch = 0;
return rc;
}
}
}
}
fts5ExprNodeTest_OR(pExpr, pNode);
return SQLITE_OK;
}
/*
** Argument pNode is an FTS5_AND node.
*/
static int fts5ExprNodeTest_AND(
Fts5Expr *pExpr, /* Expression pPhrase belongs to */
Fts5ExprNode *pAnd /* FTS5_AND node to advance */
){
int iChild;
i64 iLast = pAnd->iRowid;
int rc = SQLITE_OK;
int bMatch;
assert( pAnd->bEof==0 );
do {
pAnd->bNomatch = 0;
bMatch = 1;
for(iChild=0; iChild<pAnd->nChild; iChild++){
Fts5ExprNode *pChild = pAnd->apChild[iChild];
int cmp = fts5RowidCmp(pExpr, iLast, pChild->iRowid);
if( cmp>0 ){
/* Advance pChild until it points to iLast or laster */
rc = fts5ExprNodeNext(pExpr, pChild, 1, iLast);
if( rc!=SQLITE_OK ){
pAnd->bNomatch = 0;
return rc;
}
}
/* If the child node is now at EOF, so is the parent AND node. Otherwise,
** the child node is guaranteed to have advanced at least as far as
** rowid iLast. So if it is not at exactly iLast, pChild->iRowid is the
** new lastest rowid seen so far. */
assert( pChild->bEof || fts5RowidCmp(pExpr, iLast, pChild->iRowid)<=0 );
if( pChild->bEof ){
fts5ExprSetEof(pAnd);
bMatch = 1;
break;
}else if( iLast!=pChild->iRowid ){
bMatch = 0;
iLast = pChild->iRowid;
}
if( pChild->bNomatch ){
pAnd->bNomatch = 1;
}
}
}while( bMatch==0 );
if( pAnd->bNomatch && pAnd!=pExpr->pRoot ){
fts5ExprNodeZeroPoslist(pAnd);
}
pAnd->iRowid = iLast;
return SQLITE_OK;
}
static int fts5ExprNodeNext_AND(
Fts5Expr *pExpr,
Fts5ExprNode *pNode,
int bFromValid,
i64 iFrom
){
int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom);
if( rc==SQLITE_OK ){
rc = fts5ExprNodeTest_AND(pExpr, pNode);
}else{
pNode->bNomatch = 0;
}
return rc;
}
static int fts5ExprNodeTest_NOT(
Fts5Expr *pExpr, /* Expression pPhrase belongs to */
Fts5ExprNode *pNode /* FTS5_NOT node to advance */
){
int rc = SQLITE_OK;
Fts5ExprNode *p1 = pNode->apChild[0];
Fts5ExprNode *p2 = pNode->apChild[1];
assert( pNode->nChild==2 );
while( rc==SQLITE_OK && p1->bEof==0 ){
int cmp = fts5NodeCompare(pExpr, p1, p2);
if( cmp>0 ){
rc = fts5ExprNodeNext(pExpr, p2, 1, p1->iRowid);
cmp = fts5NodeCompare(pExpr, p1, p2);
}
assert( rc!=SQLITE_OK || cmp<=0 );
if( cmp || p2->bNomatch ) break;
rc = fts5ExprNodeNext(pExpr, p1, 0, 0);
}
pNode->bEof = p1->bEof;
pNode->bNomatch = p1->bNomatch;
pNode->iRowid = p1->iRowid;
if( p1->bEof ){
fts5ExprNodeZeroPoslist(p2);
}
return rc;
}
static int fts5ExprNodeNext_NOT(
Fts5Expr *pExpr,
Fts5ExprNode *pNode,
int bFromValid,
i64 iFrom
){
int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom);
if( rc==SQLITE_OK ){
rc = fts5ExprNodeTest_NOT(pExpr, pNode);
}
if( rc!=SQLITE_OK ){
pNode->bNomatch = 0;
}
return rc;
}
/*
** If pNode currently points to a match, this function returns SQLITE_OK
** without modifying it. Otherwise, pNode is advanced until it does point
** to a match or EOF is reached.
*/
static int fts5ExprNodeTest(
Fts5Expr *pExpr, /* Expression of which pNode is a part */
Fts5ExprNode *pNode /* Expression node to test */
){
int rc = SQLITE_OK;
if( pNode->bEof==0 ){
switch( pNode->eType ){
case FTS5_STRING: {
rc = fts5ExprNodeTest_STRING(pExpr, pNode);
break;
}
case FTS5_TERM: {
rc = fts5ExprNodeTest_TERM(pExpr, pNode);
break;
}
case FTS5_AND: {
rc = fts5ExprNodeTest_AND(pExpr, pNode);
break;
}
case FTS5_OR: {
fts5ExprNodeTest_OR(pExpr, pNode);
break;
}
default: assert( pNode->eType==FTS5_NOT ); {
rc = fts5ExprNodeTest_NOT(pExpr, pNode);
break;
}
}
}
return rc;
}
/*
** Set node pNode, which is part of expression pExpr, to point to the first
** match. If there are no matches, set the Node.bEof flag to indicate EOF.
**
** Return an SQLite error code if an error occurs, or SQLITE_OK otherwise.
** It is not an error if there are no matches.
*/
static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){
int rc = SQLITE_OK;
pNode->bEof = 0;
pNode->bNomatch = 0;
if( Fts5NodeIsString(pNode) ){
/* Initialize all term iterators in the NEAR object. */
rc = fts5ExprNearInitAll(pExpr, pNode);
}else if( pNode->xNext==0 ){
pNode->bEof = 1;
}else{
int i;
int nEof = 0;
for(i=0; i<pNode->nChild && rc==SQLITE_OK; i++){
Fts5ExprNode *pChild = pNode->apChild[i];
rc = fts5ExprNodeFirst(pExpr, pNode->apChild[i]);
assert( pChild->bEof==0 || pChild->bEof==1 );
nEof += pChild->bEof;
}
pNode->iRowid = pNode->apChild[0]->iRowid;
switch( pNode->eType ){
case FTS5_AND:
if( nEof>0 ) fts5ExprSetEof(pNode);
break;
case FTS5_OR:
if( pNode->nChild==nEof ) fts5ExprSetEof(pNode);
break;
default:
assert( pNode->eType==FTS5_NOT );
pNode->bEof = pNode->apChild[0]->bEof;
break;
}
}
if( rc==SQLITE_OK ){
rc = fts5ExprNodeTest(pExpr, pNode);
}
return rc;
}
/*
** Begin iterating through the set of documents in index pIdx matched by
** the MATCH expression passed as the first argument. If the "bDesc"
** parameter is passed a non-zero value, iteration is in descending rowid
** order. Or, if it is zero, in ascending order.
**
** If iterating in ascending rowid order (bDesc==0), the first document
** visited is that with the smallest rowid that is larger than or equal
** to parameter iFirst. Or, if iterating in ascending order (bDesc==1),
** then the first document visited must have a rowid smaller than or
** equal to iFirst.
**
** Return SQLITE_OK if successful, or an SQLite error code otherwise. It
** is not considered an error if the query does not match any documents.
*/
int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, i64 iFirst, int bDesc){
Fts5ExprNode *pRoot = p->pRoot;
int rc; /* Return code */
p->pIndex = pIdx;
p->bDesc = bDesc;
rc = fts5ExprNodeFirst(p, pRoot);
/* If not at EOF but the current rowid occurs earlier than iFirst in
** the iteration order, move to document iFirst or later. */
if( rc==SQLITE_OK
&& 0==pRoot->bEof
&& fts5RowidCmp(p, pRoot->iRowid, iFirst)<0
){
rc = fts5ExprNodeNext(p, pRoot, 1, iFirst);
}
/* If the iterator is not at a real match, skip forward until it is. */
while( pRoot->bNomatch && rc==SQLITE_OK ){
assert( pRoot->bEof==0 );
rc = fts5ExprNodeNext(p, pRoot, 0, 0);
}
return rc;
}
/*
** Move to the next document
**
** Return SQLITE_OK if successful, or an SQLite error code otherwise. It
** is not considered an error if the query does not match any documents.
*/
int sqlite3Fts5ExprNext(Fts5Expr *p, i64 iLast){
int rc;
Fts5ExprNode *pRoot = p->pRoot;
assert( pRoot->bEof==0 && pRoot->bNomatch==0 );
do {
rc = fts5ExprNodeNext(p, pRoot, 0, 0);
assert( pRoot->bNomatch==0 || (rc==SQLITE_OK && pRoot->bEof==0) );
}while( pRoot->bNomatch );
if( fts5RowidCmp(p, pRoot->iRowid, iLast)>0 ){
pRoot->bEof = 1;
}
return rc;
}
int sqlite3Fts5ExprEof(Fts5Expr *p){
return p->pRoot->bEof;
}
i64 sqlite3Fts5ExprRowid(Fts5Expr *p){
return p->pRoot->iRowid;
}
static int fts5ParseStringFromToken(Fts5Token *pToken, char **pz){
int rc = SQLITE_OK;
*pz = sqlite3Fts5Strndup(&rc, pToken->p, pToken->n);
return rc;
}
/*
** Free the phrase object passed as the only argument.
*/
static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){
if( pPhrase ){
int i;
for(i=0; i<pPhrase->nTerm; i++){
Fts5ExprTerm *pSyn;
Fts5ExprTerm *pNext;
Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
sqlite3_free(pTerm->pTerm);
sqlite3Fts5IterClose(pTerm->pIter);
for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){
pNext = pSyn->pSynonym;
sqlite3Fts5IterClose(pSyn->pIter);
fts5BufferFree((Fts5Buffer*)&pSyn[1]);
sqlite3_free(pSyn);
}
}
if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist);
sqlite3_free(pPhrase);
}
}
/*
** Set the "bFirst" flag on the first token of the phrase passed as the
** only argument.
*/
void sqlite3Fts5ParseSetCaret(Fts5ExprPhrase *pPhrase){
if( pPhrase && pPhrase->nTerm ){
pPhrase->aTerm[0].bFirst = 1;
}
}
/*
** If argument pNear is NULL, then a new Fts5ExprNearset object is allocated
** and populated with pPhrase. Or, if pNear is not NULL, phrase pPhrase is
** appended to it and the results returned.
**
** If an OOM error occurs, both the pNear and pPhrase objects are freed and
** NULL returned.
*/
Fts5ExprNearset *sqlite3Fts5ParseNearset(
Fts5Parse *pParse, /* Parse context */
Fts5ExprNearset *pNear, /* Existing nearset, or NULL */
Fts5ExprPhrase *pPhrase /* Recently parsed phrase */
){
const int SZALLOC = 8;
Fts5ExprNearset *pRet = 0;
if( pParse->rc==SQLITE_OK ){
if( pPhrase==0 ){
return pNear;
}
if( pNear==0 ){
sqlite3_int64 nByte;
nByte = sizeof(Fts5ExprNearset) + SZALLOC * sizeof(Fts5ExprPhrase*);
pRet = sqlite3_malloc64(nByte);
if( pRet==0 ){
pParse->rc = SQLITE_NOMEM;
}else{
memset(pRet, 0, (size_t)nByte);
}
}else if( (pNear->nPhrase % SZALLOC)==0 ){
int nNew = pNear->nPhrase + SZALLOC;
sqlite3_int64 nByte;
nByte = sizeof(Fts5ExprNearset) + nNew * sizeof(Fts5ExprPhrase*);
pRet = (Fts5ExprNearset*)sqlite3_realloc64(pNear, nByte);
if( pRet==0 ){
pParse->rc = SQLITE_NOMEM;
}
}else{
pRet = pNear;
}
}
if( pRet==0 ){
assert( pParse->rc!=SQLITE_OK );
sqlite3Fts5ParseNearsetFree(pNear);
sqlite3Fts5ParsePhraseFree(pPhrase);
}else{
if( pRet->nPhrase>0 ){
Fts5ExprPhrase *pLast = pRet->apPhrase[pRet->nPhrase-1];
assert( pParse!=0 );
assert( pParse->apPhrase!=0 );
assert( pParse->nPhrase>=2 );
assert( pLast==pParse->apPhrase[pParse->nPhrase-2] );
if( pPhrase->nTerm==0 ){
fts5ExprPhraseFree(pPhrase);
pRet->nPhrase--;
pParse->nPhrase--;
pPhrase = pLast;
}else if( pLast->nTerm==0 ){
fts5ExprPhraseFree(pLast);
pParse->apPhrase[pParse->nPhrase-2] = pPhrase;
pParse->nPhrase--;
pRet->nPhrase--;
}
}
pRet->apPhrase[pRet->nPhrase++] = pPhrase;
}
return pRet;
}
typedef struct TokenCtx TokenCtx;
struct TokenCtx {
Fts5ExprPhrase *pPhrase;
Fts5Config *pConfig;
int rc;
};
/*
** Callback for tokenizing terms used by ParseTerm().
*/
static int fts5ParseTokenize(
void *pContext, /* Pointer to Fts5InsertCtx object */
int tflags, /* Mask of FTS5_TOKEN_* flags */
const char *pToken, /* Buffer containing token */
int nToken, /* Size of token in bytes */
int iUnused1, /* Start offset of token */
int iUnused2 /* End offset of token */
){
int rc = SQLITE_OK;
const int SZALLOC = 8;
TokenCtx *pCtx = (TokenCtx*)pContext;
Fts5ExprPhrase *pPhrase = pCtx->pPhrase;
UNUSED_PARAM2(iUnused1, iUnused2);
/* If an error has already occurred, this is a no-op */
if( pCtx->rc!=SQLITE_OK ) return pCtx->rc;
if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE;
if( pPhrase && pPhrase->nTerm>0 && (tflags & FTS5_TOKEN_COLOCATED) ){
Fts5ExprTerm *pSyn;
sqlite3_int64 nByte = sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer) + nToken+1;
pSyn = (Fts5ExprTerm*)sqlite3_malloc64(nByte);
if( pSyn==0 ){
rc = SQLITE_NOMEM;
}else{
memset(pSyn, 0, (size_t)nByte);
pSyn->pTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer);
pSyn->nFullTerm = pSyn->nQueryTerm = nToken;
if( pCtx->pConfig->bTokendata ){
pSyn->nQueryTerm = (int)strlen(pSyn->pTerm);
}
memcpy(pSyn->pTerm, pToken, nToken);
pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym;
pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn;
}
}else{
Fts5ExprTerm *pTerm;
if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
Fts5ExprPhrase *pNew;
int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);
pNew = (Fts5ExprPhrase*)sqlite3_realloc64(pPhrase,
sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
);
if( pNew==0 ){
rc = SQLITE_NOMEM;
}else{
if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
pCtx->pPhrase = pPhrase = pNew;
pNew->nTerm = nNew - SZALLOC;
}
}
if( rc==SQLITE_OK ){
pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
memset(pTerm, 0, sizeof(Fts5ExprTerm));
pTerm->pTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
pTerm->nFullTerm = pTerm->nQueryTerm = nToken;
if( pCtx->pConfig->bTokendata && rc==SQLITE_OK ){
pTerm->nQueryTerm = (int)strlen(pTerm->pTerm);
}
}
}
pCtx->rc = rc;
return rc;
}
/*
** Free the phrase object passed as the only argument.
*/
void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase *pPhrase){
fts5ExprPhraseFree(pPhrase);
}
/*
** Free the phrase object passed as the second argument.
*/
void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset *pNear){
if( pNear ){
int i;
for(i=0; i<pNear->nPhrase; i++){
fts5ExprPhraseFree(pNear->apPhrase[i]);
}
sqlite3_free(pNear->pColset);
sqlite3_free(pNear);
}
}
void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p){
assert( pParse->pExpr==0 );
pParse->pExpr = p;
}
static int parseGrowPhraseArray(Fts5Parse *pParse){
if( (pParse->nPhrase % 8)==0 ){
sqlite3_int64 nByte = sizeof(Fts5ExprPhrase*) * (pParse->nPhrase + 8);
Fts5ExprPhrase **apNew;
apNew = (Fts5ExprPhrase**)sqlite3_realloc64(pParse->apPhrase, nByte);
if( apNew==0 ){
pParse->rc = SQLITE_NOMEM;
return SQLITE_NOMEM;
}
pParse->apPhrase = apNew;
}
return SQLITE_OK;
}
/*
** This function is called by the parser to process a string token. The
** string may or may not be quoted. In any case it is tokenized and a
** phrase object consisting of all tokens returned.
*/
Fts5ExprPhrase *sqlite3Fts5ParseTerm(
Fts5Parse *pParse, /* Parse context */
Fts5ExprPhrase *pAppend, /* Phrase to append to */
Fts5Token *pToken, /* String to tokenize */
int bPrefix /* True if there is a trailing "*" */
){
Fts5Config *pConfig = pParse->pConfig;
TokenCtx sCtx; /* Context object passed to callback */
int rc; /* Tokenize return code */
char *z = 0;
memset(&sCtx, 0, sizeof(TokenCtx));
sCtx.pPhrase = pAppend;
sCtx.pConfig = pConfig;
rc = fts5ParseStringFromToken(pToken, &z);
if( rc==SQLITE_OK ){
int flags = FTS5_TOKENIZE_QUERY | (bPrefix ? FTS5_TOKENIZE_PREFIX : 0);
int n;
sqlite3Fts5Dequote(z);
n = (int)strlen(z);
rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize);
}
sqlite3_free(z);
if( rc || (rc = sCtx.rc) ){
pParse->rc = rc;
fts5ExprPhraseFree(sCtx.pPhrase);
sCtx.pPhrase = 0;
}else{
if( pAppend==0 ){
if( parseGrowPhraseArray(pParse) ){
fts5ExprPhraseFree(sCtx.pPhrase);
return 0;
}
pParse->nPhrase++;
}
if( sCtx.pPhrase==0 ){
/* This happens when parsing a token or quoted phrase that contains
** no token characters at all. (e.g ... MATCH '""'). */
sCtx.pPhrase = sqlite3Fts5MallocZero(&pParse->rc, sizeof(Fts5ExprPhrase));
}else if( sCtx.pPhrase->nTerm ){
sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = (u8)bPrefix;
}
pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase;
}
return sCtx.pPhrase;
}
/*
** Create a new FTS5 expression by cloning phrase iPhrase of the
** expression passed as the second argument.
*/
int sqlite3Fts5ExprClonePhrase(
Fts5Expr *pExpr,
int iPhrase,
Fts5Expr **ppNew
){
int rc = SQLITE_OK; /* Return code */
Fts5ExprPhrase *pOrig = 0; /* The phrase extracted from pExpr */
Fts5Expr *pNew = 0; /* Expression to return via *ppNew */
TokenCtx sCtx = {0,0,0}; /* Context object for fts5ParseTokenize */
if( iPhrase<0 || iPhrase>=pExpr->nPhrase ){
rc = SQLITE_RANGE;
}else{
pOrig = pExpr->apExprPhrase[iPhrase];
pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr));
}
if( rc==SQLITE_OK ){
pNew->apExprPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc,
sizeof(Fts5ExprPhrase*));
}
if( rc==SQLITE_OK ){
pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc,
sizeof(Fts5ExprNode));
}
if( rc==SQLITE_OK ){
pNew->pRoot->pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc,
sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*));
}
if( rc==SQLITE_OK && ALWAYS(pOrig!=0) ){
Fts5Colset *pColsetOrig = pOrig->pNode->pNear->pColset;
if( pColsetOrig ){
sqlite3_int64 nByte;
Fts5Colset *pColset;
nByte = sizeof(Fts5Colset) + (pColsetOrig->nCol-1) * sizeof(int);
pColset = (Fts5Colset*)sqlite3Fts5MallocZero(&rc, nByte);
if( pColset ){
memcpy(pColset, pColsetOrig, (size_t)nByte);
}
pNew->pRoot->pNear->pColset = pColset;
}
}
if( rc==SQLITE_OK ){
if( pOrig->nTerm ){
int i; /* Used to iterate through phrase terms */
sCtx.pConfig = pExpr->pConfig;
for(i=0; rc==SQLITE_OK && i<pOrig->nTerm; i++){
int tflags = 0;
Fts5ExprTerm *p;
for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK; p=p->pSynonym){
rc = fts5ParseTokenize((void*)&sCtx,tflags,p->pTerm,p->nFullTerm,0,0);
tflags = FTS5_TOKEN_COLOCATED;
}
if( rc==SQLITE_OK ){
sCtx.pPhrase->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix;
sCtx.pPhrase->aTerm[i].bFirst = pOrig->aTerm[i].bFirst;
}
}
}else{
/* This happens when parsing a token or quoted phrase that contains
** no token characters at all. (e.g ... MATCH '""'). */
sCtx.pPhrase = sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprPhrase));
}
}
if( rc==SQLITE_OK && ALWAYS(sCtx.pPhrase) ){
/* All the allocations succeeded. Put the expression object together. */
pNew->pIndex = pExpr->pIndex;
pNew->pConfig = pExpr->pConfig;
pNew->nPhrase = 1;
pNew->apExprPhrase[0] = sCtx.pPhrase;
pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase;
pNew->pRoot->pNear->nPhrase = 1;
sCtx.pPhrase->pNode = pNew->pRoot;
if( pOrig->nTerm==1
&& pOrig->aTerm[0].pSynonym==0
&& pOrig->aTerm[0].bFirst==0
){
pNew->pRoot->eType = FTS5_TERM;
pNew->pRoot->xNext = fts5ExprNodeNext_TERM;
}else{
pNew->pRoot->eType = FTS5_STRING;
pNew->pRoot->xNext = fts5ExprNodeNext_STRING;
}
}else{
sqlite3Fts5ExprFree(pNew);
fts5ExprPhraseFree(sCtx.pPhrase);
pNew = 0;
}
*ppNew = pNew;
return rc;
}
/*
** Token pTok has appeared in a MATCH expression where the NEAR operator
** is expected. If token pTok does not contain "NEAR", store an error
** in the pParse object.
*/
void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){
if( pTok->n!=4 || memcmp("NEAR", pTok->p, 4) ){
sqlite3Fts5ParseError(
pParse, "fts5: syntax error near \"%.*s\"", pTok->n, pTok->p
);
}
}
void sqlite3Fts5ParseSetDistance(
Fts5Parse *pParse,
Fts5ExprNearset *pNear,
Fts5Token *p
){
if( pNear ){
int nNear = 0;
int i;
if( p->n ){
for(i=0; i<p->n; i++){
char c = (char)p->p[i];
if( c<'0' || c>'9' ){
sqlite3Fts5ParseError(
pParse, "expected integer, got \"%.*s\"", p->n, p->p
);
return;
}
nNear = nNear * 10 + (p->p[i] - '0');
}
}else{
nNear = FTS5_DEFAULT_NEARDIST;
}
pNear->nNear = nNear;
}
}
/*
** The second argument passed to this function may be NULL, or it may be
** an existing Fts5Colset object. This function returns a pointer to
** a new colset object containing the contents of (p) with new value column
** number iCol appended.
**
** If an OOM error occurs, store an error code in pParse and return NULL.
** The old colset object (if any) is not freed in this case.
*/
static Fts5Colset *fts5ParseColset(
Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */
Fts5Colset *p, /* Existing colset object */
int iCol /* New column to add to colset object */
){
int nCol = p ? p->nCol : 0; /* Num. columns already in colset object */
Fts5Colset *pNew; /* New colset object to return */
assert( pParse->rc==SQLITE_OK );
assert( iCol>=0 && iCol<pParse->pConfig->nCol );
pNew = sqlite3_realloc64(p, sizeof(Fts5Colset) + sizeof(int)*nCol);
if( pNew==0 ){
pParse->rc = SQLITE_NOMEM;
}else{
int *aiCol = pNew->aiCol;
int i, j;
for(i=0; i<nCol; i++){
if( aiCol[i]==iCol ) return pNew;
if( aiCol[i]>iCol ) break;
}
for(j=nCol; j>i; j--){
aiCol[j] = aiCol[j-1];
}
aiCol[i] = iCol;
pNew->nCol = nCol+1;
#ifndef NDEBUG
/* Check that the array is in order and contains no duplicate entries. */
for(i=1; i<pNew->nCol; i++) assert( pNew->aiCol[i]>pNew->aiCol[i-1] );
#endif
}
return pNew;
}
/*
** Allocate and return an Fts5Colset object specifying the inverse of
** the colset passed as the second argument. Free the colset passed
** as the second argument before returning.
*/
Fts5Colset *sqlite3Fts5ParseColsetInvert(Fts5Parse *pParse, Fts5Colset *p){
Fts5Colset *pRet;
int nCol = pParse->pConfig->nCol;
pRet = (Fts5Colset*)sqlite3Fts5MallocZero(&pParse->rc,
sizeof(Fts5Colset) + sizeof(int)*nCol
);
if( pRet ){
int i;
int iOld = 0;
for(i=0; i<nCol; i++){
if( iOld>=p->nCol || p->aiCol[iOld]!=i ){
pRet->aiCol[pRet->nCol++] = i;
}else{
iOld++;
}
}
}
sqlite3_free(p);
return pRet;
}
Fts5Colset *sqlite3Fts5ParseColset(
Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */
Fts5Colset *pColset, /* Existing colset object */
Fts5Token *p
){
Fts5Colset *pRet = 0;
int iCol;
char *z; /* Dequoted copy of token p */
z = sqlite3Fts5Strndup(&pParse->rc, p->p, p->n);
if( pParse->rc==SQLITE_OK ){
Fts5Config *pConfig = pParse->pConfig;
sqlite3Fts5Dequote(z);
for(iCol=0; iCol<pConfig->nCol; iCol++){
if( 0==sqlite3_stricmp(pConfig->azCol[iCol], z) ) break;
}
if( iCol==pConfig->nCol ){
sqlite3Fts5ParseError(pParse, "no such column: %s", z);
}else{
pRet = fts5ParseColset(pParse, pColset, iCol);
}
sqlite3_free(z);
}
if( pRet==0 ){
assert( pParse->rc!=SQLITE_OK );
sqlite3_free(pColset);
}
return pRet;
}
/*
** If argument pOrig is NULL, or if (*pRc) is set to anything other than
** SQLITE_OK when this function is called, NULL is returned.
**
** Otherwise, a copy of (*pOrig) is made into memory obtained from
** sqlite3Fts5MallocZero() and a pointer to it returned. If the allocation
** fails, (*pRc) is set to SQLITE_NOMEM and NULL is returned.
*/
static Fts5Colset *fts5CloneColset(int *pRc, Fts5Colset *pOrig){
Fts5Colset *pRet;
if( pOrig ){
sqlite3_int64 nByte = sizeof(Fts5Colset) + (pOrig->nCol-1) * sizeof(int);
pRet = (Fts5Colset*)sqlite3Fts5MallocZero(pRc, nByte);
if( pRet ){
memcpy(pRet, pOrig, (size_t)nByte);
}
}else{
pRet = 0;
}
return pRet;
}
/*
** Remove from colset pColset any columns that are not also in colset pMerge.
*/
static void fts5MergeColset(Fts5Colset *pColset, Fts5Colset *pMerge){
int iIn = 0; /* Next input in pColset */
int iMerge = 0; /* Next input in pMerge */
int iOut = 0; /* Next output slot in pColset */
while( iIn<pColset->nCol && iMerge<pMerge->nCol ){
int iDiff = pColset->aiCol[iIn] - pMerge->aiCol[iMerge];
if( iDiff==0 ){
pColset->aiCol[iOut++] = pMerge->aiCol[iMerge];
iMerge++;
iIn++;
}else if( iDiff>0 ){
iMerge++;
}else{
iIn++;
}
}
pColset->nCol = iOut;
}
/*
** Recursively apply colset pColset to expression node pNode and all of
** its decendents. If (*ppFree) is not NULL, it contains a spare copy
** of pColset. This function may use the spare copy and set (*ppFree) to
** zero, or it may create copies of pColset using fts5CloneColset().
*/
static void fts5ParseSetColset(
Fts5Parse *pParse,
Fts5ExprNode *pNode,
Fts5Colset *pColset,
Fts5Colset **ppFree
){
if( pParse->rc==SQLITE_OK ){
assert( pNode->eType==FTS5_TERM || pNode->eType==FTS5_STRING
|| pNode->eType==FTS5_AND || pNode->eType==FTS5_OR
|| pNode->eType==FTS5_NOT || pNode->eType==FTS5_EOF
);
if( pNode->eType==FTS5_STRING || pNode->eType==FTS5_TERM ){
Fts5ExprNearset *pNear = pNode->pNear;
if( pNear->pColset ){
fts5MergeColset(pNear->pColset, pColset);
if( pNear->pColset->nCol==0 ){
pNode->eType = FTS5_EOF;
pNode->xNext = 0;
}
}else if( *ppFree ){
pNear->pColset = pColset;
*ppFree = 0;
}else{
pNear->pColset = fts5CloneColset(&pParse->rc, pColset);
}
}else{
int i;
assert( pNode->eType!=FTS5_EOF || pNode->nChild==0 );
for(i=0; i<pNode->nChild; i++){
fts5ParseSetColset(pParse, pNode->apChild[i], pColset, ppFree);
}
}
}
}
/*
** Apply colset pColset to expression node pExpr and all of its descendents.
*/
void sqlite3Fts5ParseSetColset(
Fts5Parse *pParse,
Fts5ExprNode *pExpr,
Fts5Colset *pColset
){
Fts5Colset *pFree = pColset;
if( pParse->pConfig->eDetail==FTS5_DETAIL_NONE ){
sqlite3Fts5ParseError(pParse,
"fts5: column queries are not supported (detail=none)"
);
}else{
fts5ParseSetColset(pParse, pExpr, pColset, &pFree);
}
sqlite3_free(pFree);
}
static void fts5ExprAssignXNext(Fts5ExprNode *pNode){
switch( pNode->eType ){
case FTS5_STRING: {
Fts5ExprNearset *pNear = pNode->pNear;
if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1
&& pNear->apPhrase[0]->aTerm[0].pSynonym==0
&& pNear->apPhrase[0]->aTerm[0].bFirst==0
){
pNode->eType = FTS5_TERM;
pNode->xNext = fts5ExprNodeNext_TERM;
}else{
pNode->xNext = fts5ExprNodeNext_STRING;
}
break;
};
case FTS5_OR: {
pNode->xNext = fts5ExprNodeNext_OR;
break;
};
case FTS5_AND: {
pNode->xNext = fts5ExprNodeNext_AND;
break;
};
default: assert( pNode->eType==FTS5_NOT ); {
pNode->xNext = fts5ExprNodeNext_NOT;
break;
};
}
}
static void fts5ExprAddChildren(Fts5ExprNode *p, Fts5ExprNode *pSub){
int ii = p->nChild;
if( p->eType!=FTS5_NOT && pSub->eType==p->eType ){
int nByte = sizeof(Fts5ExprNode*) * pSub->nChild;
memcpy(&p->apChild[p->nChild], pSub->apChild, nByte);
p->nChild += pSub->nChild;
sqlite3_free(pSub);
}else{
p->apChild[p->nChild++] = pSub;
}
for( ; ii<p->nChild; ii++){
p->iHeight = MAX(p->iHeight, p->apChild[ii]->iHeight + 1);
}
}
/*
** This function is used when parsing LIKE or GLOB patterns against
** trigram indexes that specify either detail=column or detail=none.
** It converts a phrase:
**
** abc + def + ghi
**
** into an AND tree:
**
** abc AND def AND ghi
*/
static Fts5ExprNode *fts5ParsePhraseToAnd(
Fts5Parse *pParse,
Fts5ExprNearset *pNear
){
int nTerm = pNear->apPhrase[0]->nTerm;
int ii;
int nByte;
Fts5ExprNode *pRet;
assert( pNear->nPhrase==1 );
assert( pParse->bPhraseToAnd );
nByte = sizeof(Fts5ExprNode) + nTerm*sizeof(Fts5ExprNode*);
pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte);
if( pRet ){
pRet->eType = FTS5_AND;
pRet->nChild = nTerm;
pRet->iHeight = 1;
fts5ExprAssignXNext(pRet);
pParse->nPhrase--;
for(ii=0; ii<nTerm; ii++){
Fts5ExprPhrase *pPhrase = (Fts5ExprPhrase*)sqlite3Fts5MallocZero(
&pParse->rc, sizeof(Fts5ExprPhrase)
);
if( pPhrase ){
if( parseGrowPhraseArray(pParse) ){
fts5ExprPhraseFree(pPhrase);
}else{
Fts5ExprTerm *p = &pNear->apPhrase[0]->aTerm[ii];
Fts5ExprTerm *pTo = &pPhrase->aTerm[0];
pParse->apPhrase[pParse->nPhrase++] = pPhrase;
pPhrase->nTerm = 1;
pTo->pTerm = sqlite3Fts5Strndup(&pParse->rc, p->pTerm, p->nFullTerm);
pTo->nQueryTerm = p->nQueryTerm;
pTo->nFullTerm = p->nFullTerm;
pRet->apChild[ii] = sqlite3Fts5ParseNode(pParse, FTS5_STRING,
0, 0, sqlite3Fts5ParseNearset(pParse, 0, pPhrase)
);
}
}
}
if( pParse->rc ){
sqlite3Fts5ParseNodeFree(pRet);
pRet = 0;
}else{
sqlite3Fts5ParseNearsetFree(pNear);
}
}
return pRet;
}
/*
** Allocate and return a new expression object. If anything goes wrong (i.e.
** OOM error), leave an error code in pParse and return NULL.
*/
Fts5ExprNode *sqlite3Fts5ParseNode(
Fts5Parse *pParse, /* Parse context */
int eType, /* FTS5_STRING, AND, OR or NOT */
Fts5ExprNode *pLeft, /* Left hand child expression */
Fts5ExprNode *pRight, /* Right hand child expression */
Fts5ExprNearset *pNear /* For STRING expressions, the near cluster */
){
Fts5ExprNode *pRet = 0;
if( pParse->rc==SQLITE_OK ){
int nChild = 0; /* Number of children of returned node */
sqlite3_int64 nByte; /* Bytes of space to allocate for this node */
assert( (eType!=FTS5_STRING && !pNear)
|| (eType==FTS5_STRING && !pLeft && !pRight)
);
if( eType==FTS5_STRING && pNear==0 ) return 0;
if( eType!=FTS5_STRING && pLeft==0 ) return pRight;
if( eType!=FTS5_STRING && pRight==0 ) return pLeft;
if( eType==FTS5_STRING
&& pParse->bPhraseToAnd
&& pNear->apPhrase[0]->nTerm>1
){
pRet = fts5ParsePhraseToAnd(pParse, pNear);
}else{
if( eType==FTS5_NOT ){
nChild = 2;
}else if( eType==FTS5_AND || eType==FTS5_OR ){
nChild = 2;
if( pLeft->eType==eType ) nChild += pLeft->nChild-1;
if( pRight->eType==eType ) nChild += pRight->nChild-1;
}
nByte = sizeof(Fts5ExprNode) + sizeof(Fts5ExprNode*)*(nChild-1);
pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte);
if( pRet ){
pRet->eType = eType;
pRet->pNear = pNear;
fts5ExprAssignXNext(pRet);
if( eType==FTS5_STRING ){
int iPhrase;
for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){
pNear->apPhrase[iPhrase]->pNode = pRet;
if( pNear->apPhrase[iPhrase]->nTerm==0 ){
pRet->xNext = 0;
pRet->eType = FTS5_EOF;
}
}
if( pParse->pConfig->eDetail!=FTS5_DETAIL_FULL ){
Fts5ExprPhrase *pPhrase = pNear->apPhrase[0];
if( pNear->nPhrase!=1
|| pPhrase->nTerm>1
|| (pPhrase->nTerm>0 && pPhrase->aTerm[0].bFirst)
){
sqlite3Fts5ParseError(pParse,
"fts5: %s queries are not supported (detail!=full)",
pNear->nPhrase==1 ? "phrase": "NEAR"
);
sqlite3_free(pRet);
pRet = 0;
}
}
}else{
fts5ExprAddChildren(pRet, pLeft);
fts5ExprAddChildren(pRet, pRight);
if( pRet->iHeight>SQLITE_FTS5_MAX_EXPR_DEPTH ){
sqlite3Fts5ParseError(pParse,
"fts5 expression tree is too large (maximum depth %d)",
SQLITE_FTS5_MAX_EXPR_DEPTH
);
sqlite3_free(pRet);
pRet = 0;
}
}
}
}
}
if( pRet==0 ){
assert( pParse->rc!=SQLITE_OK );
sqlite3Fts5ParseNodeFree(pLeft);
sqlite3Fts5ParseNodeFree(pRight);
sqlite3Fts5ParseNearsetFree(pNear);
}
return pRet;
}
Fts5ExprNode *sqlite3Fts5ParseImplicitAnd(
Fts5Parse *pParse, /* Parse context */
Fts5ExprNode *pLeft, /* Left hand child expression */
Fts5ExprNode *pRight /* Right hand child expression */
){
Fts5ExprNode *pRet = 0;
Fts5ExprNode *pPrev;
if( pParse->rc ){
sqlite3Fts5ParseNodeFree(pLeft);
sqlite3Fts5ParseNodeFree(pRight);
}else{
assert( pLeft->eType==FTS5_STRING
|| pLeft->eType==FTS5_TERM
|| pLeft->eType==FTS5_EOF
|| pLeft->eType==FTS5_AND
);
assert( pRight->eType==FTS5_STRING
|| pRight->eType==FTS5_TERM
|| pRight->eType==FTS5_EOF
);
if( pLeft->eType==FTS5_AND ){
pPrev = pLeft->apChild[pLeft->nChild-1];
}else{
pPrev = pLeft;
}
assert( pPrev->eType==FTS5_STRING
|| pPrev->eType==FTS5_TERM
|| pPrev->eType==FTS5_EOF
);
if( pRight->eType==FTS5_EOF ){
assert( pParse->apPhrase[pParse->nPhrase-1]==pRight->pNear->apPhrase[0] );
sqlite3Fts5ParseNodeFree(pRight);
pRet = pLeft;
pParse->nPhrase--;
}
else if( pPrev->eType==FTS5_EOF ){
Fts5ExprPhrase **ap;
if( pPrev==pLeft ){
pRet = pRight;
}else{
pLeft->apChild[pLeft->nChild-1] = pRight;
pRet = pLeft;
}
ap = &pParse->apPhrase[pParse->nPhrase-1-pRight->pNear->nPhrase];
assert( ap[0]==pPrev->pNear->apPhrase[0] );
memmove(ap, &ap[1], sizeof(Fts5ExprPhrase*)*pRight->pNear->nPhrase);
pParse->nPhrase--;
sqlite3Fts5ParseNodeFree(pPrev);
}
else{
pRet = sqlite3Fts5ParseNode(pParse, FTS5_AND, pLeft, pRight, 0);
}
}
return pRet;
}
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){
sqlite3_int64 nByte = 0;
Fts5ExprTerm *p;
char *zQuoted;
/* Determine the maximum amount of space required. */
for(p=pTerm; p; p=p->pSynonym){
nByte += pTerm->nQueryTerm * 2 + 3 + 2;
}
zQuoted = sqlite3_malloc64(nByte);
if( zQuoted ){
int i = 0;
for(p=pTerm; p; p=p->pSynonym){
char *zIn = p->pTerm;
char *zEnd = &zIn[p->nQueryTerm];
zQuoted[i++] = '"';
while( zIn<zEnd ){
if( *zIn=='"' ) zQuoted[i++] = '"';
zQuoted[i++] = *zIn++;
}
zQuoted[i++] = '"';
if( p->pSynonym ) zQuoted[i++] = '|';
}
if( pTerm->bPrefix ){
zQuoted[i++] = ' ';
zQuoted[i++] = '*';
}
zQuoted[i++] = '\0';
}
return zQuoted;
}
static char *fts5PrintfAppend(char *zApp, const char *zFmt, ...){
char *zNew;
va_list ap;
va_start(ap, zFmt);
zNew = sqlite3_vmprintf(zFmt, ap);
va_end(ap);
if( zApp && zNew ){
char *zNew2 = sqlite3_mprintf("%s%s", zApp, zNew);
sqlite3_free(zNew);
zNew = zNew2;
}
sqlite3_free(zApp);
return zNew;
}
/*
** Compose a tcl-readable representation of expression pExpr. Return a
** pointer to a buffer containing that representation. It is the
** responsibility of the caller to at some point free the buffer using
** sqlite3_free().
*/
static char *fts5ExprPrintTcl(
Fts5Config *pConfig,
const char *zNearsetCmd,
Fts5ExprNode *pExpr
){
char *zRet = 0;
if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){
Fts5ExprNearset *pNear = pExpr->pNear;
int i;
int iTerm;
zRet = fts5PrintfAppend(zRet, "%s ", zNearsetCmd);
if( zRet==0 ) return 0;
if( pNear->pColset ){
int *aiCol = pNear->pColset->aiCol;
int nCol = pNear->pColset->nCol;
if( nCol==1 ){
zRet = fts5PrintfAppend(zRet, "-col %d ", aiCol[0]);
}else{
zRet = fts5PrintfAppend(zRet, "-col {%d", aiCol[0]);
for(i=1; i<pNear->pColset->nCol; i++){
zRet = fts5PrintfAppend(zRet, " %d", aiCol[i]);
}
zRet = fts5PrintfAppend(zRet, "} ");
}
if( zRet==0 ) return 0;
}
if( pNear->nPhrase>1 ){
zRet = fts5PrintfAppend(zRet, "-near %d ", pNear->nNear);
if( zRet==0 ) return 0;
}
zRet = fts5PrintfAppend(zRet, "--");
if( zRet==0 ) return 0;
for(i=0; i<pNear->nPhrase; i++){
Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
zRet = fts5PrintfAppend(zRet, " {");
for(iTerm=0; zRet && iTerm<pPhrase->nTerm; iTerm++){
Fts5ExprTerm *p = &pPhrase->aTerm[iTerm];
zRet = fts5PrintfAppend(zRet, "%s%.*s", iTerm==0?"":" ",
p->nQueryTerm, p->pTerm
);
if( pPhrase->aTerm[iTerm].bPrefix ){
zRet = fts5PrintfAppend(zRet, "*");
}
}
if( zRet ) zRet = fts5PrintfAppend(zRet, "}");
if( zRet==0 ) return 0;
}
}else if( pExpr->eType==0 ){
zRet = sqlite3_mprintf("{}");
}else{
char const *zOp = 0;
int i;
switch( pExpr->eType ){
case FTS5_AND: zOp = "AND"; break;
case FTS5_NOT: zOp = "NOT"; break;
default:
assert( pExpr->eType==FTS5_OR );
zOp = "OR";
break;
}
zRet = sqlite3_mprintf("%s", zOp);
for(i=0; zRet && i<pExpr->nChild; i++){
char *z = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->apChild[i]);
if( !z ){
sqlite3_free(zRet);
zRet = 0;
}else{
zRet = fts5PrintfAppend(zRet, " [%z]", z);
}
}
}
return zRet;
}
static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){
char *zRet = 0;
if( pExpr->eType==0 ){
return sqlite3_mprintf("\"\"");
}else
if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){
Fts5ExprNearset *pNear = pExpr->pNear;
int i;
int iTerm;
if( pNear->pColset ){
int ii;
Fts5Colset *pColset = pNear->pColset;
if( pColset->nCol>1 ) zRet = fts5PrintfAppend(zRet, "{");
for(ii=0; ii<pColset->nCol; ii++){
zRet = fts5PrintfAppend(zRet, "%s%s",
pConfig->azCol[pColset->aiCol[ii]], ii==pColset->nCol-1 ? "" : " "
);
}
if( zRet ){
zRet = fts5PrintfAppend(zRet, "%s : ", pColset->nCol>1 ? "}" : "");
}
if( zRet==0 ) return 0;
}
if( pNear->nPhrase>1 ){
zRet = fts5PrintfAppend(zRet, "NEAR(");
if( zRet==0 ) return 0;
}
for(i=0; i<pNear->nPhrase; i++){
Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
if( i!=0 ){
zRet = fts5PrintfAppend(zRet, " ");
if( zRet==0 ) return 0;
}
for(iTerm=0; iTerm<pPhrase->nTerm; iTerm++){
char *zTerm = fts5ExprTermPrint(&pPhrase->aTerm[iTerm]);
if( zTerm ){
zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" + ", zTerm);
sqlite3_free(zTerm);
}
if( zTerm==0 || zRet==0 ){
sqlite3_free(zRet);
return 0;
}
}
}
if( pNear->nPhrase>1 ){
zRet = fts5PrintfAppend(zRet, ", %d)", pNear->nNear);
if( zRet==0 ) return 0;
}
}else{
char const *zOp = 0;
int i;
switch( pExpr->eType ){
case FTS5_AND: zOp = " AND "; break;
case FTS5_NOT: zOp = " NOT "; break;
default:
assert( pExpr->eType==FTS5_OR );
zOp = " OR ";
break;
}
for(i=0; i<pExpr->nChild; i++){
char *z = fts5ExprPrint(pConfig, pExpr->apChild[i]);
if( z==0 ){
sqlite3_free(zRet);
zRet = 0;
}else{
int e = pExpr->apChild[i]->eType;
int b = (e!=FTS5_STRING && e!=FTS5_TERM && e!=FTS5_EOF);
zRet = fts5PrintfAppend(zRet, "%s%s%z%s",
(i==0 ? "" : zOp),
(b?"(":""), z, (b?")":"")
);
}
if( zRet==0 ) break;
}
}
return zRet;
}
/*
** The implementation of user-defined scalar functions fts5_expr() (bTcl==0)
** and fts5_expr_tcl() (bTcl!=0).
*/
static void fts5ExprFunction(
sqlite3_context *pCtx, /* Function call context */
int nArg, /* Number of args */
sqlite3_value **apVal, /* Function arguments */
int bTcl
){
Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_data(pCtx);
sqlite3 *db = sqlite3_context_db_handle(pCtx);
const char *zExpr = 0;
char *zErr = 0;
Fts5Expr *pExpr = 0;
int rc;
int i;
const char **azConfig; /* Array of arguments for Fts5Config */
const char *zNearsetCmd = "nearset";
int nConfig; /* Size of azConfig[] */
Fts5Config *pConfig = 0;
int iArg = 1;
if( nArg<1 ){
zErr = sqlite3_mprintf("wrong number of arguments to function %s",
bTcl ? "fts5_expr_tcl" : "fts5_expr"
);
sqlite3_result_error(pCtx, zErr, -1);
sqlite3_free(zErr);
return;
}
if( bTcl && nArg>1 ){
zNearsetCmd = (const char*)sqlite3_value_text(apVal[1]);
iArg = 2;
}
nConfig = 3 + (nArg-iArg);
azConfig = (const char**)sqlite3_malloc64(sizeof(char*) * nConfig);
if( azConfig==0 ){
sqlite3_result_error_nomem(pCtx);
return;
}
azConfig[0] = 0;
azConfig[1] = "main";
azConfig[2] = "tbl";
for(i=3; iArg<nArg; iArg++){
const char *z = (const char*)sqlite3_value_text(apVal[iArg]);
azConfig[i++] = (z ? z : "");
}
zExpr = (const char*)sqlite3_value_text(apVal[0]);
if( zExpr==0 ) zExpr = "";
rc = sqlite3Fts5ConfigParse(pGlobal, db, nConfig, azConfig, &pConfig, &zErr);
if( rc==SQLITE_OK ){
rc = sqlite3Fts5ExprNew(pConfig, 0, pConfig->nCol, zExpr, &pExpr, &zErr);
}
if( rc==SQLITE_OK ){
char *zText;
if( pExpr->pRoot->xNext==0 ){
zText = sqlite3_mprintf("");
}else if( bTcl ){
zText = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRoot);
}else{
zText = fts5ExprPrint(pConfig, pExpr->pRoot);
}
if( zText==0 ){
rc = SQLITE_NOMEM;
}else{
sqlite3_result_text(pCtx, zText, -1, SQLITE_TRANSIENT);
sqlite3_free(zText);
}
}
if( rc!=SQLITE_OK ){
if( zErr ){
sqlite3_result_error(pCtx, zErr, -1);
sqlite3_free(zErr);
}else{
sqlite3_result_error_code(pCtx, rc);
}
}
sqlite3_free((void *)azConfig);
sqlite3Fts5ConfigFree(pConfig);
sqlite3Fts5ExprFree(pExpr);
}
static void fts5ExprFunctionHr(
sqlite3_context *pCtx, /* Function call context */
int nArg, /* Number of args */
sqlite3_value **apVal /* Function arguments */
){
fts5ExprFunction(pCtx, nArg, apVal, 0);
}
static void fts5ExprFunctionTcl(
sqlite3_context *pCtx, /* Function call context */
int nArg, /* Number of args */
sqlite3_value **apVal /* Function arguments */
){
fts5ExprFunction(pCtx, nArg, apVal, 1);
}
/*
** The implementation of an SQLite user-defined-function that accepts a
** single integer as an argument. If the integer is an alpha-numeric
** unicode code point, 1 is returned. Otherwise 0.
*/
static void fts5ExprIsAlnum(
sqlite3_context *pCtx, /* Function call context */
int nArg, /* Number of args */
sqlite3_value **apVal /* Function arguments */
){
int iCode;
u8 aArr[32];
if( nArg!=1 ){
sqlite3_result_error(pCtx,
"wrong number of arguments to function fts5_isalnum", -1
);
return;
}
memset(aArr, 0, sizeof(aArr));
sqlite3Fts5UnicodeCatParse("L*", aArr);
sqlite3Fts5UnicodeCatParse("N*", aArr);
sqlite3Fts5UnicodeCatParse("Co", aArr);
iCode = sqlite3_value_int(apVal[0]);
sqlite3_result_int(pCtx, aArr[sqlite3Fts5UnicodeCategory((u32)iCode)]);
}
static void fts5ExprFold(
sqlite3_context *pCtx, /* Function call context */
int nArg, /* Number of args */
sqlite3_value **apVal /* Function arguments */
){
if( nArg!=1 && nArg!=2 ){
sqlite3_result_error(pCtx,
"wrong number of arguments to function fts5_fold", -1
);
}else{
int iCode;
int bRemoveDiacritics = 0;
iCode = sqlite3_value_int(apVal[0]);
if( nArg==2 ) bRemoveDiacritics = sqlite3_value_int(apVal[1]);
sqlite3_result_int(pCtx, sqlite3Fts5UnicodeFold(iCode, bRemoveDiacritics));
}
}
#endif /* if SQLITE_TEST || SQLITE_FTS5_DEBUG */
/*
** This is called during initialization to register the fts5_expr() scalar
** UDF with the SQLite handle passed as the only argument.
*/
int sqlite3Fts5ExprInit(Fts5Global *pGlobal, sqlite3 *db){
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
struct Fts5ExprFunc {
const char *z;
void (*x)(sqlite3_context*,int,sqlite3_value**);
} aFunc[] = {
{ "fts5_expr", fts5ExprFunctionHr },
{ "fts5_expr_tcl", fts5ExprFunctionTcl },
{ "fts5_isalnum", fts5ExprIsAlnum },
{ "fts5_fold", fts5ExprFold },
};
int i;
int rc = SQLITE_OK;
void *pCtx = (void*)pGlobal;
for(i=0; rc==SQLITE_OK && i<ArraySize(aFunc); i++){
struct Fts5ExprFunc *p = &aFunc[i];
rc = sqlite3_create_function(db, p->z, -1, SQLITE_UTF8, pCtx, p->x, 0, 0);
}
#else
int rc = SQLITE_OK;
UNUSED_PARAM2(pGlobal,db);
#endif
/* Avoid warnings indicating that sqlite3Fts5ParserTrace() and
** sqlite3Fts5ParserFallback() are unused */
#ifndef NDEBUG
(void)sqlite3Fts5ParserTrace;
#endif
(void)sqlite3Fts5ParserFallback;
return rc;
}
/*
** Return the number of phrases in expression pExpr.
*/
int sqlite3Fts5ExprPhraseCount(Fts5Expr *pExpr){
return (pExpr ? pExpr->nPhrase : 0);
}
/*
** Return the number of terms in the iPhrase'th phrase in pExpr.
*/
int sqlite3Fts5ExprPhraseSize(Fts5Expr *pExpr, int iPhrase){
if( iPhrase<0 || iPhrase>=pExpr->nPhrase ) return 0;
return pExpr->apExprPhrase[iPhrase]->nTerm;
}
/*
** This function is used to access the current position list for phrase
** iPhrase.
*/
int sqlite3Fts5ExprPoslist(Fts5Expr *pExpr, int iPhrase, const u8 **pa){
int nRet;
Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase];
Fts5ExprNode *pNode = pPhrase->pNode;
if( pNode->bEof==0 && pNode->iRowid==pExpr->pRoot->iRowid ){
*pa = pPhrase->poslist.p;
nRet = pPhrase->poslist.n;
}else{
*pa = 0;
nRet = 0;
}
return nRet;
}
struct Fts5PoslistPopulator {
Fts5PoslistWriter writer;
int bOk; /* True if ok to populate */
int bMiss;
};
/*
** Clear the position lists associated with all phrases in the expression
** passed as the first argument. Argument bLive is true if the expression
** might be pointing to a real entry, otherwise it has just been reset.
**
** At present this function is only used for detail=col and detail=none
** fts5 tables. This implies that all phrases must be at most 1 token
** in size, as phrase matches are not supported without detail=full.
*/
Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr *pExpr, int bLive){
Fts5PoslistPopulator *pRet;
pRet = sqlite3_malloc64(sizeof(Fts5PoslistPopulator)*pExpr->nPhrase);
if( pRet ){
int i;
memset(pRet, 0, sizeof(Fts5PoslistPopulator)*pExpr->nPhrase);
for(i=0; i<pExpr->nPhrase; i++){
Fts5Buffer *pBuf = &pExpr->apExprPhrase[i]->poslist;
Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode;
assert( pExpr->apExprPhrase[i]->nTerm<=1 );
if( bLive &&
(pBuf->n==0 || pNode->iRowid!=pExpr->pRoot->iRowid || pNode->bEof)
){
pRet[i].bMiss = 1;
}else{
pBuf->n = 0;
}
}
}
return pRet;
}
struct Fts5ExprCtx {
Fts5Expr *pExpr;
Fts5PoslistPopulator *aPopulator;
i64 iOff;
};
typedef struct Fts5ExprCtx Fts5ExprCtx;
/*
** TODO: Make this more efficient!
*/
static int fts5ExprColsetTest(Fts5Colset *pColset, int iCol){
int i;
for(i=0; i<pColset->nCol; i++){
if( pColset->aiCol[i]==iCol ) return 1;
}
return 0;
}
/*
** pToken is a buffer nToken bytes in size that may or may not contain
** an embedded 0x00 byte. If it does, return the number of bytes in
** the buffer before the 0x00. If it does not, return nToken.
*/
static int fts5QueryTerm(const char *pToken, int nToken){
int ii;
for(ii=0; ii<nToken && pToken[ii]; ii++){}
return ii;
}
static int fts5ExprPopulatePoslistsCb(
void *pCtx, /* Copy of 2nd argument to xTokenize() */
int tflags, /* Mask of FTS5_TOKEN_* flags */
const char *pToken, /* Pointer to buffer containing token */
int nToken, /* Size of token in bytes */
int iUnused1, /* Byte offset of token within input text */
int iUnused2 /* Byte offset of end of token within input text */
){
Fts5ExprCtx *p = (Fts5ExprCtx*)pCtx;
Fts5Expr *pExpr = p->pExpr;
int i;
int nQuery = nToken;
i64 iRowid = pExpr->pRoot->iRowid;
UNUSED_PARAM2(iUnused1, iUnused2);
if( nQuery>FTS5_MAX_TOKEN_SIZE ) nQuery = FTS5_MAX_TOKEN_SIZE;
if( pExpr->pConfig->bTokendata ){
nQuery = fts5QueryTerm(pToken, nQuery);
}
if( (tflags & FTS5_TOKEN_COLOCATED)==0 ) p->iOff++;
for(i=0; i<pExpr->nPhrase; i++){
Fts5ExprTerm *pT;
if( p->aPopulator[i].bOk==0 ) continue;
for(pT=&pExpr->apExprPhrase[i]->aTerm[0]; pT; pT=pT->pSynonym){
if( (pT->nQueryTerm==nQuery || (pT->nQueryTerm<nQuery && pT->bPrefix))
&& memcmp(pT->pTerm, pToken, pT->nQueryTerm)==0
){
int rc = sqlite3Fts5PoslistWriterAppend(
&pExpr->apExprPhrase[i]->poslist, &p->aPopulator[i].writer, p->iOff
);
if( rc==SQLITE_OK && pExpr->pConfig->bTokendata && !pT->bPrefix ){
int iCol = p->iOff>>32;
int iTokOff = p->iOff & 0x7FFFFFFF;
rc = sqlite3Fts5IndexIterWriteTokendata(
pT->pIter, pToken, nToken, iRowid, iCol, iTokOff
);
}
if( rc ) return rc;
break;
}
}
}
return SQLITE_OK;
}
int sqlite3Fts5ExprPopulatePoslists(
Fts5Config *pConfig,
Fts5Expr *pExpr,
Fts5PoslistPopulator *aPopulator,
int iCol,
const char *z, int n
){
int i;
Fts5ExprCtx sCtx;
sCtx.pExpr = pExpr;
sCtx.aPopulator = aPopulator;
sCtx.iOff = (((i64)iCol) << 32) - 1;
for(i=0; i<pExpr->nPhrase; i++){
Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode;
Fts5Colset *pColset = pNode->pNear->pColset;
if( (pColset && 0==fts5ExprColsetTest(pColset, iCol))
|| aPopulator[i].bMiss
){
aPopulator[i].bOk = 0;
}else{
aPopulator[i].bOk = 1;
}
}
return sqlite3Fts5Tokenize(pConfig,
FTS5_TOKENIZE_DOCUMENT, z, n, (void*)&sCtx, fts5ExprPopulatePoslistsCb
);
}
static void fts5ExprClearPoslists(Fts5ExprNode *pNode){
if( pNode->eType==FTS5_TERM || pNode->eType==FTS5_STRING ){
pNode->pNear->apPhrase[0]->poslist.n = 0;
}else{
int i;
for(i=0; i<pNode->nChild; i++){
fts5ExprClearPoslists(pNode->apChild[i]);
}
}
}
static int fts5ExprCheckPoslists(Fts5ExprNode *pNode, i64 iRowid){
pNode->iRowid = iRowid;
pNode->bEof = 0;
switch( pNode->eType ){
case FTS5_TERM:
case FTS5_STRING:
return (pNode->pNear->apPhrase[0]->poslist.n>0);
case FTS5_AND: {
int i;
for(i=0; i<pNode->nChild; i++){
if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid)==0 ){
fts5ExprClearPoslists(pNode);
return 0;
}
}
break;
}
case FTS5_OR: {
int i;
int bRet = 0;
for(i=0; i<pNode->nChild; i++){
if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid) ){
bRet = 1;
}
}
return bRet;
}
default: {
assert( pNode->eType==FTS5_NOT );
if( 0==fts5ExprCheckPoslists(pNode->apChild[0], iRowid)
|| 0!=fts5ExprCheckPoslists(pNode->apChild[1], iRowid)
){
fts5ExprClearPoslists(pNode);
return 0;
}
break;
}
}
return 1;
}
void sqlite3Fts5ExprCheckPoslists(Fts5Expr *pExpr, i64 iRowid){
fts5ExprCheckPoslists(pExpr->pRoot, iRowid);
}
/*
** This function is only called for detail=columns tables.
*/
int sqlite3Fts5ExprPhraseCollist(
Fts5Expr *pExpr,
int iPhrase,
const u8 **ppCollist,
int *pnCollist
){
Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase];
Fts5ExprNode *pNode = pPhrase->pNode;
int rc = SQLITE_OK;
assert( iPhrase>=0 && iPhrase<pExpr->nPhrase );
assert( pExpr->pConfig->eDetail==FTS5_DETAIL_COLUMNS );
if( pNode->bEof==0
&& pNode->iRowid==pExpr->pRoot->iRowid
&& pPhrase->poslist.n>0
){
Fts5ExprTerm *pTerm = &pPhrase->aTerm[0];
if( pTerm->pSynonym ){
Fts5Buffer *pBuf = (Fts5Buffer*)&pTerm->pSynonym[1];
rc = fts5ExprSynonymList(
pTerm, pNode->iRowid, pBuf, (u8**)ppCollist, pnCollist
);
}else{
*ppCollist = pPhrase->aTerm[0].pIter->pData;
*pnCollist = pPhrase->aTerm[0].pIter->nData;
}
}else{
*ppCollist = 0;
*pnCollist = 0;
}
return rc;
}
/*
** Does the work of the fts5_api.xQueryToken() API method.
*/
int sqlite3Fts5ExprQueryToken(
Fts5Expr *pExpr,
int iPhrase,
int iToken,
const char **ppOut,
int *pnOut
){
Fts5ExprPhrase *pPhrase = 0;
if( iPhrase<0 || iPhrase>=pExpr->nPhrase ){
return SQLITE_RANGE;
}
pPhrase = pExpr->apExprPhrase[iPhrase];
if( iToken<0 || iToken>=pPhrase->nTerm ){
return SQLITE_RANGE;
}
*ppOut = pPhrase->aTerm[iToken].pTerm;
*pnOut = pPhrase->aTerm[iToken].nFullTerm;
return SQLITE_OK;
}
/*
** Does the work of the fts5_api.xInstToken() API method.
*/
int sqlite3Fts5ExprInstToken(
Fts5Expr *pExpr,
i64 iRowid,
int iPhrase,
int iCol,
int iOff,
int iToken,
const char **ppOut,
int *pnOut
){
Fts5ExprPhrase *pPhrase = 0;
Fts5ExprTerm *pTerm = 0;
int rc = SQLITE_OK;
if( iPhrase<0 || iPhrase>=pExpr->nPhrase ){
return SQLITE_RANGE;
}
pPhrase = pExpr->apExprPhrase[iPhrase];
if( iToken<0 || iToken>=pPhrase->nTerm ){
return SQLITE_RANGE;
}
pTerm = &pPhrase->aTerm[iToken];
if( pTerm->bPrefix==0 ){
if( pExpr->pConfig->bTokendata ){
rc = sqlite3Fts5IterToken(
pTerm->pIter, iRowid, iCol, iOff+iToken, ppOut, pnOut
);
}else{
*ppOut = pTerm->pTerm;
*pnOut = pTerm->nFullTerm;
}
}
return rc;
}
/*
** Clear the token mappings for all Fts5IndexIter objects mannaged by
** the expression passed as the only argument.
*/
void sqlite3Fts5ExprClearTokens(Fts5Expr *pExpr){
int ii;
for(ii=0; ii<pExpr->nPhrase; ii++){
Fts5ExprTerm *pT;
for(pT=&pExpr->apExprPhrase[ii]->aTerm[0]; pT; pT=pT->pSynonym){
sqlite3Fts5IndexIterClearTokendata(pT->pIter);
}
}
}