# libsql/libsql-sqlite3/ext/fts5/test/fts5unicode2.test

# 2012 May 25
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
# The tests in this file focus on testing the "unicode" FTS tokenizer.
#
# This is a modified copy of FTS4 test file "fts4_unicode.test".
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5unicode2

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

proc do_unicode_token_test {tn input res} {
  uplevel [list do_test $tn [list \
    sqlite3_fts5_tokenize -subst db "unicode61 remove_diacritics 0" $input
  ] [list {*}$res]]
}

proc do_unicode_token_test2 {tn input res} {
  uplevel [list do_test $tn [list \
    sqlite3_fts5_tokenize -subst db "unicode61" $input
  ] [list {*}$res]]
}

proc do_unicode_token_test3 {tn args} {
  set tokenizer [concat unicode61 {*}[lrange $args 0 end-2]]
  set input [lindex $args end-1]
  set res [lindex $args end]
  uplevel [list do_test $tn [list \
    sqlite3_fts5_tokenize -subst db $tokenizer $input
  ] [list {*}$res]]
}
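
# Note on the expected results used throughout this file: the
# sqlite3_fts5_tokenize wrapper reports each token as a pair of list
# elements: first the token as it will be stored in the index (case folded,
# and with diacritics stripped unless "remove_diacritics 0" is in effect),
# then the original text it was derived from. An illustrative call, not
# executed as part of the test run:
#
#   sqlite3_fts5_tokenize -subst db "unicode61" "a B"
#   # => {a a b B}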
do_unicode_token_test 1.0 {a B c D} {a a b B c c d D}
do_unicode_token_test 1.1 "\uC4 \uD6 \uDC" \
"\uE4 \uC4 \uF6 \uD6 \uFC \uDC"
do_unicode_token_test 1.2 "x\uC4x x\uD6x x\uDCx" \
"x\uE4x x\uC4x x\uF6x x\uD6x x\uFCx x\uDCx"
# 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s.
do_unicode_token_test 1.3 "\uDF" "\uDF \uDF"
do_unicode_token_test 1.4 "\u1E9E" "\uDF \u1E9E"
do_unicode_token_test 1.5 "The quick brown fox" {
  the The quick quick brown brown fox fox
}
do_unicode_token_test 1.6 "The\u00bfquick\u224ebrown\u2263fox" {
  the The quick quick brown brown fox fox
}
do_unicode_token_test2 1.7 {a B c D} {a a b B c c d D}
do_unicode_token_test2 1.8 "\uC4 \uD6 \uDC" "a \uC4 o \uD6 u \uDC"
do_unicode_token_test2 1.9 "x\uC4x x\uD6x x\uDCx" \
"xax x\uC4x xox x\uD6x xux x\uDCx"
# Check that diacritics are removed by default (remove_diacritics=1),
# and that they do not break tokens.
do_unicode_token_test2 1.10 "xx\u0301xx" "xxxx xx\u0301xx"
# Title-case mappings work
do_unicode_token_test 1.11 "\u01c5" "\u01c6 \u01c5"
do_unicode_token_test 1.12 "\u00C1abc\u00C2 \u00D1def\u00C3" \
"\u00E1abc\u00E2 \u00C1abc\u00C2 \u00F1def\u00E3 \u00D1def\u00C3"
do_unicode_token_test 1.13 "\u00A2abc\u00A3 \u00A4def\u00A5" \
"abc abc def def"
#-------------------------------------------------------------------------
#
set docs [list {
  Enhance the INSERT syntax to allow multiple rows to be inserted via the
  VALUES clause.
} {
  Enhance the CREATE VIRTUAL TABLE command to support the IF NOT EXISTS clause.
} {
  Added the sqlite3_stricmp() interface as a counterpart to sqlite3_strnicmp().
} {
  Added the sqlite3_db_readonly() interface.
} {
  Added the SQLITE_FCNTL_PRAGMA file control, giving VFS implementations the
  ability to add new PRAGMA statements or to override built-in PRAGMAs.
} {
  Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
  the same row that contains the maximum x value.
} {
  Added support for the FTS4 languageid option.
} {
  Documented support for the FTS4 content option. This feature has actually
  been in the code since version 3.7.9 but is only now considered to be
  officially supported.
} {
  Pending statements no longer block ROLLBACK. Instead, the pending statement
  will return SQLITE_ABORT upon next access after the ROLLBACK.
} {
  Improvements to the handling of CSV inputs in the command-line shell
} {
  Fix a bug introduced in version 3.7.10 that might cause a LEFT JOIN to be
  incorrectly converted into an INNER JOIN if the WHERE clause contained
  indexable terms connected by OR.
}]
set map(a) [list "\u00C4" "\u00E4"] ; # LATIN LETTER A WITH DIAERESIS
set map(e) [list "\u00CB" "\u00EB"] ; # LATIN LETTER E WITH DIAERESIS
set map(i) [list "\u00CF" "\u00EF"] ; # LATIN LETTER I WITH DIAERESIS
set map(o) [list "\u00D6" "\u00F6"] ; # LATIN LETTER O WITH DIAERESIS
set map(u) [list "\u00DC" "\u00FC"] ; # LATIN LETTER U WITH DIAERESIS
set map(y) [list "\u0178" "\u00FF"] ; # LATIN LETTER Y WITH DIAERESIS
set map(h) [list "\u1E26" "\u1E27"] ; # LATIN LETTER H WITH DIAERESIS
set map(w) [list "\u1E84" "\u1E85"] ; # LATIN LETTER W WITH DIAERESIS
set map(x) [list "\u1E8C" "\u1E8D"] ; # LATIN LETTER X WITH DIAERESIS
foreach k [array names map] {
  lappend mappings [string toupper $k] [lindex $map($k) 0]
  lappend mappings $k [lindex $map($k) 1]
}
proc mapdoc {doc} {
  set doc [regsub -all {[[:space:]]+} $doc " "]
  string map $::mappings [string trim $doc]
}
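
# For example (illustrative only): with the mappings above,
# [mapdoc "row"] returns "r\u00F6\u1E85", with "o" and "w" replaced by
# their diaeresis forms while the unmapped "r" is left alone, and any run
# of whitespace in the input collapsed to a single space.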
do_test 2.0 {
  execsql { CREATE VIRTUAL TABLE t2 USING fts5(tokenize=unicode61, x); }
  foreach doc $docs {
    set d [mapdoc $doc]
    execsql { INSERT INTO t2 VALUES($d) }
  }
} {}

do_test 2.1 {
  set q [mapdoc "row"]
  execsql { SELECT * FROM t2 WHERE t2 MATCH $q }
} [list [mapdoc {
  Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
  the same row that contains the maximum x value.
}]]
foreach {tn query snippet} {
  2 "row" {
     ...returns the value of y on the same [row] that contains
     the maximum x value.
  }
  3 "ROW" {
     ...returns the value of y on the same [row] that contains
     the maximum x value.
  }
  4 "rollback" {
     Pending statements no longer block [ROLLBACK]. Instead, the pending
     statement will return SQLITE_ABORT upon...
  }
  5 "rOllback" {
     Pending statements no longer block [ROLLBACK]. Instead, the pending
     statement will return SQLITE_ABORT upon...
  }
  6 "lang*" {
     Added support for the FTS4 [languageid] option.
  }
} {
  do_test 2.$tn {
    set q [mapdoc $query]
    execsql {
      SELECT snippet(t2, -1, '[', ']', '...', 15) FROM t2 WHERE t2 MATCH $q
    }
  } [list [mapdoc $snippet]]
}
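
# Note that the matches above are insensitive to both case and the
# diacritics introduced by mapdoc: with its default remove_diacritics=1
# setting, unicode61 folds the indexed text and the query terms to the
# same unaccented, lower-case form.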
#-------------------------------------------------------------------------
# Make sure the unicode61 tokenizer does not crash if it is passed a
# NULL pointer.
reset_db
do_execsql_test 3.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x, y);
  INSERT INTO t1 VALUES(NULL, 'a b c');
}

do_execsql_test 3.2 {
  SELECT snippet(t1, -1, '[', ']', '...', 15) FROM t1 WHERE t1 MATCH 'b'
} {{a [b] c}}

do_execsql_test 3.3 {
  BEGIN;
  DELETE FROM t1;
  INSERT INTO t1 VALUES('b b b b b b b b b b b', 'b b b b b b b b b b b b b');
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 VALUES('a b c', NULL);
  INSERT INTO t1 VALUES('a x c', NULL);
  COMMIT;
}

do_execsql_test 3.4 {
  SELECT * FROM t1 WHERE t1 MATCH 'a b';
} {{a b c} {}}
#-------------------------------------------------------------------------
#
reset_db
do_test 4.1 {
  set a "abc\uFFFEdef"
  set b "abc\uD800def"
  set c "\uFFFEdef"
  set d "\uD800def"
  execsql {
    CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x);
    INSERT INTO t1 VALUES($a);
    INSERT INTO t1 VALUES($b);
    INSERT INTO t1 VALUES($c);
    INSERT INTO t1 VALUES($d);
  }
  execsql "CREATE VIRTUAL TABLE t8 USING fts5(
    a, b, tokenize=\"unicode61 separators '\uFFFE\uD800\u00BF'\"
  )"
} {}

do_test 4.2 {
  set a [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0x62}]
  set b [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0x62}]
  set c [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
  set d [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
  execsql {
    INSERT INTO t1 VALUES($a);
    INSERT INTO t1 VALUES($b);
    INSERT INTO t1 VALUES($c);
    INSERT INTO t1 VALUES($d);
  }
} {}

do_test 4.3 {
  set a [binary format c* {0xF7 0xBF 0xBF 0xBF}]
  set b [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF}]
  set c [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF}]
  set d [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF}]
  execsql {
    INSERT INTO t1 VALUES($a);
    INSERT INTO t1 VALUES($b);
    INSERT INTO t1 VALUES($c);
    INSERT INTO t1 VALUES($d);
  }
} {}

do_test 4.4 {
  sqlite3_exec_hex db {
    CREATE VIRTUAL TABLE t9 USING fts5(a, b,
      tokenize="unicode61 separators '%C09004'"
    );
    INSERT INTO t9(a) VALUES('abc%88def %89ghi%90');
  }
} {0 {}}
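
# The values inserted by tests 4.1 through 4.4 are deliberately not valid
# UTF-8 (or contain codepoints that cannot appear in well-formed UTF-8):
# \uFFFE is a noncharacter, \uD800 is an unpaired surrogate, and the
# 0xF7-led byte sequences either encode values past U+10FFFF or carry too
# many continuation bytes. These tests assert only that the INSERTs
# succeed, i.e. that the tokenizer survives malformed input.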
#-------------------------------------------------------------------------
do_unicode_token_test3 5.1 {tokenchars {}} {
  sqlite3_reset sqlite3_column_int
} {
  sqlite3 sqlite3
  reset reset
  sqlite3 sqlite3
  column column
  int int
}

do_unicode_token_test3 5.2 {tokenchars _} {
  sqlite3_reset sqlite3_column_int
} {
  sqlite3_reset sqlite3_reset
  sqlite3_column_int sqlite3_column_int
}

do_unicode_token_test3 5.3 {separators xyz} {
  Laotianxhorseyrunszfast
} {
  laotian Laotian
  horse horse
  runs runs
  fast fast
}

do_unicode_token_test3 5.4 {tokenchars xyz} {
  Laotianxhorseyrunszfast
} {
  laotianxhorseyrunszfast Laotianxhorseyrunszfast
}

do_unicode_token_test3 5.5 {tokenchars _} {separators zyx} {
  sqlite3_resetxsqlite3_column_intyhonda_phantom
} {
  sqlite3_reset sqlite3_reset
  sqlite3_column_int sqlite3_column_int
  honda_phantom honda_phantom
}

do_unicode_token_test3 5.6 "separators \u05D1" "abc\u05D1def" {
  abc abc def def
}

do_unicode_token_test3 5.7 \
  "tokenchars \u2444\u2445" \
  "separators \u05D0\u05D1\u05D2" \
  "\u2444fre\u2445sh\u05D0water\u05D2fish.\u2445timer" \
  [list \
    \u2444fre\u2445sh \u2444fre\u2445sh \
    water water \
    fish fish \
    \u2445timer \u2445timer \
  ]

# Check that it is not possible to add a standalone diacritic codepoint
# to either separators or tokenchars.
do_unicode_token_test3 5.8 "separators \u0301" \
  "hello\u0301world \u0301helloworld" \
  "helloworld hello\u0301world helloworld helloworld"

do_unicode_token_test3 5.9 "tokenchars \u0301" \
  "hello\u0301world \u0301helloworld" \
  "helloworld hello\u0301world helloworld helloworld"

do_unicode_token_test3 5.10 "separators \u0301" \
  "remove_diacritics 0" \
  "hello\u0301world \u0301helloworld" \
  "hello\u0301world hello\u0301world helloworld helloworld"

do_unicode_token_test3 5.11 "tokenchars \u0301" \
  "remove_diacritics 0" \
  "hello\u0301world \u0301helloworld" \
  "hello\u0301world hello\u0301world helloworld helloworld"
#-------------------------------------------------------------------------
proc do_tokenize {tokenizer txt} {
  set res [list]
  foreach {b c} [sqlite3_fts5_tokenize -subst db $tokenizer $txt] {
    lappend res $b
  }
  set res
}
# Argument $lCp must be a list of codepoints (integers) that
# correspond to whitespace characters. This command creates a string
# $W from the codepoints, then tokenizes "${W}hello${W}world${W}"
# using tokenizer $tokenizer. The test passes if the tokenizer successfully
# extracts the two 5-character tokens.
#
proc do_isspace_test {tn tokenizer lCp} {
  set whitespace [format [string repeat %c [llength $lCp]] {*}$lCp]
  set txt "${whitespace}hello${whitespace}world${whitespace}"
  uplevel [list do_test $tn [list do_tokenize $tokenizer $txt] {hello world}]
}
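
# For example (illustrative only), [do_isspace_test 6.x unicode61 {32 160}]
# separates "hello" and "world" with a SPACE plus NO-BREAK SPACE pair and
# passes only if exactly the two tokens "hello" and "world" come back.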
set tokenizers [list unicode61]
#ifcapable icu { lappend tokenizers icu }
# Some tests to check that the tokenizers correctly identify white-space
# codepoints. All codepoints tested below are of type "Zs" in the
# UnicodeData.txt file.
foreach T $tokenizers {
  do_isspace_test 6.$T.1  $T 32
  do_isspace_test 6.$T.2  $T 160
  do_isspace_test 6.$T.3  $T 5760
  do_isspace_test 6.$T.4  $T 6158
  do_isspace_test 6.$T.5  $T 8192
  do_isspace_test 6.$T.6  $T 8193
  do_isspace_test 6.$T.7  $T 8194
  do_isspace_test 6.$T.8  $T 8195
  do_isspace_test 6.$T.9  $T 8196
  do_isspace_test 6.$T.10 $T 8197
  do_isspace_test 6.$T.11 $T 8198
  do_isspace_test 6.$T.12 $T 8199
  do_isspace_test 6.$T.13 $T 8200
  do_isspace_test 6.$T.14 $T 8201
  do_isspace_test 6.$T.15 $T 8202
  do_isspace_test 6.$T.16 $T 8239
  do_isspace_test 6.$T.17 $T 8287
  do_isspace_test 6.$T.18 $T 12288

  do_isspace_test 6.$T.19 $T {32 160 5760 6158}
  do_isspace_test 6.$T.20 $T {8192 8193 8194 8195}
  do_isspace_test 6.$T.21 $T {8196 8197 8198 8199}
  do_isspace_test 6.$T.22 $T {8200 8201 8202 8239}
  do_isspace_test 6.$T.23 $T {8287 12288}
}
#-------------------------------------------------------------------------
# Test that the private use ranges are treated as alphanumeric.
#
foreach {tn1 c} {
  1 \ue000 2 \ue001 3 \uf000 4 \uf8fe 5 \uf8ff
} {
  foreach {tn2 config res} {
    1 ""             "hello*world hello*world"
    2 "separators *" "hello hello world world"
  } {
    set config [string map [list * $c] $config]
    set input  [string map [list * $c] "hello*world"]
    set output [string map [list * $c] $res]
    do_unicode_token_test3 7.$tn1.$tn2 {*}$config $input $output
  }
}
#-------------------------------------------------------------------------
# Cursory test of remove_diacritics=0.
#
# 00C4;LATIN CAPITAL LETTER A WITH DIAERESIS
# 00D6;LATIN CAPITAL LETTER O WITH DIAERESIS
# 00E4;LATIN SMALL LETTER A WITH DIAERESIS
# 00F6;LATIN SMALL LETTER O WITH DIAERESIS
#
do_execsql_test 8.1.1 "
  CREATE VIRTUAL TABLE t3 USING fts5(
    content, tokenize='unicode61 remove_diacritics 1'
  );
  INSERT INTO t3 VALUES('o');
  INSERT INTO t3 VALUES('a');
  INSERT INTO t3 VALUES('O');
  INSERT INTO t3 VALUES('A');
  INSERT INTO t3 VALUES('\xD6');
  INSERT INTO t3 VALUES('\xC4');
  INSERT INTO t3 VALUES('\xF6');
  INSERT INTO t3 VALUES('\xE4');
"
do_execsql_test 8.1.2 {
  SELECT rowid FROM t3 WHERE t3 MATCH 'o' ORDER BY rowid ASC;
} {1 3 5 7}
do_execsql_test 8.1.3 {
  SELECT rowid FROM t3 WHERE t3 MATCH 'a' ORDER BY rowid ASC;
} {2 4 6 8}

do_execsql_test 8.2.1 {
  CREATE VIRTUAL TABLE t4 USING fts5(
    content, tokenize='unicode61 remove_diacritics 0'
  );
  INSERT INTO t4 SELECT * FROM t3 ORDER BY rowid ASC;
}
do_execsql_test 8.2.2 {
  SELECT rowid FROM t4 WHERE t4 MATCH 'o' ORDER BY rowid ASC;
} {1 3}
do_execsql_test 8.2.3 {
  SELECT rowid FROM t4 WHERE t4 MATCH 'a' ORDER BY rowid ASC;
} {2 4}
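
# To summarize the contrast: in t3 (remove_diacritics 1) the accented rows
# 5..8 fold to plain "o" and "a" and therefore match, whereas in t4
# (remove_diacritics 0) only the unaccented rows 1..4 match the same
# queries.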
#-------------------------------------------------------------------------
#
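# The tests below were carried over from the FTS4 version of this file and
# still target fts4/fts4aux virtual tables; they are disabled here by the
# enclosing "if 0" block.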
if 0 {

foreach {tn sql} {
  1 {
    CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 [tokenchars= .]);
    CREATE VIRTUAL TABLE t6 USING fts4(
        tokenize=unicode61 [tokenchars=="] "tokenchars=[]");
    CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 [separators=x\xC4]);
  }
  2 {
    CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 "tokenchars= .");
    CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 "tokenchars=[=""]");
    CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 "separators=x\xC4");
  }
  3 {
    CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 'tokenchars= .');
    CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 'tokenchars=="[]');
    CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 'separators=x\xC4');
  }
  4 {
    CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 `tokenchars= .`);
    CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 `tokenchars=[="]`);
    CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 `separators=x\xC4`);
  }
} {
  do_execsql_test 9.$tn.0 {
    DROP TABLE IF EXISTS t5;
    DROP TABLE IF EXISTS t5aux;
    DROP TABLE IF EXISTS t6;
    DROP TABLE IF EXISTS t6aux;
    DROP TABLE IF EXISTS t7;
    DROP TABLE IF EXISTS t7aux;
  }
  do_execsql_test 9.$tn.1 $sql

  do_execsql_test 9.$tn.2 {
    CREATE VIRTUAL TABLE t5aux USING fts4aux(t5);
    INSERT INTO t5 VALUES('one two three/four.five.six');
    SELECT * FROM t5aux;
  } {
    four.five.six   * 1 1 four.five.six   0 1 1
    {one two three} * 1 1 {one two three} 0 1 1
  }
  do_execsql_test 9.$tn.3 {
    CREATE VIRTUAL TABLE t6aux USING fts4aux(t6);
    INSERT INTO t6 VALUES('alpha=beta"gamma/delta[epsilon]zeta');
    SELECT * FROM t6aux;
  } {
    {alpha=beta"gamma}   * 1 1 {alpha=beta"gamma}   0 1 1
    {delta[epsilon]zeta} * 1 1 {delta[epsilon]zeta} 0 1 1
  }
  do_execsql_test 9.$tn.4 {
    CREATE VIRTUAL TABLE t7aux USING fts4aux(t7);
    INSERT INTO t7 VALUES('alephxbeth\xC4gimel');
    SELECT * FROM t7aux;
  } {
    aleph * 1 1 aleph 0 1 1
    beth  * 1 1 beth  0 1 1
    gimel * 1 1 gimel 0 1 1
  }
}
# Check that multiple options are handled correctly.
#
do_execsql_test 10.1 {
  DROP TABLE IF EXISTS t1;
  CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61
    "tokenchars=xyz" "tokenchars=.=" "separators=.=" "separators=xy"
    "separators=a" "separators=a" "tokenchars=a" "tokenchars=a"
  );
  INSERT INTO t1 VALUES('oneatwoxthreeyfour');
  INSERT INTO t1 VALUES('a.single=word');
  CREATE VIRTUAL TABLE t1aux USING fts4aux(t1);
  SELECT * FROM t1aux;
} {
  .single=word * 1 1 .single=word 0 1 1
  four         * 1 1 four         0 1 1
  one          * 1 1 one          0 1 1
  three        * 1 1 three        0 1 1
  two          * 1 1 two          0 1 1
}

# Test that case folding happens after tokenization, not before.
#
do_execsql_test 10.2 {
  DROP TABLE IF EXISTS t2;
  CREATE VIRTUAL TABLE t2 USING fts4(tokenize=unicode61 "separators=aB");
  INSERT INTO t2 VALUES('oneatwoBthree');
  INSERT INTO t2 VALUES('onebtwoAthree');
  CREATE VIRTUAL TABLE t2aux USING fts4aux(t2);
  SELECT * FROM t2aux;
} {
  one           * 1 1 one           0 1 1
  onebtwoathree * 1 1 onebtwoathree 0 1 1
  three         * 1 1 three         0 1 1
  two           * 1 1 two           0 1 1
}

# Test that the tokenchars and separators options work with the
# fts3tokenize table.
#
do_execsql_test 11.1 {
  CREATE VIRTUAL TABLE ft1 USING fts3tokenize(
    "unicode61", "tokenchars=@.", "separators=1234567890"
  );
  SELECT token FROM ft1 WHERE input = 'berlin@street123sydney.road';
} {
  berlin@street sydney.road
}
}
finish_test