mirror of
https://github.com/tursodatabase/libsql.git
synced 2025-02-24 10:55:35 +00:00
A common complaint with libSQL is how to run extensions. The main mechanism, loading a .so, has a lot of issues around how those .so files are distributed. The most common extensions are the ones in the SQLean package. We can improve this experience by bundling them in our SQLite build. Not all SQLean extensions are kosher: some of them, like fileio, use the VFS. Others are deemed too complex. The extensions included here are a subset that we deem important enough, and low-risk enough, to just be a part of the main bundle.
153 lines
5.9 KiB
C
153 lines
5.9 KiB
C
/* MIT License
|
|
*
|
|
* Copyright (c) 2023 Tyge Løvset
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
* in the Software without restriction, including without limitation the rights
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in all
|
|
* copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
* SOFTWARE.
|
|
*/
|
|
|
|
// UTF-8 string handling.
|
|
|
|
#include <ctype.h>
|
|
#include <stdbool.h>
|
|
#include <stdint.h>
|
|
#include <string.h>
|
|
|
|
#include "text/utf8/rune.h"
|
|
#include "text/utf8/utf8.h"
|
|
|
|
const uint8_t utf8_dtab[] = {
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7,
|
|
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
|
7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 10, 3, 3, 3, 3, 3,
|
|
3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8,
|
|
8, 8, 8, 0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, 12, 12, 12, 12, 12, 12, 12, 12,
|
|
12, 12, 12, 12, 12, 0, 12, 12, 12, 12, 12, 0, 12, 0, 12, 12, 12, 24, 12, 12, 12, 12, 12,
|
|
24, 12, 24, 12, 12, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12,
|
|
12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, 12,
|
|
12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
|
|
};
|
|
|
|
// Encode/decode functions.
|
|
|
|
// utf8_decode decodes a byte as part of a utf8 codepoint.
|
|
uint32_t utf8_decode(utf8_decode_t* d, const uint32_t byte) {
|
|
const uint32_t type = utf8_dtab[byte];
|
|
d->codep = d->state ? (byte & 0x3fu) | (d->codep << 6) : (0xffU >> type) & byte;
|
|
return d->state = utf8_dtab[256 + d->state + type];
|
|
}
|
|
|
|
// utf8_encode encodes the utf8 codepoint c to s
|
|
// and returns the number of bytes written.
|
|
int utf8_encode(char* out, uint32_t c) {
|
|
if (c < 0x80U) {
|
|
out[0] = (char)c;
|
|
return 1;
|
|
} else if (c < 0x0800U) {
|
|
out[0] = (char)((c >> 6 & 0x1F) | 0xC0);
|
|
out[1] = (char)((c & 0x3F) | 0x80);
|
|
return 2;
|
|
} else if (c < 0x010000U) {
|
|
if ((c < 0xD800U) | (c >= 0xE000U)) {
|
|
out[0] = (char)((c >> 12 & 0x0F) | 0xE0);
|
|
out[1] = (char)((c >> 6 & 0x3F) | 0x80);
|
|
out[2] = (char)((c & 0x3F) | 0x80);
|
|
return 3;
|
|
}
|
|
} else if (c < 0x110000U) {
|
|
out[0] = (char)((c >> 18 & 0x07) | 0xF0);
|
|
out[1] = (char)((c >> 12 & 0x3F) | 0x80);
|
|
out[2] = (char)((c >> 6 & 0x3F) | 0x80);
|
|
out[3] = (char)((c & 0x3F) | 0x80);
|
|
return 4;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
// String functions.
|
|
|
|
// utf8_at returns a pointer to the utf8 codepoint at index in s.
|
|
const char* utf8_at(const char* s, size_t n, size_t index) {
|
|
while ((index > 0) & (*s != 0) & (n-- != 0)) {
|
|
index -= (*++s & 0xC0) != 0x80;
|
|
}
|
|
return s;
|
|
}
|
|
|
|
// utf8_pos returns the byte position of the utf8 codepoint at index in s.
|
|
size_t utf8_pos(const char* s, size_t n, size_t index) {
|
|
return (size_t)(utf8_at(s, n, index) - s);
|
|
}
|
|
|
|
// utf8_len returns the number of utf8 codepoints in s.
|
|
size_t utf8_len(const char* s, size_t n) {
|
|
size_t size = 0;
|
|
while ((n-- != 0) & (*s != 0)) {
|
|
size += (*++s & 0xC0) != 0x80;
|
|
}
|
|
return size;
|
|
}
|
|
|
|
// utf8_peek returns the utf8 codepoint at the start of s.
|
|
uint32_t utf8_peek(const char* s) {
|
|
utf8_decode_t d = {.state = 0};
|
|
do {
|
|
utf8_decode(&d, (uint8_t)*s++);
|
|
} while (d.state);
|
|
return d.codep;
|
|
}
|
|
|
|
// utf8_peek_at returns the utf8 codepoint at the index pos from s.
|
|
uint32_t utf8_peek_at(const char* s, size_t n, size_t pos) {
|
|
return utf8_peek(utf8_at(s, n, pos));
|
|
}
|
|
|
|
// utf8_icmp compares the utf8 strings s1 and s2 case-insensitively.
|
|
int utf8_icmp(const char* s1, size_t n1, const char* s2, size_t n2) {
|
|
utf8_decode_t d1 = {.state = 0}, d2 = {.state = 0};
|
|
size_t j1 = 0, j2 = 0;
|
|
while ((j1 < n1) & (j2 < n2)) {
|
|
do {
|
|
utf8_decode(&d1, (uint8_t)s1[j1++]);
|
|
} while (d1.state);
|
|
do {
|
|
utf8_decode(&d2, (uint8_t)s2[j2++]);
|
|
} while (d2.state);
|
|
int32_t c = (int32_t)rune_casefold(d1.codep) - (int32_t)rune_casefold(d2.codep);
|
|
if (c || !s2[j2 - 1]) // OK if n1 and n2 are npos
|
|
return (int)c;
|
|
}
|
|
return (int)(n1 - n2);
|
|
}
|
|
|
|
// utf8_valid returns true if s is a valid utf8 string.
|
|
bool utf8_valid(const char* s, size_t n) {
|
|
utf8_decode_t d = {.state = 0};
|
|
while ((n-- != 0) & (*s != 0)) {
|
|
utf8_decode(&d, (uint8_t)*s++);
|
|
}
|
|
return d.state == 0;
|
|
}
|