mirror of
https://github.com/tursodatabase/libsql.git
synced 2025-02-24 10:55:35 +00:00
A common complaint with libSQL is how to run extensions. The main mechanism, loading a .so, has a lot of issues around how those .so files are distributed. The most common extensions are the ones in the sqlean package. We can improve this experience by bundling them in our sqlite build. Not all SQLean extensions are kosher: some of them, like fileio, use the vfs. Others are deemed too complex. The extensions included here are a subset that we deem important enough, and low risk enough, to just be a part of the main bundle.
160 lines
5.9 KiB
C
160 lines
5.9 KiB
C
#ifndef UTF8_GROUPS_H
|
|
#define UTF8_GROUPS_H
|
|
|
|
/* The tables below are extracted from the RE2 library */
|
|
#include <stdint.h>
|
|
#include "rune.h"
|
|
|
|
/*
 * One span of 16-bit code points, described by its two end points.
 * NOTE(review): the tables in this file contain entries with lo == hi
 * (e.g. {453, 453}), which suggests both bounds are inclusive - confirm
 * against the lookup code that consumes these ranges.
 */
typedef struct {
    uint16_t lo; /* lower bound of the span */
    uint16_t hi; /* upper bound of the span */
} URange16;
|
|
|
|
typedef struct {
|
|
const URange16* r16;
|
|
int nr16;
|
|
} UGroup;
|
|
|
|
static const URange16 Cc_range16[] = {
|
|
// Control
|
|
{0, 31},
|
|
{127, 159},
|
|
};
|
|
|
|
/* Lt group ("Title case"): spans listed in ascending order. */
static const URange16 Lt_range16[] = {
    {453, 453}, {456, 456}, {459, 459}, {498, 498},
    {8072, 8079}, {8088, 8095}, {8104, 8111}, {8124, 8124},
    {8140, 8140}, {8188, 8188},
};
|
|
|
|
/* Nd group ("Decimal number"): spans listed in ascending order. */
static const URange16 Nd_range16[] = {
    {48, 57}, {1632, 1641}, {1776, 1785}, {1984, 1993},
    {2406, 2415}, {2534, 2543}, {2662, 2671}, {2790, 2799},
    {2918, 2927}, {3046, 3055}, {3174, 3183}, {3302, 3311},
    {3430, 3439}, {3558, 3567}, {3664, 3673}, {3792, 3801},
    {3872, 3881}, {4160, 4169}, {4240, 4249}, {6112, 6121},
    {6160, 6169}, {6470, 6479}, {6608, 6617}, {6784, 6793},
    {6800, 6809}, {6992, 7001}, {7088, 7097}, {7232, 7241},
    {7248, 7257}, {42528, 42537}, {43216, 43225}, {43264, 43273},
    {43472, 43481}, {43504, 43513}, {43600, 43609}, {44016, 44025},
    {65296, 65305},
};
|
|
|
|
/* Nl group ("Number letter"): spans listed in ascending order. */
static const URange16 Nl_range16[] = {
    {5870, 5872}, {8544, 8578}, {8581, 8584},
    {12295, 12295}, {12321, 12329}, {12344, 12346},
    {42726, 42735},
};
|
|
|
|
/* Pc group ("Connector punctuation"): spans listed in ascending order. */
static const URange16 Pc_range16[] = {
    {95, 95}, {8255, 8256}, {8276, 8276},
    {65075, 65076}, {65101, 65103}, {65343, 65343},
};
|
|
|
|
/* Pd group ("Dash punctuation"): spans listed in ascending order. */
static const URange16 Pd_range16[] = {
    {45, 45}, {1418, 1418}, {1470, 1470}, {5120, 5120},
    {6150, 6150}, {8208, 8213}, {11799, 11799}, {11802, 11802},
    {11834, 11835}, {11840, 11840}, {11869, 11869}, {12316, 12316},
    {12336, 12336}, {12448, 12448}, {65073, 65074}, {65112, 65112},
    {65123, 65123}, {65293, 65293},
};
|
|
|
|
/* Pf group ("Final punctuation"): spans listed in ascending order. */
static const URange16 Pf_range16[] = {
    {187, 187}, {8217, 8217}, {8221, 8221}, {8250, 8250},
    {11779, 11779}, {11781, 11781}, {11786, 11786}, {11789, 11789},
    {11805, 11805}, {11809, 11809},
};
|
|
|
|
/* Pi group ("Initial punctuation"): spans listed in ascending order. */
static const URange16 Pi_range16[] = {
    {171, 171}, {8216, 8216}, {8219, 8220}, {8223, 8223},
    {8249, 8249}, {11778, 11778}, {11780, 11780}, {11785, 11785},
    {11788, 11788}, {11804, 11804}, {11808, 11808},
};
|
|
|
|
/* Sc group ("Currency symbol"): spans listed in ascending order. */
static const URange16 Sc_range16[] = {
    {36, 36}, {162, 165}, {1423, 1423}, {1547, 1547},
    {2046, 2047}, {2546, 2547}, {2555, 2555}, {2801, 2801},
    {3065, 3065}, {3647, 3647}, {6107, 6107}, {8352, 8384},
    {43064, 43064}, {65020, 65020}, {65129, 65129}, {65284, 65284},
    {65504, 65505}, {65509, 65510},
};
|
|
|
|
static const URange16 Zl_range16[] = {
|
|
// Line separator
|
|
{8232, 8232},
|
|
};
|
|
|
|
static const URange16 Zp_range16[] = {
|
|
// Paragraph separator
|
|
{8233, 8233},
|
|
};
|
|
|
|
/* Zs group ("Space separator"): spans listed in ascending order. */
static const URange16 Zs_range16[] = {
    {32, 32}, {160, 160}, {5760, 5760}, {8192, 8202},
    {8239, 8239}, {8287, 8287}, {12288, 12288},
};
|
|
|
|
/* Arabic group: spans listed in ascending order. */
static const URange16 Arabic_range16[] = {
    {1536, 1540}, {1542, 1547}, {1549, 1562}, {1564, 1566},
    {1568, 1599}, {1601, 1610}, {1622, 1647}, {1649, 1756},
    {1758, 1791}, {1872, 1919}, {2160, 2190}, {2192, 2193},
    {2200, 2273}, {2275, 2303}, {64336, 64450}, {64467, 64829},
    {64832, 64911}, {64914, 64967}, {64975, 64975}, {65008, 65023},
    {65136, 65140}, {65142, 65276},
};
|
|
|
|
/* Cyrillic group: spans listed in ascending order. */
static const URange16 Cyrillic_range16[] = {
    {1024, 1156},
    {1159, 1327},
    {7296, 7304},
    {7467, 7467},
    {7544, 7544},
    {11744, 11775},
    {42560, 42655},
    {65070, 65071},
};
|
|
|
|
/* Devanagari group: spans listed in ascending order. */
static const URange16 Devanagari_range16[] = {
    {2304, 2384}, {2389, 2403}, {2406, 2431}, {43232, 43263},
};
|
|
|
|
/* Greek group: spans listed in ascending order. */
static const URange16 Greek_range16[] = {
    {880, 883}, {885, 887}, {890, 893}, {895, 895},
    {900, 900}, {902, 902}, {904, 906}, {908, 908},
    {910, 929}, {931, 993}, {1008, 1023}, {7462, 7466},
    {7517, 7521}, {7526, 7530}, {7615, 7615}, {7936, 7957},
    {7960, 7965}, {7968, 8005}, {8008, 8013}, {8016, 8023},
    {8025, 8025}, {8027, 8027}, {8029, 8029}, {8031, 8061},
    {8064, 8116}, {8118, 8132}, {8134, 8147}, {8150, 8155},
    {8157, 8175}, {8178, 8180}, {8182, 8190}, {8486, 8486},
    {43877, 43877},
};
|
|
|
|
/* Han group: spans listed in ascending order. */
static const URange16 Han_range16[] = {
    {11904, 11929}, {11931, 12019}, {12032, 12245}, {12293, 12293},
    {12295, 12295}, {12321, 12329}, {12344, 12347}, {13312, 19903},
    {19968, 40959}, {63744, 64109}, {64112, 64217},
};
|
|
|
|
/* Latin group: spans listed in ascending order. */
static const URange16 Latin_range16[] = {
    {65, 90}, {97, 122}, {170, 170}, {186, 186},
    {192, 214}, {216, 246}, {248, 696}, {736, 740},
    {7424, 7461}, {7468, 7516}, {7522, 7525}, {7531, 7543},
    {7545, 7614}, {7680, 7935}, {8305, 8305}, {8319, 8319},
    {8336, 8348}, {8490, 8491}, {8498, 8498}, {8526, 8526},
    {8544, 8584}, {11360, 11391}, {42786, 42887}, {42891, 42954},
    {42960, 42961}, {42963, 42963}, {42965, 42969}, {42994, 43007},
    {43824, 43866}, {43868, 43876}, {43878, 43881}, {64256, 64262},
    {65313, 65338}, {65345, 65370},
};
|
|
|
|
/*
 * Expands to a brace-enclosed initializer for group `Code`: the address of
 * the Code_range16 table followed by the number of elements in that table.
 * `Code` is token-pasted, so it must be the bare group name (e.g. Cc).
 */
#define UNI_ENTRY(Code) \
    {Code##_range16, sizeof(Code##_range16) / sizeof(Code##_range16[0])}
|
|
/* Expands to an array designated initializer: element `idx` is set to `val`. */
#define _e_arg(idx, val) [idx] = val
|
|
|
|
static const UGroup _utf8_unicode_groups[U8G_SIZE] = {
|
|
[U8G_Cc] = UNI_ENTRY(Cc),
|
|
[U8G_Lt] = UNI_ENTRY(Lt),
|
|
[U8G_Nd] = UNI_ENTRY(Nd),
|
|
[U8G_Nl] = UNI_ENTRY(Nl),
|
|
[U8G_Pc] = UNI_ENTRY(Pc),
|
|
[U8G_Pd] = UNI_ENTRY(Pd),
|
|
[U8G_Pf] = UNI_ENTRY(Pf),
|
|
[U8G_Pi] = UNI_ENTRY(Pi),
|
|
[U8G_Sc] = UNI_ENTRY(Sc),
|
|
[U8G_Zl] = UNI_ENTRY(Zl),
|
|
[U8G_Zp] = UNI_ENTRY(Zp),
|
|
[U8G_Zs] = UNI_ENTRY(Zs),
|
|
[U8G_Arabic] = UNI_ENTRY(Arabic),
|
|
[U8G_Cyrillic] = UNI_ENTRY(Cyrillic),
|
|
[U8G_Devanagari] = UNI_ENTRY(Devanagari),
|
|
[U8G_Greek] = UNI_ENTRY(Greek),
|
|
[U8G_Han] = UNI_ENTRY(Han),
|
|
[U8G_Latin] = UNI_ENTRY(Latin),
|
|
};
|
|
|
|
#endif // UTF8_GROUPS_H
|