mirror of
https://github.com/tursodatabase/libsql.git
synced 2025-03-09 12:31:39 +00:00
A common complaint with libSQL is how to run extensions. The main mechanism, loading a .so, has a lot of issues around how those .so files are distributed. The most common extensions are the ones in the sqlean package. We can improve this experience by bundling them in our sqlite build. Not all SQLean extensions are kosher: some of them, like fileio, use the vfs. Others are deemed too complex. The extensions included here are a subset that we deem important enough, and low-risk enough, to just be a part of the main bundle.
370 lines
12 KiB
C
370 lines
12 KiB
C
/***************************************************************************
|
|
Fuzzer driver for PCRE2. Given an arbitrary string of bytes and a length, it
|
|
tries to compile and match it, deriving options from the string itself. If
|
|
STANDALONE is defined, a main program that calls the driver with the contents
|
|
of specified files is compiled, and commentary on what is happening is output.
|
|
If an argument starts with '=' the rest of it is taken as a literal string
|
|
rather than a file name. This allows easy testing of short strings.
|
|
|
|
Written by Philip Hazel, October 2016
|
|
***************************************************************************/
|
|
|
|
#include <errno.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#define PCRE2_CODE_UNIT_WIDTH 8
|
|
#include "regexp/pcre2/pcre2.h"
|
|
|
|
#define MAX_MATCH_SIZE 1000
|
|
|
|
#define DFA_WORKSPACE_COUNT 100
|
|
|
|
#define ALLOWED_COMPILE_OPTIONS \
|
|
(PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \
|
|
PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \
|
|
PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_ENDANCHORED|PCRE2_EXTENDED|PCRE2_FIRSTLINE| \
|
|
PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \
|
|
PCRE2_NO_AUTO_CAPTURE| \
|
|
PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \
|
|
PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \
|
|
PCRE2_UTF)
|
|
|
|
#define ALLOWED_MATCH_OPTIONS \
|
|
(PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
|
|
PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \
|
|
PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT)
|
|
|
|
/* This is the callout function. Its only purpose is to halt matching if there
|
|
are more than 100 callouts, as one way of stopping too much time being spent on
|
|
fruitless matches. The callout data is a pointer to the counter. */
|
|
|
|
/* Callout handler installed on the match context. Each invocation bumps the
counter that callout_data points to; once that counter has gone past 100 the
function returns PCRE2_ERROR_CALLOUT, otherwise it returns 0. The callout block
itself is not inspected. */

static int callout_function(pcre2_callout_block *cb, void *callout_data)
{
  uint32_t *counter = (uint32_t *)callout_data;

  (void)cb;  /* The callout block is deliberately unused */

  *counter += 1;
  if (*counter > 100)
  {
    return PCRE2_ERROR_CALLOUT;
  }
  return 0;
}
|
|
|
|
/* Putting in this apparently unnecessary prototype prevents gcc from giving a
|
|
"no previous prototype" warning when compiling at high warning level. */
|
|
|
|
int LLVMFuzzerTestOneInput(const unsigned char *, size_t);
|
|
|
|
/* Here's the driving function. */
|
|
|
|
int LLVMFuzzerTestOneInput(const unsigned char *data, size_t size)
|
|
{
|
|
uint32_t compile_options;
|
|
uint32_t match_options;
|
|
pcre2_match_data *match_data = NULL;
|
|
pcre2_match_context *match_context = NULL;
|
|
size_t match_size;
|
|
int dfa_workspace[DFA_WORKSPACE_COUNT];
|
|
int r1, r2;
|
|
int i;
|
|
|
|
if (size < 1) return 0;
|
|
|
|
/* Limiting the length of the subject for matching stops fruitless searches
|
|
in large trees taking too much time. */
|
|
|
|
match_size = (size > MAX_MATCH_SIZE)? MAX_MATCH_SIZE : size;
|
|
|
|
/* Figure out some options to use. Initialize the random number to ensure
|
|
repeatability. Ensure that we get a 32-bit unsigned random number for testing
|
|
options. (RAND_MAX is required to be at least 32767, but is commonly
|
|
2147483647, which excludes the top bit.) */
|
|
|
|
srand((unsigned int)(data[size/2]));
|
|
r1 = rand();
|
|
r2 = rand();
|
|
|
|
/* Ensure that all undefined option bits are zero (waste of time trying them)
|
|
and also that PCRE2_NO_UTF_CHECK is unset, as there is no guarantee that the
|
|
input is UTF-8. Also unset PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as there is no
|
|
reason to disallow UTF and UCP. Force PCRE2_NEVER_BACKSLASH_C to be set because
|
|
\C in random patterns is highly likely to cause a crash. */
|
|
|
|
compile_options =
|
|
((((uint32_t)r1 << 16) | ((uint32_t)r2 & 0xffff)) & ALLOWED_COMPILE_OPTIONS) |
|
|
PCRE2_NEVER_BACKSLASH_C;
|
|
|
|
match_options =
|
|
((((uint32_t)r1 << 16) | ((uint32_t)r2 & 0xffff)) & ALLOWED_MATCH_OPTIONS);
|
|
|
|
/* Discard partial matching if PCRE2_ENDANCHORED is set, because they are not
|
|
allowed together and just give an immediate error return. */
|
|
|
|
if (((compile_options|match_options) & PCRE2_ENDANCHORED) != 0)
|
|
match_options &= ~(PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT);
|
|
|
|
/* Do the compile with and without the options, and after a successful compile,
|
|
likewise do the match with and without the options. */
|
|
|
|
for (i = 0; i < 2; i++)
|
|
{
|
|
uint32_t callout_count;
|
|
int errorcode;
|
|
PCRE2_SIZE erroroffset;
|
|
pcre2_code *code;
|
|
|
|
#ifdef STANDALONE
|
|
printf("Compile options %.8x never_backslash_c", compile_options);
|
|
printf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
|
|
((compile_options & PCRE2_ALT_BSUX) != 0)? ",alt_bsux" : "",
|
|
((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? ",alt_circumflex" : "",
|
|
((compile_options & PCRE2_ALT_VERBNAMES) != 0)? ",alt_verbnames" : "",
|
|
((compile_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? ",allow_empty_class" : "",
|
|
((compile_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
|
|
((compile_options & PCRE2_AUTO_CALLOUT) != 0)? ",auto_callout" : "",
|
|
((compile_options & PCRE2_CASELESS) != 0)? ",caseless" : "",
|
|
((compile_options & PCRE2_DOLLAR_ENDONLY) != 0)? ",dollar_endonly" : "",
|
|
((compile_options & PCRE2_DOTALL) != 0)? ",dotall" : "",
|
|
((compile_options & PCRE2_DUPNAMES) != 0)? ",dupnames" : "",
|
|
((compile_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
|
|
((compile_options & PCRE2_EXTENDED) != 0)? ",extended" : "",
|
|
((compile_options & PCRE2_FIRSTLINE) != 0)? ",firstline" : "",
|
|
((compile_options & PCRE2_MATCH_UNSET_BACKREF) != 0)? ",match_unset_backref" : "",
|
|
((compile_options & PCRE2_MULTILINE) != 0)? ",multiline" : "",
|
|
((compile_options & PCRE2_NEVER_UCP) != 0)? ",never_ucp" : "",
|
|
((compile_options & PCRE2_NEVER_UTF) != 0)? ",never_utf" : "",
|
|
((compile_options & PCRE2_NO_AUTO_CAPTURE) != 0)? ",no_auto_capture" : "",
|
|
((compile_options & PCRE2_NO_AUTO_POSSESS) != 0)? ",no_auto_possess" : "",
|
|
((compile_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? ",no_dotstar_anchor" : "",
|
|
((compile_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
|
|
((compile_options & PCRE2_NO_START_OPTIMIZE) != 0)? ",no_start_optimize" : "",
|
|
((compile_options & PCRE2_UCP) != 0)? ",ucp" : "",
|
|
((compile_options & PCRE2_UNGREEDY) != 0)? ",ungreedy" : "",
|
|
((compile_options & PCRE2_USE_OFFSET_LIMIT) != 0)? ",use_offset_limit" : "",
|
|
((compile_options & PCRE2_UTF) != 0)? ",utf" : "");
|
|
#endif
|
|
|
|
code = pcre2_compile((PCRE2_SPTR)data, (PCRE2_SIZE)size, compile_options,
|
|
&errorcode, &erroroffset, NULL);
|
|
|
|
/* Compilation succeeded */
|
|
|
|
if (code != NULL)
|
|
{
|
|
int j;
|
|
uint32_t save_match_options = match_options;
|
|
|
|
#ifdef SUPPORT_JIT
|
|
pcre2_jit_compile(code, PCRE2_JIT_COMPLETE);
|
|
#endif
|
|
|
|
/* Create match data and context blocks only when we first need them. Set
|
|
low match and depth limits to avoid wasting too much searching large
|
|
pattern trees. Almost all matches are going to fail. */
|
|
|
|
if (match_data == NULL)
|
|
{
|
|
match_data = pcre2_match_data_create(32, NULL);
|
|
if (match_data == NULL)
|
|
{
|
|
#ifdef STANDALONE
|
|
printf("** Failed to create match data block\n");
|
|
#endif
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
if (match_context == NULL)
|
|
{
|
|
match_context = pcre2_match_context_create(NULL);
|
|
if (match_context == NULL)
|
|
{
|
|
#ifdef STANDALONE
|
|
printf("** Failed to create match context block\n");
|
|
#endif
|
|
return 0;
|
|
}
|
|
(void)pcre2_set_match_limit(match_context, 100);
|
|
(void)pcre2_set_depth_limit(match_context, 100);
|
|
(void)pcre2_set_callout(match_context, callout_function, &callout_count);
|
|
}
|
|
|
|
/* Match twice, with and without options. */
|
|
|
|
for (j = 0; j < 2; j++)
|
|
{
|
|
#ifdef STANDALONE
|
|
printf("Match options %.8x", match_options);
|
|
printf("%s%s%s%s%s%s%s%s%s%s\n",
|
|
((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
|
|
((match_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
|
|
((match_options & PCRE2_NO_JIT) != 0)? ",no_jit" : "",
|
|
((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
|
|
((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "",
|
|
((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "",
|
|
((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? ",notempty_atstart" : "",
|
|
((match_options & PCRE2_NOTEOL) != 0)? ",noteol" : "",
|
|
((match_options & PCRE2_PARTIAL_HARD) != 0)? ",partial_hard" : "",
|
|
((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : "");
|
|
#endif
|
|
|
|
callout_count = 0;
|
|
errorcode = pcre2_match(code, (PCRE2_SPTR)data, (PCRE2_SIZE)match_size, 0,
|
|
match_options, match_data, match_context);
|
|
|
|
#ifdef STANDALONE
|
|
if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
|
|
{
|
|
unsigned char buffer[256];
|
|
pcre2_get_error_message(errorcode, buffer, 256);
|
|
printf("Match failed: error %d: %s\n", errorcode, buffer);
|
|
}
|
|
#endif
|
|
|
|
match_options = 0; /* For second time */
|
|
}
|
|
|
|
/* Match with DFA twice, with and without options. */
|
|
|
|
match_options = save_match_options & ~PCRE2_NO_JIT; /* Not valid for DFA */
|
|
|
|
for (j = 0; j < 2; j++)
|
|
{
|
|
#ifdef STANDALONE
|
|
printf("DFA match options %.8x", match_options);
|
|
printf("%s%s%s%s%s%s%s%s%s\n",
|
|
((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
|
|
((match_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
|
|
((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
|
|
((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "",
|
|
((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "",
|
|
((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? ",notempty_atstart" : "",
|
|
((match_options & PCRE2_NOTEOL) != 0)? ",noteol" : "",
|
|
((match_options & PCRE2_PARTIAL_HARD) != 0)? ",partial_hard" : "",
|
|
((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : "");
|
|
#endif
|
|
|
|
callout_count = 0;
|
|
errorcode = pcre2_dfa_match(code, (PCRE2_SPTR)data,
|
|
(PCRE2_SIZE)match_size, 0, match_options, match_data, match_context,
|
|
dfa_workspace, DFA_WORKSPACE_COUNT);
|
|
|
|
#ifdef STANDALONE
|
|
if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
|
|
{
|
|
unsigned char buffer[256];
|
|
pcre2_get_error_message(errorcode, buffer, 256);
|
|
printf("Match failed: error %d: %s\n", errorcode, buffer);
|
|
}
|
|
#endif
|
|
|
|
match_options = 0; /* For second time */
|
|
}
|
|
|
|
match_options = save_match_options; /* Reset for the second compile */
|
|
pcre2_code_free(code);
|
|
}
|
|
|
|
/* Compilation failed */
|
|
|
|
else
|
|
{
|
|
unsigned char buffer[256];
|
|
pcre2_get_error_message(errorcode, buffer, 256);
|
|
#ifdef STANDALONE
|
|
printf("Error %d at offset %lu: %s\n", errorcode, erroroffset, buffer);
|
|
#else
|
|
if (strstr((const char *)buffer, "internal error") != NULL) abort();
|
|
#endif
|
|
}
|
|
|
|
compile_options = PCRE2_NEVER_BACKSLASH_C; /* For second time */
|
|
}
|
|
|
|
if (match_data != NULL) pcre2_match_data_free(match_data);
|
|
if (match_context != NULL) pcre2_match_context_free(match_context);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/* Optional main program. */
|
|
|
|
#ifdef STANDALONE

/* Standalone driver. Each command line argument is either a literal string
(when it starts with '=', the remainder being the data) or the name of a file
whose entire contents are the data. The data is copied into a heap buffer and
passed to LLVMFuzzerTestOneInput(), with commentary written to stdout. Problems
with one argument are reported and the next argument is then processed.

Arguments:
  argc    count of command line arguments
  argv    the command line arguments

Returns:  always 0 */

int main(int argc, char **argv)
{
  int i;

  if (argc < 2)
  {
    printf("** No arguments given\n");
    return 0;
  }

  for (i = 1; i < argc; i++)
  {
    size_t filelen;
    size_t readsize;
    long endpos;
    unsigned char *buffer;
    FILE *f;

    /* Handle a literal string. Copy to an exact size buffer so that checks for
    overrunning work. */

    if (argv[i][0] == '=')
    {
      readsize = strlen(argv[i]) - 1;
      printf("------ <Literal> ------\n");
      printf("Length = %lu\n", (unsigned long)readsize);
      printf("%.*s\n", (int)readsize, argv[i]+1);

      /* malloc(0) is allowed to return NULL, which must not be mistaken for an
      allocation failure, so always ask for at least one byte. */

      buffer = (unsigned char *)malloc((readsize > 0)? readsize : 1);
      if (buffer == NULL)
      {
        printf("** Failed to allocate %lu bytes of memory\n",
          (unsigned long)readsize);
      }
      else
      {
        memcpy(buffer, argv[i]+1, readsize);
        LLVMFuzzerTestOneInput(buffer, readsize);
        free(buffer);
      }
      continue;
    }

    /* Handle a string given in a file */

    f = fopen(argv[i], "rb");
    if (f == NULL)
    {
      printf("** Failed to open %s: %s\n", argv[i], strerror(errno));
      continue;
    }

    printf("------ %s ------\n", argv[i]);

    /* Find the file size. ftell() returns -1 on failure; converting that to
    size_t would request an enormous allocation, so check every step. */

    endpos = -1;
    if (fseek(f, 0, SEEK_END) == 0) endpos = ftell(f);
    if (endpos < 0 || fseek(f, 0, SEEK_SET) != 0)
    {
      printf("** Failed to determine the size of %s: %s\n", argv[i],
        strerror(errno));
      fclose(f);
      continue;
    }
    filelen = (size_t)endpos;

    /* As for literals, never request zero bytes. */

    buffer = (unsigned char *)malloc((filelen > 0)? filelen : 1);
    if (buffer == NULL)
    {
      printf("** Failed to allocate %lu bytes of memory\n",
        (unsigned long)filelen);
      fclose(f);
      continue;
    }

    readsize = fread(buffer, 1, filelen, f);
    fclose(f);

    if (readsize != filelen)
    {
      printf("** File size is %lu but fread() returned %lu\n",
        (unsigned long)filelen, (unsigned long)readsize);
    }
    else
    {
      printf("Length = %lu\n", (unsigned long)filelen);
      LLVMFuzzerTestOneInput(buffer, filelen);
    }
    free(buffer);
  }

  return 0;
}

#endif /* STANDALONE */
|
|
|
|
/* End */
|