2016-05-31 16:22:48 +00:00
/*
* * 2016 - 05 - 28
* *
* * The author disclaims copyright to this source code . In place of
* * a legal notice , here is a blessing :
* *
* * May you do good and not evil .
* * May you find forgiveness for yourself and forgive others .
* * May you share freely , never taking more than you give .
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* *
* * This file contains the implementation of an SQLite virtual table for
* * reading CSV files .
* *
* * Usage :
* *
* * . load . / csv
* * CREATE VIRTUAL TABLE temp . csv USING csv ( filename = FILENAME ) ;
* * SELECT * FROM csv ;
* *
2018-11-16 01:42:26 +00:00
* * The columns are named " c1 " , " c2 " , " c3 " , . . . by default . Or the
* * application can define its own CREATE TABLE statement using the
* * schema = parameter , like this :
2016-05-31 16:22:48 +00:00
* *
* * CREATE VIRTUAL TABLE temp . csv2 USING csv (
* * filename = " ../http.log " ,
* * schema = " CREATE TABLE x(date,ipaddr,url,referrer,userAgent) "
* * ) ;
2016-06-03 01:01:57 +00:00
* *
* * Instead of specifying a file , the text of the CSV can be loaded using
* * the data = parameter .
* *
* * If the columns = N parameter is supplied , then the CSV file is assumed to have
2018-11-16 01:42:26 +00:00
* * N columns . If both the columns = and schema = parameters are omitted , then
* * the number and names of the columns is determined by the first line of
* * the CSV input .
2016-06-03 01:01:57 +00:00
* *
* * Some extra debugging features ( used for testing virtual tables ) are available
* * if this module is compiled with - DSQLITE_TEST .
2016-05-31 16:22:48 +00:00
*/
# include <sqlite3ext.h>
SQLITE_EXTENSION_INIT1
# include <string.h>
# include <stdlib.h>
# include <assert.h>
# include <stdarg.h>
# include <ctype.h>
# include <stdio.h>
2016-07-15 02:50:18 +00:00
# ifndef SQLITE_OMIT_VIRTUALTABLE
2016-05-31 16:22:48 +00:00
/*
* * A macro to hint to the compiler that a function should not be
* * inlined .
*/
# if defined(__GNUC__)
# define CSV_NOINLINE __attribute__((noinline))
# elif defined(_MSC_VER) && _MSC_VER>=1310
# define CSV_NOINLINE __declspec(noinline)
# else
# define CSV_NOINLINE
# endif
/* Max size of the error message in a CsvReader */
# define CSV_MXERR 200
2016-06-02 17:44:24 +00:00
/* Size of the CsvReader input buffer */
# define CSV_INBUFSZ 1024
2016-05-31 16:22:48 +00:00
/* A context object used when read a CSV file. */
typedef struct CsvReader CsvReader ;
struct CsvReader {
FILE * in ; /* Read the CSV text from this input stream */
char * z ; /* Accumulated text for a field */
int n ; /* Number of bytes in z */
int nAlloc ; /* Space allocated for z[] */
int nLine ; /* Current line number */
2017-06-26 18:42:23 +00:00
int bNotFirst ; /* True if prior text has been seen */
2017-09-18 00:18:31 +00:00
int cTerm ; /* Character that terminated the most recent field */
2016-06-02 17:44:24 +00:00
size_t iIn ; /* Next unread character in the input buffer */
size_t nIn ; /* Number of characters in the input buffer */
char * zIn ; /* The input buffer */
2016-05-31 16:22:48 +00:00
char zErr [ CSV_MXERR ] ; /* Error message */
} ;
/* Initialize a CsvReader object */
static void csv_reader_init ( CsvReader * p ) {
2016-06-02 17:44:24 +00:00
p - > in = 0 ;
p - > z = 0 ;
p - > n = 0 ;
p - > nAlloc = 0 ;
p - > nLine = 0 ;
2017-06-26 18:42:23 +00:00
p - > bNotFirst = 0 ;
2016-06-02 17:44:24 +00:00
p - > nIn = 0 ;
p - > zIn = 0 ;
p - > zErr [ 0 ] = 0 ;
2016-05-31 16:22:48 +00:00
}
/* Close and reset a CsvReader object */
static void csv_reader_reset ( CsvReader * p ) {
2016-06-02 17:44:24 +00:00
if ( p - > in ) {
fclose ( p - > in ) ;
sqlite3_free ( p - > zIn ) ;
}
2016-05-31 16:22:48 +00:00
sqlite3_free ( p - > z ) ;
csv_reader_init ( p ) ;
}
/* Report an error on a CsvReader */
static void csv_errmsg ( CsvReader * p , const char * zFormat , . . . ) {
va_list ap ;
va_start ( ap , zFormat ) ;
sqlite3_vsnprintf ( CSV_MXERR , p - > zErr , zFormat , ap ) ;
va_end ( ap ) ;
}
/* Open the file associated with a CsvReader
* * Return the number of errors .
*/
2016-06-02 17:44:24 +00:00
static int csv_reader_open (
CsvReader * p , /* The reader to open */
const char * zFilename , /* Read from this filename */
const char * zData /* ... or use this data */
) {
if ( zFilename ) {
p - > zIn = sqlite3_malloc ( CSV_INBUFSZ ) ;
if ( p - > zIn = = 0 ) {
csv_errmsg ( p , " out of memory " ) ;
return 1 ;
}
p - > in = fopen ( zFilename , " rb " ) ;
if ( p - > in = = 0 ) {
2018-04-24 10:57:10 +00:00
sqlite3_free ( p - > zIn ) ;
2016-06-02 17:44:24 +00:00
csv_reader_reset ( p ) ;
csv_errmsg ( p , " cannot open '%s' for reading " , zFilename ) ;
return 1 ;
}
} else {
assert ( p - > in = = 0 ) ;
p - > zIn = ( char * ) zData ;
p - > nIn = strlen ( zData ) ;
2016-05-31 16:22:48 +00:00
}
return 0 ;
}
2016-06-02 17:44:24 +00:00
/* The input buffer has overflowed. Refill the input buffer, then
* * return the next character
*/
static CSV_NOINLINE int csv_getc_refill ( CsvReader * p ) {
size_t got ;
assert ( p - > iIn > = p - > nIn ) ; /* Only called on an empty input buffer */
assert ( p - > in ! = 0 ) ; /* Only called if reading froma file */
got = fread ( p - > zIn , 1 , CSV_INBUFSZ , p - > in ) ;
if ( got = = 0 ) return EOF ;
p - > nIn = got ;
p - > iIn = 1 ;
return p - > zIn [ 0 ] ;
}
/* Return the next character of input. Return EOF at end of input. */
static int csv_getc ( CsvReader * p ) {
if ( p - > iIn > = p - > nIn ) {
if ( p - > in ! = 0 ) return csv_getc_refill ( p ) ;
return EOF ;
}
2017-09-18 00:18:31 +00:00
return ( ( unsigned char * ) p - > zIn ) [ p - > iIn + + ] ;
2016-06-02 17:44:24 +00:00
}
2016-05-31 16:22:48 +00:00
/* Increase the size of p->z and append character c to the end.
* * Return 0 on success and non - zero if there is an OOM error */
static CSV_NOINLINE int csv_resize_and_append ( CsvReader * p , char c ) {
char * zNew ;
int nNew = p - > nAlloc * 2 + 100 ;
zNew = sqlite3_realloc64 ( p - > z , nNew ) ;
if ( zNew ) {
p - > z = zNew ;
p - > nAlloc = nNew ;
p - > z [ p - > n + + ] = c ;
return 0 ;
} else {
csv_errmsg ( p , " out of memory " ) ;
return 1 ;
}
}
/* Append a single character to the CsvReader.z[] array.
* * Return 0 on success and non - zero if there is an OOM error */
static int csv_append ( CsvReader * p , char c ) {
if ( p - > n > = p - > nAlloc - 1 ) return csv_resize_and_append ( p , c ) ;
p - > z [ p - > n + + ] = c ;
return 0 ;
}
/* Read a single field of CSV text. Compatible with rfc4180 and extended
* * with the option of having a separator other than " , " .
* *
* * + Input comes from p - > in .
* * + Store results in p - > z of length p - > n . Space to hold p - > z comes
* * from sqlite3_malloc64 ( ) .
* * + Keep track of the line number in p - > nLine .
* * + Store the character that terminates the field in p - > cTerm . Store
* * EOF on end - of - file .
* *
2018-06-02 12:05:18 +00:00
* * Return 0 at EOF or on OOM . On EOF , the p - > cTerm character will have
* * been set to EOF .
2016-05-31 16:22:48 +00:00
*/
static char * csv_read_one_field ( CsvReader * p ) {
int c ;
p - > n = 0 ;
2016-06-02 17:44:24 +00:00
c = csv_getc ( p ) ;
2016-05-31 16:22:48 +00:00
if ( c = = EOF ) {
p - > cTerm = EOF ;
2018-06-02 12:05:18 +00:00
return 0 ;
2016-05-31 16:22:48 +00:00
}
if ( c = = ' " ' ) {
int pc , ppc ;
int startLine = p - > nLine ;
pc = ppc = 0 ;
while ( 1 ) {
2016-06-02 17:44:24 +00:00
c = csv_getc ( p ) ;
2016-06-03 01:01:57 +00:00
if ( c < = ' " ' | | pc = = ' " ' ) {
if ( c = = ' \n ' ) p - > nLine + + ;
if ( c = = ' " ' ) {
if ( pc = = ' " ' ) {
pc = 0 ;
continue ;
}
}
if ( ( c = = ' , ' & & pc = = ' " ' )
| | ( c = = ' \n ' & & pc = = ' " ' )
| | ( c = = ' \n ' & & pc = = ' \r ' & & ppc = = ' " ' )
| | ( c = = EOF & & pc = = ' " ' )
) {
do { p - > n - - ; } while ( p - > z [ p - > n ] ! = ' " ' ) ;
2016-07-22 21:26:56 +00:00
p - > cTerm = ( char ) c ;
2016-06-03 01:01:57 +00:00
break ;
}
if ( pc = = ' " ' & & c ! = ' \r ' ) {
csv_errmsg ( p , " line %d: unescaped %c character " , p - > nLine , ' " ' ) ;
break ;
}
if ( c = = EOF ) {
csv_errmsg ( p , " line %d: unterminated %c-quoted field \n " ,
startLine , ' " ' ) ;
2016-07-22 21:26:56 +00:00
p - > cTerm = ( char ) c ;
2016-06-03 01:01:57 +00:00
break ;
2016-05-31 16:22:48 +00:00
}
}
if ( csv_append ( p , ( char ) c ) ) return 0 ;
ppc = pc ;
pc = c ;
}
} else {
2017-06-26 18:42:23 +00:00
/* If this is the first field being parsed and it begins with the
* * UTF - 8 BOM ( 0xEF BB BF ) then skip the BOM */
if ( ( c & 0xff ) = = 0xef & & p - > bNotFirst = = 0 ) {
2017-06-28 15:17:31 +00:00
csv_append ( p , ( char ) c ) ;
2017-06-26 18:42:23 +00:00
c = csv_getc ( p ) ;
if ( ( c & 0xff ) = = 0xbb ) {
2017-06-28 15:17:31 +00:00
csv_append ( p , ( char ) c ) ;
2017-06-26 18:42:23 +00:00
c = csv_getc ( p ) ;
if ( ( c & 0xff ) = = 0xbf ) {
p - > bNotFirst = 1 ;
p - > n = 0 ;
return csv_read_one_field ( p ) ;
}
}
}
2016-06-03 01:01:57 +00:00
while ( c > ' , ' | | ( c ! = EOF & & c ! = ' , ' & & c ! = ' \n ' ) ) {
2016-05-31 16:22:48 +00:00
if ( csv_append ( p , ( char ) c ) ) return 0 ;
2016-06-02 17:44:24 +00:00
c = csv_getc ( p ) ;
2016-05-31 16:22:48 +00:00
}
if ( c = = ' \n ' ) {
p - > nLine + + ;
if ( p - > n > 0 & & p - > z [ p - > n - 1 ] = = ' \r ' ) p - > n - - ;
}
2016-07-22 21:26:56 +00:00
p - > cTerm = ( char ) c ;
2016-05-31 16:22:48 +00:00
}
2022-07-05 17:49:04 +00:00
assert ( p - > z = = 0 | | p - > n < p - > nAlloc ) ;
2016-05-31 16:22:48 +00:00
if ( p - > z ) p - > z [ p - > n ] = 0 ;
2017-06-26 18:42:23 +00:00
p - > bNotFirst = 1 ;
2016-05-31 16:22:48 +00:00
return p - > z ;
}
/* Forward references to the various virtual table methods implemented
* * in this file . */
static int csvtabCreate ( sqlite3 * , void * , int , const char * const * ,
sqlite3_vtab * * , char * * ) ;
static int csvtabConnect ( sqlite3 * , void * , int , const char * const * ,
sqlite3_vtab * * , char * * ) ;
static int csvtabBestIndex ( sqlite3_vtab * , sqlite3_index_info * ) ;
static int csvtabDisconnect ( sqlite3_vtab * ) ;
static int csvtabOpen ( sqlite3_vtab * , sqlite3_vtab_cursor * * ) ;
static int csvtabClose ( sqlite3_vtab_cursor * ) ;
static int csvtabFilter ( sqlite3_vtab_cursor * , int idxNum , const char * idxStr ,
int argc , sqlite3_value * * argv ) ;
static int csvtabNext ( sqlite3_vtab_cursor * ) ;
static int csvtabEof ( sqlite3_vtab_cursor * ) ;
static int csvtabColumn ( sqlite3_vtab_cursor * , sqlite3_context * , int ) ;
static int csvtabRowid ( sqlite3_vtab_cursor * , sqlite3_int64 * ) ;
/* An instance of the CSV virtual table */
typedef struct CsvTable {
sqlite3_vtab base ; /* Base class. Must be first */
char * zFilename ; /* Name of the CSV file */
2016-06-02 17:44:24 +00:00
char * zData ; /* Raw CSV data in lieu of zFilename */
2016-05-31 16:22:48 +00:00
long iStart ; /* Offset to start of data in zFilename */
int nCol ; /* Number of columns in the CSV file */
2016-05-31 18:08:35 +00:00
unsigned int tstFlags ; /* Bit values used for testing */
2016-05-31 16:22:48 +00:00
} CsvTable ;
2016-05-31 18:08:35 +00:00
/* Allowed values for tstFlags */
# define CSVTEST_FIDX 0x0001 /* Pretend that constrained searchs cost less*/
2016-05-31 16:22:48 +00:00
/* A cursor for the CSV virtual table */
typedef struct CsvCursor {
sqlite3_vtab_cursor base ; /* Base class. Must be first */
CsvReader rdr ; /* The CsvReader object */
char * * azVal ; /* Value of the current row */
2016-06-03 01:01:57 +00:00
int * aLen ; /* Length of each entry */
2016-05-31 16:22:48 +00:00
sqlite3_int64 iRowid ; /* The current rowid. Negative for EOF */
} CsvCursor ;
/* Transfer error message text from a reader into a CsvTable */
static void csv_xfer_error ( CsvTable * pTab , CsvReader * pRdr ) {
sqlite3_free ( pTab - > base . zErrMsg ) ;
pTab - > base . zErrMsg = sqlite3_mprintf ( " %s " , pRdr - > zErr ) ;
}
/*
* * This method is the destructor fo a CsvTable object .
*/
static int csvtabDisconnect ( sqlite3_vtab * pVtab ) {
CsvTable * p = ( CsvTable * ) pVtab ;
sqlite3_free ( p - > zFilename ) ;
2016-06-02 23:13:21 +00:00
sqlite3_free ( p - > zData ) ;
2016-05-31 16:22:48 +00:00
sqlite3_free ( p ) ;
return SQLITE_OK ;
}
/* Skip leading whitespace. Return a pointer to the first non-whitespace
* * character , or to the zero terminator if the string has only whitespace */
static const char * csv_skip_whitespace ( const char * z ) {
while ( isspace ( ( unsigned char ) z [ 0 ] ) ) z + + ;
return z ;
}
/* Remove trailing whitespace from the end of string z[] */
static void csv_trim_whitespace ( char * z ) {
size_t n = strlen ( z ) ;
while ( n > 0 & & isspace ( ( unsigned char ) z [ n ] ) ) n - - ;
z [ n ] = 0 ;
}
/* Dequote the string */
static void csv_dequote ( char * z ) {
2016-07-22 21:26:56 +00:00
int j ;
2016-05-31 16:22:48 +00:00
char cQuote = z [ 0 ] ;
2016-07-22 21:26:56 +00:00
size_t i , n ;
2016-05-31 16:22:48 +00:00
if ( cQuote ! = ' \' ' & & cQuote ! = ' " ' ) return ;
n = strlen ( z ) ;
if ( n < 2 | | z [ n - 1 ] ! = z [ 0 ] ) return ;
for ( i = 1 , j = 0 ; i < n - 1 ; i + + ) {
if ( z [ i ] = = cQuote & & z [ i + 1 ] = = cQuote ) i + + ;
z [ j + + ] = z [ i ] ;
}
z [ j ] = 0 ;
}
/* Check to see if the string is of the form: "TAG = VALUE" with optional
* * whitespace before and around tokens . If it is , return a pointer to the
* * first character of VALUE . If it is not , return NULL .
*/
static const char * csv_parameter ( const char * zTag , int nTag , const char * z ) {
z = csv_skip_whitespace ( z ) ;
if ( strncmp ( zTag , z , nTag ) ! = 0 ) return 0 ;
z = csv_skip_whitespace ( z + nTag ) ;
if ( z [ 0 ] ! = ' = ' ) return 0 ;
return csv_skip_whitespace ( z + 1 ) ;
}
2016-06-02 17:44:24 +00:00
/* Decode a parameter that requires a dequoted string.
* *
* * Return 1 if the parameter is seen , or 0 if not . 1 is returned
* * even if there is an error . If an error occurs , then an error message
* * is left in p - > zErr . If there are no errors , p - > zErr [ 0 ] = = 0.
*/
static int csv_string_parameter (
CsvReader * p , /* Leave the error message here, if there is one */
const char * zParam , /* Parameter we are checking for */
const char * zArg , /* Raw text of the virtual table argment */
char * * pzVal /* Write the dequoted string value here */
) {
const char * zValue ;
2016-07-09 16:38:25 +00:00
zValue = csv_parameter ( zParam , ( int ) strlen ( zParam ) , zArg ) ;
2016-06-02 17:44:24 +00:00
if ( zValue = = 0 ) return 0 ;
p - > zErr [ 0 ] = 0 ;
if ( * pzVal ) {
csv_errmsg ( p , " more than one '%s' parameter " , zParam ) ;
return 1 ;
}
* pzVal = sqlite3_mprintf ( " %s " , zValue ) ;
if ( * pzVal = = 0 ) {
csv_errmsg ( p , " out of memory " ) ;
return 1 ;
}
csv_trim_whitespace ( * pzVal ) ;
csv_dequote ( * pzVal ) ;
return 1 ;
}
2016-05-31 16:22:48 +00:00
/* Return 0 if the argument is false and 1 if it is true. Return -1 if
* * we cannot really tell .
*/
static int csv_boolean ( const char * z ) {
if ( sqlite3_stricmp ( " yes " , z ) = = 0
| | sqlite3_stricmp ( " on " , z ) = = 0
| | sqlite3_stricmp ( " true " , z ) = = 0
2016-11-09 01:46:13 +00:00
| | ( z [ 0 ] = = ' 1 ' & & z [ 1 ] = = 0 )
2016-05-31 16:22:48 +00:00
) {
return 1 ;
}
if ( sqlite3_stricmp ( " no " , z ) = = 0
| | sqlite3_stricmp ( " off " , z ) = = 0
| | sqlite3_stricmp ( " false " , z ) = = 0
| | ( z [ 0 ] = = ' 0 ' & & z [ 1 ] = = 0 )
) {
return 0 ;
}
return - 1 ;
}
2018-11-16 01:42:26 +00:00
/* Check to see if the string is of the form: "TAG = BOOLEAN" or just "TAG".
* * If it is , set * pValue to be the value of the boolean ( " true " if there is
* * not " = BOOLEAN " component ) and return non - zero . If the input string
* * does not begin with TAG , return zero .
*/
static int csv_boolean_parameter (
const char * zTag , /* Tag we are looking for */
int nTag , /* Size of the tag in bytes */
const char * z , /* Input parameter */
int * pValue /* Write boolean value here */
) {
int b ;
z = csv_skip_whitespace ( z ) ;
if ( strncmp ( zTag , z , nTag ) ! = 0 ) return 0 ;
z = csv_skip_whitespace ( z + nTag ) ;
if ( z [ 0 ] = = 0 ) {
* pValue = 1 ;
return 1 ;
}
if ( z [ 0 ] ! = ' = ' ) return 0 ;
z = csv_skip_whitespace ( z + 1 ) ;
b = csv_boolean ( z ) ;
if ( b > = 0 ) {
* pValue = b ;
return 1 ;
}
return 0 ;
}
2016-05-31 16:22:48 +00:00
/*
* * Parameters :
2016-06-02 17:44:24 +00:00
* * filename = FILENAME Name of file containing CSV content
* * data = TEXT Direct CSV content .
2016-05-31 18:44:33 +00:00
* * schema = SCHEMA Alternative CSV schema .
2016-05-31 16:22:48 +00:00
* * header = YES | NO First row of CSV defines the names of
* * columns if " yes " . Default " no " .
2016-06-02 17:44:24 +00:00
* * columns = N Assume the CSV file contains N columns .
2016-06-03 01:01:57 +00:00
* *
* * Only available if compiled with SQLITE_TEST :
* *
2016-05-31 18:08:35 +00:00
* * testflags = N Bitmask of test flags . Optional
2016-05-31 16:22:48 +00:00
* *
2016-05-31 18:44:33 +00:00
* * If schema = is omitted , then the columns are named " c0 " , " c1 " , " c2 " ,
* * and so forth . If columns = N is omitted , then the file is opened and
* * the number of columns in the first row is counted to determine the
* * column count . If header = YES , then the first row is skipped .
2016-05-31 16:22:48 +00:00
*/
static int csvtabConnect (
sqlite3 * db ,
void * pAux ,
int argc , const char * const * argv ,
sqlite3_vtab * * ppVtab ,
char * * pzErr
) {
2016-05-31 18:44:33 +00:00
CsvTable * pNew = 0 ; /* The CsvTable object to construct */
int bHeader = - 1 ; /* header= flags. -1 means not seen yet */
int rc = SQLITE_OK ; /* Result code from this routine */
2016-06-02 17:44:24 +00:00
int i , j ; /* Loop counters */
2016-06-03 01:01:57 +00:00
# ifdef SQLITE_TEST
2016-06-02 17:44:24 +00:00
int tstFlags = 0 ; /* Value for testflags=N parameter */
2016-06-03 01:01:57 +00:00
# endif
2018-11-16 01:42:26 +00:00
int b ; /* Value of a boolean parameter */
2016-05-31 18:44:33 +00:00
int nCol = - 99 ; /* Value of the columns= parameter */
CsvReader sRdr ; /* A CSV file reader used to store an error
* * message and / or to count the number of columns */
2016-06-02 17:44:24 +00:00
static const char * azParam [ ] = {
" filename " , " data " , " schema " ,
} ;
char * azPValue [ 3 ] ; /* Parameter values */
# define CSV_FILENAME (azPValue[0])
# define CSV_DATA (azPValue[1])
# define CSV_SCHEMA (azPValue[2])
2016-05-31 16:22:48 +00:00
2016-06-02 17:44:24 +00:00
assert ( sizeof ( azPValue ) = = sizeof ( azParam ) ) ;
2016-05-31 16:22:48 +00:00
memset ( & sRdr , 0 , sizeof ( sRdr ) ) ;
2016-06-02 17:44:24 +00:00
memset ( azPValue , 0 , sizeof ( azPValue ) ) ;
2016-05-31 16:22:48 +00:00
for ( i = 3 ; i < argc ; i + + ) {
const char * z = argv [ i ] ;
const char * zValue ;
2016-06-02 17:44:24 +00:00
for ( j = 0 ; j < sizeof ( azParam ) / sizeof ( azParam [ 0 ] ) ; j + + ) {
if ( csv_string_parameter ( & sRdr , azParam [ j ] , z , & azPValue [ j ] ) ) break ;
}
if ( j < sizeof ( azParam ) / sizeof ( azParam [ 0 ] ) ) {
if ( sRdr . zErr [ 0 ] ) goto csvtab_connect_error ;
2016-05-31 16:22:48 +00:00
} else
2018-11-16 01:42:26 +00:00
if ( csv_boolean_parameter ( " header " , 6 , z , & b ) ) {
2016-05-31 16:22:48 +00:00
if ( bHeader > = 0 ) {
csv_errmsg ( & sRdr , " more than one 'header' parameter " ) ;
goto csvtab_connect_error ;
}
2018-11-16 01:42:26 +00:00
bHeader = b ;
2016-05-31 16:22:48 +00:00
} else
2016-06-03 01:01:57 +00:00
# ifdef SQLITE_TEST
2016-05-31 18:08:35 +00:00
if ( ( zValue = csv_parameter ( " testflags " , 9 , z ) ) ! = 0 ) {
tstFlags = ( unsigned int ) atoi ( zValue ) ;
} else
2016-06-03 01:01:57 +00:00
# endif
2016-05-31 18:44:33 +00:00
if ( ( zValue = csv_parameter ( " columns " , 7 , z ) ) ! = 0 ) {
if ( nCol > 0 ) {
csv_errmsg ( & sRdr , " more than one 'columns' parameter " ) ;
goto csvtab_connect_error ;
}
nCol = atoi ( zValue ) ;
if ( nCol < = 0 ) {
2018-11-16 01:42:26 +00:00
csv_errmsg ( & sRdr , " column= value must be positive " ) ;
2016-05-31 18:44:33 +00:00
goto csvtab_connect_error ;
}
} else
2016-05-31 16:22:48 +00:00
{
2018-11-16 01:42:26 +00:00
csv_errmsg ( & sRdr , " bad parameter: '%s' " , z ) ;
2016-05-31 16:22:48 +00:00
goto csvtab_connect_error ;
}
}
2016-06-02 17:44:24 +00:00
if ( ( CSV_FILENAME = = 0 ) = = ( CSV_DATA = = 0 ) ) {
2018-11-16 01:42:26 +00:00
csv_errmsg ( & sRdr , " must specify either filename= or data= but not both " ) ;
2016-05-31 16:22:48 +00:00
goto csvtab_connect_error ;
}
2018-11-16 01:42:26 +00:00
if ( ( nCol < = 0 | | bHeader = = 1 )
& & csv_reader_open ( & sRdr , CSV_FILENAME , CSV_DATA )
) {
2016-05-31 16:22:48 +00:00
goto csvtab_connect_error ;
}
pNew = sqlite3_malloc ( sizeof ( * pNew ) ) ;
* ppVtab = ( sqlite3_vtab * ) pNew ;
if ( pNew = = 0 ) goto csvtab_connect_oom ;
memset ( pNew , 0 , sizeof ( * pNew ) ) ;
2018-11-16 01:42:26 +00:00
if ( CSV_SCHEMA = = 0 ) {
sqlite3_str * pStr = sqlite3_str_new ( 0 ) ;
char * zSep = " " ;
int iCol = 0 ;
sqlite3_str_appendf ( pStr , " CREATE TABLE x( " ) ;
if ( nCol < 0 & & bHeader < 1 ) {
nCol = 0 ;
do {
csv_read_one_field ( & sRdr ) ;
nCol + + ;
} while ( sRdr . cTerm = = ' , ' ) ;
}
if ( nCol > 0 & & bHeader < 1 ) {
for ( iCol = 0 ; iCol < nCol ; iCol + + ) {
sqlite3_str_appendf ( pStr , " %sc%d TEXT " , zSep , iCol ) ;
zSep = " , " ;
}
} else {
do {
char * z = csv_read_one_field ( & sRdr ) ;
if ( ( nCol > 0 & & iCol < nCol ) | | ( nCol < 0 & & bHeader ) ) {
sqlite3_str_appendf ( pStr , " %s \" %w \" TEXT " , zSep , z ) ;
zSep = " , " ;
iCol + + ;
}
} while ( sRdr . cTerm = = ' , ' ) ;
if ( nCol < 0 ) {
nCol = iCol ;
} else {
while ( iCol < nCol ) {
sqlite3_str_appendf ( pStr , " %sc%d TEXT " , zSep , + + iCol ) ;
zSep = " , " ;
}
}
}
2016-05-31 18:44:33 +00:00
pNew - > nCol = nCol ;
2018-11-16 01:42:26 +00:00
sqlite3_str_appendf ( pStr , " ) " ) ;
CSV_SCHEMA = sqlite3_str_finish ( pStr ) ;
if ( CSV_SCHEMA = = 0 ) goto csvtab_connect_oom ;
} else if ( nCol < 0 ) {
2016-05-31 18:44:33 +00:00
do {
2018-06-02 12:05:18 +00:00
csv_read_one_field ( & sRdr ) ;
2016-05-31 18:44:33 +00:00
pNew - > nCol + + ;
} while ( sRdr . cTerm = = ' , ' ) ;
2018-11-16 01:42:26 +00:00
} else {
pNew - > nCol = nCol ;
2016-05-31 18:44:33 +00:00
}
2016-06-02 17:44:24 +00:00
pNew - > zFilename = CSV_FILENAME ; CSV_FILENAME = 0 ;
pNew - > zData = CSV_DATA ; CSV_DATA = 0 ;
2016-06-03 01:01:57 +00:00
# ifdef SQLITE_TEST
2016-05-31 18:08:35 +00:00
pNew - > tstFlags = tstFlags ;
2016-06-03 01:01:57 +00:00
# endif
2018-11-16 01:42:26 +00:00
if ( bHeader ! = 1 ) {
pNew - > iStart = 0 ;
} else if ( pNew - > zData ) {
pNew - > iStart = ( int ) sRdr . iIn ;
} else {
2019-01-03 15:22:33 +00:00
pNew - > iStart = ( int ) ( ftell ( sRdr . in ) - sRdr . nIn + sRdr . iIn ) ;
2016-05-31 16:22:48 +00:00
}
2018-11-16 01:42:26 +00:00
csv_reader_reset ( & sRdr ) ;
2016-06-02 17:44:24 +00:00
rc = sqlite3_declare_vtab ( db , CSV_SCHEMA ) ;
2018-11-16 01:42:26 +00:00
if ( rc ) {
csv_errmsg ( & sRdr , " bad schema: '%s' - %s " , CSV_SCHEMA , sqlite3_errmsg ( db ) ) ;
goto csvtab_connect_error ;
}
2016-06-02 17:44:24 +00:00
for ( i = 0 ; i < sizeof ( azPValue ) / sizeof ( azPValue [ 0 ] ) ; i + + ) {
sqlite3_free ( azPValue [ i ] ) ;
}
2020-01-07 19:45:40 +00:00
/* Rationale for DIRECTONLY:
* * An attacker who controls a database schema could use this vtab
* * to exfiltrate sensitive data from other files in the filesystem .
* * And , recommended practice is to put all CSV virtual tables in the
* * TEMP namespace , so they should still be usable from within TEMP
* * views , so there shouldn ' t be a serious loss of functionality by
* * prohibiting the use of this vtab from persistent triggers and views .
*/
sqlite3_vtab_config ( db , SQLITE_VTAB_DIRECTONLY ) ;
2016-05-31 16:22:48 +00:00
return SQLITE_OK ;
csvtab_connect_oom :
rc = SQLITE_NOMEM ;
csv_errmsg ( & sRdr , " out of memory " ) ;
csvtab_connect_error :
if ( pNew ) csvtabDisconnect ( & pNew - > base ) ;
2016-06-02 17:44:24 +00:00
for ( i = 0 ; i < sizeof ( azPValue ) / sizeof ( azPValue [ 0 ] ) ; i + + ) {
sqlite3_free ( azPValue [ i ] ) ;
}
2016-05-31 16:22:48 +00:00
if ( sRdr . zErr [ 0 ] ) {
sqlite3_free ( * pzErr ) ;
* pzErr = sqlite3_mprintf ( " %s " , sRdr . zErr ) ;
}
csv_reader_reset ( & sRdr ) ;
2016-05-31 18:08:35 +00:00
if ( rc = = SQLITE_OK ) rc = SQLITE_ERROR ;
2016-05-31 16:22:48 +00:00
return rc ;
}
/*
* * Reset the current row content held by a CsvCursor .
*/
static void csvtabCursorRowReset ( CsvCursor * pCur ) {
CsvTable * pTab = ( CsvTable * ) pCur - > base . pVtab ;
int i ;
for ( i = 0 ; i < pTab - > nCol ; i + + ) {
sqlite3_free ( pCur - > azVal [ i ] ) ;
pCur - > azVal [ i ] = 0 ;
2016-06-03 01:01:57 +00:00
pCur - > aLen [ i ] = 0 ;
2016-05-31 16:22:48 +00:00
}
}
/*
* * The xConnect and xCreate methods do the same thing , but they must be
* * different so that the virtual table is not an eponymous virtual table .
*/
static int csvtabCreate (
sqlite3 * db ,
void * pAux ,
int argc , const char * const * argv ,
sqlite3_vtab * * ppVtab ,
char * * pzErr
) {
return csvtabConnect ( db , pAux , argc , argv , ppVtab , pzErr ) ;
}
/*
* * Destructor for a CsvCursor .
*/
static int csvtabClose ( sqlite3_vtab_cursor * cur ) {
CsvCursor * pCur = ( CsvCursor * ) cur ;
csvtabCursorRowReset ( pCur ) ;
csv_reader_reset ( & pCur - > rdr ) ;
sqlite3_free ( cur ) ;
return SQLITE_OK ;
}
/*
* * Constructor for a new CsvTable cursor object .
*/
static int csvtabOpen ( sqlite3_vtab * p , sqlite3_vtab_cursor * * ppCursor ) {
CsvTable * pTab = ( CsvTable * ) p ;
CsvCursor * pCur ;
2016-06-03 01:01:57 +00:00
size_t nByte ;
nByte = sizeof ( * pCur ) + ( sizeof ( char * ) + sizeof ( int ) ) * pTab - > nCol ;
2016-07-09 16:38:25 +00:00
pCur = sqlite3_malloc64 ( nByte ) ;
2016-05-31 16:22:48 +00:00
if ( pCur = = 0 ) return SQLITE_NOMEM ;
2016-06-03 01:01:57 +00:00
memset ( pCur , 0 , nByte ) ;
2016-05-31 16:22:48 +00:00
pCur - > azVal = ( char * * ) & pCur [ 1 ] ;
2016-06-03 01:01:57 +00:00
pCur - > aLen = ( int * ) & pCur - > azVal [ pTab - > nCol ] ;
2016-05-31 16:22:48 +00:00
* ppCursor = & pCur - > base ;
2016-06-02 17:44:24 +00:00
if ( csv_reader_open ( & pCur - > rdr , pTab - > zFilename , pTab - > zData ) ) {
2016-05-31 16:22:48 +00:00
csv_xfer_error ( pTab , & pCur - > rdr ) ;
return SQLITE_ERROR ;
}
return SQLITE_OK ;
}
/*
* * Advance a CsvCursor to its next row of input .
* * Set the EOF marker if we reach the end of input .
*/
static int csvtabNext ( sqlite3_vtab_cursor * cur ) {
CsvCursor * pCur = ( CsvCursor * ) cur ;
CsvTable * pTab = ( CsvTable * ) cur - > pVtab ;
int i = 0 ;
char * z ;
do {
z = csv_read_one_field ( & pCur - > rdr ) ;
if ( z = = 0 ) {
break ;
}
if ( i < pTab - > nCol ) {
2016-06-03 01:01:57 +00:00
if ( pCur - > aLen [ i ] < pCur - > rdr . n + 1 ) {
2016-07-09 16:38:25 +00:00
char * zNew = sqlite3_realloc64 ( pCur - > azVal [ i ] , pCur - > rdr . n + 1 ) ;
2016-06-03 01:01:57 +00:00
if ( zNew = = 0 ) {
csv_errmsg ( & pCur - > rdr , " out of memory " ) ;
csv_xfer_error ( pTab , & pCur - > rdr ) ;
break ;
}
pCur - > azVal [ i ] = zNew ;
pCur - > aLen [ i ] = pCur - > rdr . n + 1 ;
}
memcpy ( pCur - > azVal [ i ] , z , pCur - > rdr . n + 1 ) ;
i + + ;
2016-05-31 16:22:48 +00:00
}
2016-06-03 01:01:57 +00:00
} while ( pCur - > rdr . cTerm = = ' , ' ) ;
2022-07-06 15:44:57 +00:00
if ( z = = 0 & & i = = 0 ) {
2016-05-31 16:22:48 +00:00
pCur - > iRowid = - 1 ;
} else {
pCur - > iRowid + + ;
2017-08-08 20:03:10 +00:00
while ( i < pTab - > nCol ) {
sqlite3_free ( pCur - > azVal [ i ] ) ;
pCur - > azVal [ i ] = 0 ;
pCur - > aLen [ i ] = 0 ;
i + + ;
}
2016-05-31 16:22:48 +00:00
}
return SQLITE_OK ;
}
/*
* * Return values of columns for the row at which the CsvCursor
* * is currently pointing .
*/
static int csvtabColumn (
sqlite3_vtab_cursor * cur , /* The cursor */
sqlite3_context * ctx , /* First argument to sqlite3_result_...() */
int i /* Which column to return */
) {
CsvCursor * pCur = ( CsvCursor * ) cur ;
CsvTable * pTab = ( CsvTable * ) cur - > pVtab ;
if ( i > = 0 & & i < pTab - > nCol & & pCur - > azVal [ i ] ! = 0 ) {
2020-10-07 11:24:45 +00:00
sqlite3_result_text ( ctx , pCur - > azVal [ i ] , - 1 , SQLITE_TRANSIENT ) ;
2016-05-31 16:22:48 +00:00
}
return SQLITE_OK ;
}
/*
* * Return the rowid for the current row .
*/
static int csvtabRowid ( sqlite3_vtab_cursor * cur , sqlite_int64 * pRowid ) {
CsvCursor * pCur = ( CsvCursor * ) cur ;
* pRowid = pCur - > iRowid ;
return SQLITE_OK ;
}
/*
* * Return TRUE if the cursor has been moved off of the last
* * row of output .
*/
static int csvtabEof ( sqlite3_vtab_cursor * cur ) {
CsvCursor * pCur = ( CsvCursor * ) cur ;
return pCur - > iRowid < 0 ;
}
/*
* * Only a full table scan is supported . So xFilter simply rewinds to
* * the beginning .
*/
static int csvtabFilter (
sqlite3_vtab_cursor * pVtabCursor ,
int idxNum , const char * idxStr ,
int argc , sqlite3_value * * argv
) {
CsvCursor * pCur = ( CsvCursor * ) pVtabCursor ;
CsvTable * pTab = ( CsvTable * ) pVtabCursor - > pVtab ;
pCur - > iRowid = 0 ;
2022-07-05 17:49:04 +00:00
/* Ensure the field buffer is always allocated. Otherwise, if the
* * first field is zero bytes in size , this may be mistaken for an OOM
* * error in csvtabNext ( ) . */
if ( csv_append ( & pCur - > rdr , 0 ) ) return SQLITE_NOMEM ;
2016-06-02 17:44:24 +00:00
if ( pCur - > rdr . in = = 0 ) {
assert ( pCur - > rdr . zIn = = pTab - > zData ) ;
2016-07-22 21:26:56 +00:00
assert ( pTab - > iStart > = 0 ) ;
assert ( ( size_t ) pTab - > iStart < = pCur - > rdr . nIn ) ;
2016-06-02 17:44:24 +00:00
pCur - > rdr . iIn = pTab - > iStart ;
} else {
fseek ( pCur - > rdr . in , pTab - > iStart , SEEK_SET ) ;
pCur - > rdr . iIn = 0 ;
pCur - > rdr . nIn = 0 ;
}
2016-05-31 16:22:48 +00:00
return csvtabNext ( pVtabCursor ) ;
}
/*
2016-06-02 17:44:24 +00:00
* * Only a forward full table scan is supported . xBestIndex is mostly
2016-05-31 18:08:35 +00:00
* * a no - op . If CSVTEST_FIDX is set , then the presence of equality
* * constraints lowers the estimated cost , which is fiction , but is useful
* * for testing certain kinds of virtual table behavior .
2016-05-31 16:22:48 +00:00
*/
static int csvtabBestIndex (
sqlite3_vtab * tab ,
sqlite3_index_info * pIdxInfo
) {
2016-05-31 18:08:35 +00:00
pIdxInfo - > estimatedCost = 1000000 ;
2016-06-03 01:01:57 +00:00
# ifdef SQLITE_TEST
if ( ( ( ( CsvTable * ) tab ) - > tstFlags & CSVTEST_FIDX ) ! = 0 ) {
/* The usual (and sensible) case is to always do a full table scan.
* * The code in this branch only runs when testflags = 1. This code
* * generates an artifical and unrealistic plan which is useful
* * for testing virtual table logic but is not helpful to real applications .
* *
* * Any = = , LIKE , or GLOB constraint is marked as usable by the virtual
* * table ( even though it is not ) and the cost of running the virtual table
* * is reduced from 1 million to just 10. The constraints are * not * marked
* * as omittable , however , so the query planner should still generate a
* * plan that gives a correct answer , even if they plan is not optimal .
*/
int i ;
int nConst = 0 ;
for ( i = 0 ; i < pIdxInfo - > nConstraint ; i + + ) {
unsigned char op ;
if ( pIdxInfo - > aConstraint [ i ] . usable = = 0 ) continue ;
op = pIdxInfo - > aConstraint [ i ] . op ;
if ( op = = SQLITE_INDEX_CONSTRAINT_EQ
| | op = = SQLITE_INDEX_CONSTRAINT_LIKE
| | op = = SQLITE_INDEX_CONSTRAINT_GLOB
) {
pIdxInfo - > estimatedCost = 10 ;
pIdxInfo - > aConstraintUsage [ nConst ] . argvIndex = nConst + 1 ;
nConst + + ;
}
2016-05-31 18:08:35 +00:00
}
}
2016-06-03 01:01:57 +00:00
# endif
2016-05-31 16:22:48 +00:00
return SQLITE_OK ;
}
static sqlite3_module CsvModule = {
0 , /* iVersion */
csvtabCreate , /* xCreate */
csvtabConnect , /* xConnect */
csvtabBestIndex , /* xBestIndex */
csvtabDisconnect , /* xDisconnect */
csvtabDisconnect , /* xDestroy */
csvtabOpen , /* xOpen - open a cursor */
csvtabClose , /* xClose - close a cursor */
csvtabFilter , /* xFilter - configure scan constraints */
csvtabNext , /* xNext - advance a cursor */
csvtabEof , /* xEof - check for end of scan */
csvtabColumn , /* xColumn - read data */
csvtabRowid , /* xRowid - read data */
0 , /* xUpdate */
0 , /* xBegin */
0 , /* xSync */
0 , /* xCommit */
0 , /* xRollback */
0 , /* xFindMethod */
0 , /* xRename */
} ;
2016-06-03 01:01:57 +00:00
# ifdef SQLITE_TEST
/*
* * For virtual table testing , make a version of the CSV virtual table
* * available that has an xUpdate function . But the xUpdate always returns
* * SQLITE_READONLY since the CSV file is not really writable .
*/
static int csvtabUpdate ( sqlite3_vtab * p , int n , sqlite3_value * * v , sqlite3_int64 * x ) {
return SQLITE_READONLY ;
}
static sqlite3_module CsvModuleFauxWrite = {
0 , /* iVersion */
csvtabCreate , /* xCreate */
csvtabConnect , /* xConnect */
csvtabBestIndex , /* xBestIndex */
csvtabDisconnect , /* xDisconnect */
csvtabDisconnect , /* xDestroy */
csvtabOpen , /* xOpen - open a cursor */
csvtabClose , /* xClose - close a cursor */
csvtabFilter , /* xFilter - configure scan constraints */
csvtabNext , /* xNext - advance a cursor */
csvtabEof , /* xEof - check for end of scan */
csvtabColumn , /* xColumn - read data */
csvtabRowid , /* xRowid - read data */
csvtabUpdate , /* xUpdate */
0 , /* xBegin */
0 , /* xSync */
0 , /* xCommit */
0 , /* xRollback */
0 , /* xFindMethod */
0 , /* xRename */
} ;
# endif /* SQLITE_TEST */
2016-07-15 02:50:18 +00:00
# endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */
2016-06-03 01:01:57 +00:00
2016-05-31 16:22:48 +00:00
# ifdef _WIN32
__declspec ( dllexport )
# endif
/*
* * This routine is called when the extension is loaded . The new
* * CSV virtual table module is registered with the calling database
* * connection .
*/
int sqlite3_csv_init (
sqlite3 * db ,
char * * pzErrMsg ,
const sqlite3_api_routines * pApi
) {
2022-02-21 13:44:51 +00:00
# ifndef SQLITE_OMIT_VIRTUALTABLE
2016-06-03 01:01:57 +00:00
int rc ;
2016-05-31 16:22:48 +00:00
SQLITE_EXTENSION_INIT2 ( pApi ) ;
2016-06-03 01:01:57 +00:00
rc = sqlite3_create_module ( db , " csv " , & CsvModule , 0 ) ;
# ifdef SQLITE_TEST
if ( rc = = SQLITE_OK ) {
rc = sqlite3_create_module ( db , " csv_wr " , & CsvModuleFauxWrite , 0 ) ;
}
# endif
return rc ;
2016-07-15 02:50:18 +00:00
# else
return SQLITE_OK ;
# endif
2016-05-31 16:22:48 +00:00
}