summaryrefslogtreecommitdiffstats
path: root/ext/misc/csv.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--ext/misc/csv.c964
1 files changed, 964 insertions, 0 deletions
diff --git a/ext/misc/csv.c b/ext/misc/csv.c
new file mode 100644
index 0000000..870a0cf
--- /dev/null
+++ b/ext/misc/csv.c
@@ -0,0 +1,964 @@
+/*
+** 2016-05-28
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This file contains the implementation of an SQLite virtual table for
+** reading CSV files.
+**
+** Usage:
+**
+** .load ./csv
+** CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME);
+** SELECT * FROM csv;
+**
+** The columns are named "c0", "c1", "c2", ... by default. Or the
+** application can define its own CREATE TABLE statement using the
+** schema= parameter, like this:
+**
+** CREATE VIRTUAL TABLE temp.csv2 USING csv(
+** filename = "../http.log",
+** schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)"
+** );
+**
+** Instead of specifying a file, the text of the CSV can be loaded using
+** the data= parameter.
+**
+** If the columns=N parameter is supplied, then the CSV file is assumed to have
+** N columns. If both the columns= and schema= parameters are omitted, then
+** the number of columns is determined by the first line of the CSV input,
+** and so are the column names when header=yes is given.
+**
+** Some extra debugging features (used for testing virtual tables) are available
+** if this module is compiled with -DSQLITE_TEST.
+*/
+#include <sqlite3ext.h>
+SQLITE_EXTENSION_INIT1
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <stdio.h>
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+
+/*
+** A macro to hint to the compiler that a function should not be
+** inlined.
+*/
+#if defined(__GNUC__)
+# define CSV_NOINLINE __attribute__((noinline))
+#elif defined(_MSC_VER) && _MSC_VER>=1310
+# define CSV_NOINLINE __declspec(noinline)
+#else
+# define CSV_NOINLINE
+#endif
+
+
+/* Max size of the error message in a CsvReader */
+#define CSV_MXERR 200
+
+/* Size of the CsvReader input buffer */
+#define CSV_INBUFSZ 1024
+
+/* A context object used while reading CSV text, either from a file
+** stream or from an in-memory string. */
+typedef struct CsvReader CsvReader;
+struct CsvReader {
+ FILE *in; /* Input stream, or 0 when reading in-memory text */
+ char *z; /* Accumulated text for the current field */
+ int n; /* Number of bytes in z */
+ int nAlloc; /* Space allocated for z[] */
+ int nLine; /* Current line number */
+ int bNotFirst; /* True if prior text has been seen (BOM check) */
+ int cTerm; /* Character that terminated the most recent field */
+ size_t iIn; /* Next unread character in the input buffer */
+ size_t nIn; /* Number of characters in the input buffer */
+ char *zIn; /* Input buffer: allocated if in!=0, else the data text */
+ char zErr[CSV_MXERR]; /* Error message, empty string if none */
+};
+
+/* Initialize a CsvReader object to the empty, closed state.
+**
+** Every field is cleared, including the read offset iIn and the
+** terminator cTerm, so that a reader which is reset and then reopened
+** never resumes from a stale buffer position.  The error message is set
+** to the empty string.  No memory is allocated or released here.
+*/
+static void csv_reader_init(CsvReader *p){
+  p->in = 0;
+  p->z = 0;
+  p->n = 0;
+  p->nAlloc = 0;
+  p->nLine = 0;
+  p->bNotFirst = 0;
+  p->cTerm = 0;
+  p->iIn = 0;
+  p->nIn = 0;
+  p->zIn = 0;
+  p->zErr[0] = 0;
+}
+
+/* Release everything held by a CsvReader and return it to the
+** initialized state.  The input buffer zIn is released only when a file
+** stream is open; otherwise it is left alone.
+*/
+static void csv_reader_reset(CsvReader *p){
+  FILE *pStream = p->in;
+  if( pStream!=0 ){
+    fclose(pStream);
+    sqlite3_free(p->zIn);
+  }
+  sqlite3_free(p->z);
+  csv_reader_init(p);
+}
+
+/* Report an error on a CsvReader */
+static void csv_errmsg(CsvReader *p, const char *zFormat, ...){
+ va_list ap;
+ va_start(ap, zFormat);
+ sqlite3_vsnprintf(CSV_MXERR, p->zErr, zFormat, ap);
+ va_end(ap);
+}
+
+/* Attach an input source to a CsvReader.
+**
+** If zFilename is not NULL, an input buffer of CSV_INBUFSZ bytes is
+** allocated and the file is opened for binary reading.  Otherwise the
+** reader is pointed directly at the zero-terminated text zData, which
+** is not copied.
+**
+** Return the number of errors (0 or 1).  On error a message is left
+** in p->zErr.
+*/
+static int csv_reader_open(
+  CsvReader *p,               /* The reader to open */
+  const char *zFilename,      /* Read from this filename */
+  const char *zData           /*  ... or use this data */
+){
+  if( zFilename==0 ){
+    /* In-memory input: the whole text acts as the input buffer */
+    assert( p->in==0 );
+    p->zIn = (char*)zData;
+    p->nIn = strlen(zData);
+    return 0;
+  }
+
+  p->zIn = sqlite3_malloc( CSV_INBUFSZ );
+  if( p->zIn==0 ){
+    csv_errmsg(p, "out of memory");
+    return 1;
+  }
+  p->in = fopen(zFilename, "rb");
+  if( p->in!=0 ) return 0;
+
+  /* The open failed.  Release the buffer and clear the reader before
+  ** recording the message, because the reset erases p->zErr. */
+  sqlite3_free(p->zIn);
+  csv_reader_reset(p);
+  csv_errmsg(p, "cannot open '%s' for reading", zFilename);
+  return 1;
+}
+
+/* The input buffer is exhausted.  Refill it from the file and return the
+** next character, or EOF if nothing more could be read.
+**
+** The byte is returned as an unsigned char value, the same way csv_getc()
+** returns buffered bytes.  Returning a plain char here would turn a 0xFF
+** byte at the start of a refilled buffer into EOF, and other bytes above
+** 0x7F into negative values, wherever plain char is signed.
+*/
+static CSV_NOINLINE int csv_getc_refill(CsvReader *p){
+  size_t got;
+
+  assert( p->iIn>=p->nIn );  /* Only called on an empty input buffer */
+  assert( p->in!=0 );        /* Only called if reading from a file */
+
+  got = fread(p->zIn, 1, CSV_INBUFSZ, p->in);
+  if( got==0 ) return EOF;
+  p->nIn = got;
+  p->iIn = 1;                /* Byte 0 is consumed by this call */
+  return ((unsigned char*)p->zIn)[0];
+}
+
+/* Fetch the next input character as an unsigned char value.  When the
+** buffer is exhausted, refill it if input comes from a file; otherwise
+** report EOF. */
+static int csv_getc(CsvReader *p){
+  if( p->iIn < p->nIn ){
+    return ((unsigned char*)p->zIn)[p->iIn++];
+  }
+  return p->in!=0 ? csv_getc_refill(p) : EOF;
+}
+
+/* Grow the field buffer p->z to twice its current allocation plus 100
+** bytes, then append character c.
+** Return 0 on success.  On an OOM error, leave "out of memory" in
+** p->zErr and return non-zero; the existing buffer is left as it was. */
+static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){
+  int nWant = p->nAlloc*2 + 100;
+  char *zBigger = sqlite3_realloc64(p->z, nWant);
+  if( zBigger==0 ){
+    csv_errmsg(p, "out of memory");
+    return 1;
+  }
+  p->z = zBigger;
+  p->nAlloc = nWant;
+  p->z[p->n] = c;
+  p->n++;
+  return 0;
+}
+
+/* Append one character to the field buffer p->z[].  One byte of the
+** allocation is always kept spare, so the buffer is enlarged when fewer
+** than two free bytes remain.
+** Return 0 on success and non-zero if there is an OOM error */
+static int csv_append(CsvReader *p, char c){
+  if( p->n < p->nAlloc-1 ){
+    p->z[p->n++] = c;
+    return 0;
+  }
+  return csv_resize_and_append(p, c);
+}
+
+/* Read a single field of CSV text, following the rfc4180 conventions for
+** double-quoted fields. The field separator is always ",".
+**
+** + Input comes from csv_getc(p): the file p->in, or the in-memory
+** text in p->zIn.
+** + Store results in p->z of length p->n. Space to hold p->z is
+** grown by csv_append().
+** + Keep track of the line number in p->nLine.
+** + Store the character that terminates the field in p->cTerm. Store
+** EOF on end-of-file.
+** + If the first unquoted field begins with a UTF-8 byte-order mark,
+** the mark is skipped.
+**
+** Return 0 at EOF or on OOM. On EOF, the p->cTerm character will have
+** been set to EOF. Otherwise return p->z, zero-terminated; this is
+** also 0 if no field buffer has ever been allocated. Syntax problems
+** inside a quoted field are reported only through p->zErr; the text
+** accumulated so far is still returned.
+*/
+static char *csv_read_one_field(CsvReader *p){
+ int c;
+ p->n = 0;
+ c = csv_getc(p);
+ if( c==EOF ){
+ p->cTerm = EOF;
+ return 0;
+ }
+ if( c=='"' ){
+ /* Quoted field. pc and ppc hold the previous two characters. */
+ int pc, ppc;
+ int startLine = p->nLine;
+ pc = ppc = 0;
+ while( 1 ){
+ c = csv_getc(p);
+ /* Quick filter: '"', '\n', '\r' and EOF all compare <= '"'. A
+ ** comma is greater than '"' but only matters right after a
+ ** quote, which the pc test covers. Anything else is appended
+ ** directly below. */
+ if( c<='"' || pc=='"' ){
+ if( c=='\n' ) p->nLine++;
+ if( c=='"' ){
+ if( pc=='"' ){
+ /* Doubled quote: one '"' is already in p->z. Clearing pc
+ ** keeps a third quote from pairing with this one. */
+ pc = 0;
+ continue;
+ }
+ }
+ if( (c==',' && pc=='"')
+ || (c=='\n' && pc=='"')
+ || (c=='\n' && pc=='\r' && ppc=='"')
+ || (c==EOF && pc=='"')
+ ){
+ /* End of the quoted field. Back up over the closing quote
+ ** (and the '\r' of a "\r\n" line ending) stored in p->z. */
+ do{ p->n--; }while( p->z[p->n]!='"' );
+ p->cTerm = (char)c;
+ break;
+ }
+ if( pc=='"' && c!='\r' ){
+ /* NOTE(review): this exit does not update p->cTerm, so the
+ ** caller sees the terminator of the previous field. */
+ csv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"');
+ break;
+ }
+ if( c==EOF ){
+ csv_errmsg(p, "line %d: unterminated %c-quoted field\n",
+ startLine, '"');
+ p->cTerm = (char)c;
+ break;
+ }
+ }
+ if( csv_append(p, (char)c) ) return 0;
+ ppc = pc;
+ pc = c;
+ }
+ }else{
+ /* If this is the first field being parsed and it begins with the
+ ** UTF-8 BOM (0xEF BB BF) then skip the BOM */
+ if( (c&0xff)==0xef && p->bNotFirst==0 ){
+ /* The candidate BOM bytes are appended as they are read, so that
+ ** they remain part of the field if the sequence is incomplete.
+ ** NOTE(review): the csv_append() results are ignored here. */
+ csv_append(p, (char)c);
+ c = csv_getc(p);
+ if( (c&0xff)==0xbb ){
+ csv_append(p, (char)c);
+ c = csv_getc(p);
+ if( (c&0xff)==0xbf ){
+ /* Complete BOM: discard it and read the real first field */
+ p->bNotFirst = 1;
+ p->n = 0;
+ return csv_read_one_field(p);
+ }
+ }
+ }
+ /* Accumulate until ',', '\n' or EOF. The c>',' test is a quick
+ ** accept for characters that cannot be any of those three. */
+ while( c>',' || (c!=EOF && c!=',' && c!='\n') ){
+ if( csv_append(p, (char)c) ) return 0;
+ c = csv_getc(p);
+ }
+ if( c=='\n' ){
+ p->nLine++;
+ /* Drop the '\r' of a "\r\n" line ending */
+ if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--;
+ }
+ /* NOTE(review): EOF is narrowed through char here; callers in this
+ ** file only compare cTerm against ','. */
+ p->cTerm = (char)c;
+ }
+ assert( p->z==0 || p->n<p->nAlloc );
+ if( p->z ) p->z[p->n] = 0;
+ p->bNotFirst = 1;
+ return p->z;
+}
+
+
+/* Forward references to the various virtual table methods implemented
+** in this file. These are the functions installed in the CsvModule
+** method table defined later in this file. */
+static int csvtabCreate(sqlite3*, void*, int, const char*const*,
+ sqlite3_vtab**,char**);
+static int csvtabConnect(sqlite3*, void*, int, const char*const*,
+ sqlite3_vtab**,char**);
+static int csvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*);
+static int csvtabDisconnect(sqlite3_vtab*);
+static int csvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**);
+static int csvtabClose(sqlite3_vtab_cursor*);
+static int csvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr,
+ int argc, sqlite3_value **argv);
+static int csvtabNext(sqlite3_vtab_cursor*);
+static int csvtabEof(sqlite3_vtab_cursor*);
+static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int);
+static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*);
+
+/* An instance of the CSV virtual table. Exactly one of zFilename and
+** zData is set by csvtabConnect(); both are released by
+** csvtabDisconnect(). */
+typedef struct CsvTable {
+ sqlite3_vtab base; /* Base class. Must be first */
+ char *zFilename; /* Name of the CSV file */
+ char *zData; /* Raw CSV data in lieu of zFilename */
+ long iStart; /* Offset of the first data row; xFilter rewinds here */
+ int nCol; /* Number of columns in the CSV file */
+ unsigned int tstFlags; /* Bit values used for testing (SQLITE_TEST only) */
+} CsvTable;
+
+/* Allowed values for tstFlags */
+#define CSVTEST_FIDX 0x0001 /* Pretend that constrained searches cost less */
+
+/* A cursor for the CSV virtual table. The azVal[] and aLen[] arrays
+** each have one entry per table column and are stored in the same
+** allocation as the cursor itself (see csvtabOpen()). */
+typedef struct CsvCursor {
+ sqlite3_vtab_cursor base; /* Base class. Must be first */
+ CsvReader rdr; /* The CsvReader object */
+ char **azVal; /* Value of the current row; 0 for a missing column */
+ int *aLen; /* Bytes allocated for each azVal[] entry */
+ sqlite3_int64 iRowid; /* The current rowid. Negative for EOF */
+} CsvCursor;
+
+/* Transfer error message text from a reader into a CsvTable */
+static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){
+ sqlite3_free(pTab->base.zErrMsg);
+ pTab->base.zErrMsg = sqlite3_mprintf("%s", pRdr->zErr);
+}
+
+/*
+** Destructor for a CsvTable object.  Releases the filename and data
+** strings owned by the table, then the table itself.  Always returns
+** SQLITE_OK.
+*/
+static int csvtabDisconnect(sqlite3_vtab *pVtab){
+  CsvTable *pTab = (CsvTable*)pVtab;
+  sqlite3_free(pTab->zData);
+  sqlite3_free(pTab->zFilename);
+  sqlite3_free(pTab);
+  return SQLITE_OK;
+}
+
+/* Step past any leading whitespace in z.  The result points at the first
+** non-whitespace character, or at the zero terminator when the string
+** contains nothing but whitespace. */
+static const char *csv_skip_whitespace(const char *z){
+  const char *zCur;
+  for(zCur=z; isspace((unsigned char)*zCur); zCur++){}
+  return zCur;
+}
+
+/* Remove trailing whitespace from the end of string z[].
+**
+** z must be a writable, zero-terminated string.  It is shortened in
+** place; a string consisting only of whitespace becomes empty.
+*/
+static void csv_trim_whitespace(char *z){
+  size_t n = strlen(z);
+  /* Test the last character, z[n-1].  z[n] is the terminator, which is
+  ** never whitespace, so testing it would never trim anything. */
+  while( n>0 && isspace((unsigned char)z[n-1]) ) n--;
+  z[n] = 0;
+}
+
+/* Remove quoting from z[] in place.
+**
+** Nothing happens unless z begins with a single or double quote and ends
+** with the same character.  Otherwise the outer quotes are removed and
+** each doubled quote character inside is collapsed to a single one.
+*/
+static void csv_dequote(char *z){
+  char q = z[0];
+  size_t nByte, iRead, iWrite;
+
+  if( q!='\'' && q!='"' ) return;
+  nByte = strlen(z);
+  if( nByte<2 || z[nByte-1]!=q ) return;
+  iWrite = 0;
+  iRead = 1;
+  while( iRead<nByte-1 ){
+    /* Skip the first quote of a doubled pair and keep the second */
+    if( z[iRead]==q && z[iRead+1]==q ) iRead++;
+    z[iWrite++] = z[iRead++];
+  }
+  z[iWrite] = 0;
+}
+
+/* Test whether z has the form "TAG = VALUE", with optional whitespace
+** before TAG and on either side of the "=".  zTag is the tag text and
+** nTag its length in bytes.  Return a pointer to the first character of
+** VALUE on a match, or NULL otherwise.
+*/
+static const char *csv_parameter(const char *zTag, int nTag, const char *z){
+  const char *zTok = csv_skip_whitespace(z);
+  if( strncmp(zTag, zTok, nTag)==0 ){
+    zTok = csv_skip_whitespace(zTok+nTag);
+    if( zTok[0]=='=' ){
+      return csv_skip_whitespace(zTok+1);
+    }
+  }
+  return 0;
+}
+
+/* Decode a parameter whose value is a (possibly quoted) string.
+**
+** Return 0 if zArg is not an instance of parameter zParam.  Otherwise
+** return 1, even if an error occurs.  On an error - the parameter was
+** already seen, or memory ran out - a message is left in p->zErr.  If
+** there are no errors, p->zErr[0]==0 and *pzVal holds the trimmed and
+** dequoted value in memory obtained from sqlite3_mprintf().
+*/
+static int csv_string_parameter(
+  CsvReader *p,            /* Leave the error message here, if there is one */
+  const char *zParam,      /* Parameter we are checking for */
+  const char *zArg,        /* Raw text of the virtual table argument */
+  char **pzVal             /* Write the dequoted string value here */
+){
+  const char *zRaw = csv_parameter(zParam,(int)strlen(zParam),zArg);
+  char *zCopy;
+  if( zRaw==0 ) return 0;
+  p->zErr[0] = 0;
+  if( *pzVal!=0 ){
+    csv_errmsg(p, "more than one '%s' parameter", zParam);
+    return 1;
+  }
+  zCopy = sqlite3_mprintf("%s", zRaw);
+  *pzVal = zCopy;
+  if( zCopy==0 ){
+    csv_errmsg(p, "out of memory");
+  }else{
+    csv_trim_whitespace(zCopy);
+    csv_dequote(zCopy);
+  }
+  return 1;
+}
+
+
+/* Interpret z as a boolean.  "yes", "on", "true" (compared with
+** sqlite3_stricmp()) and "1" give 1.  "no", "off", "false" and "0"
+** give 0.  Anything else gives -1.
+*/
+static int csv_boolean(const char *z){
+  static const char *const azTrue[] = { "yes", "on", "true" };
+  static const char *const azFalse[] = { "no", "off", "false" };
+  size_t k;
+
+  for(k=0; k<sizeof(azTrue)/sizeof(azTrue[0]); k++){
+    if( sqlite3_stricmp(azTrue[k], z)==0 ) return 1;
+  }
+  if( z[0]=='1' && z[1]==0 ) return 1;
+
+  for(k=0; k<sizeof(azFalse)/sizeof(azFalse[0]); k++){
+    if( sqlite3_stricmp(azFalse[k], z)==0 ) return 0;
+  }
+  if( z[0]=='0' && z[1]==0 ) return 0;
+
+  return -1;
+}
+
+/* Test whether z has the form "TAG = BOOLEAN" or is just "TAG".
+** On a match, write the boolean to *pValue (1 when the "= BOOLEAN" part
+** is absent) and return non-zero.  Return zero, leaving *pValue alone,
+** if z does not begin with TAG, if TAG is followed by something other
+** than "=", or if the value is not a recognized boolean.
+*/
+static int csv_boolean_parameter(
+  const char *zTag,       /* Tag we are looking for */
+  int nTag,               /* Size of the tag in bytes */
+  const char *z,          /* Input parameter */
+  int *pValue             /* Write boolean value here */
+){
+  const char *zRest;
+  int bVal;
+
+  zRest = csv_skip_whitespace(z);
+  if( strncmp(zTag, zRest, nTag)!=0 ) return 0;
+  zRest = csv_skip_whitespace(zRest + nTag);
+  if( zRest[0]==0 ){
+    /* Bare tag with no value means "true" */
+    *pValue = 1;
+    return 1;
+  }
+  if( zRest[0]!='=' ) return 0;
+  bVal = csv_boolean(csv_skip_whitespace(zRest+1));
+  if( bVal<0 ) return 0;
+  *pValue = bVal;
+  return 1;
+}
+
+/*
+** Constructor for a CsvTable object. Parses the module arguments,
+** optionally peeks at the first row of input to count or name columns,
+** declares the table schema and returns the new table in *ppVtab.
+**
+** Parameters:
+** filename=FILENAME Name of file containing CSV content
+** data=TEXT Direct CSV content.
+** schema=SCHEMA Alternative CSV schema.
+** header=YES|NO First row of CSV defines the names of
+** columns if "yes". Default "no".
+** columns=N Assume the CSV file contains N columns.
+**
+** Only available if compiled with SQLITE_TEST:
+**
+** testflags=N Bitmask of test flags. Optional
+**
+** If schema= is omitted, then the columns are named "c0", "c1", "c2",
+** and so forth. If columns=N is omitted, then the file is opened and
+** the number of columns in the first row is counted to determine the
+** column count. If header=YES, then the first row is skipped.
+**
+** Returns SQLITE_OK on success. On failure returns SQLITE_NOMEM or
+** SQLITE_ERROR (or the sqlite3_declare_vtab() result) and, if a message
+** is available, stores it in *pzErr.
+*/
+static int csvtabConnect(
+ sqlite3 *db,
+ void *pAux,
+ int argc, const char *const*argv,
+ sqlite3_vtab **ppVtab,
+ char **pzErr
+){
+ CsvTable *pNew = 0; /* The CsvTable object to construct */
+ int bHeader = -1; /* header= flags. -1 means not seen yet */
+ int rc = SQLITE_OK; /* Result code from this routine */
+ int i, j; /* Loop counters */
+#ifdef SQLITE_TEST
+ int tstFlags = 0; /* Value for testflags=N parameter */
+#endif
+ int b; /* Value of a boolean parameter */
+ int nCol = -99; /* Value of the columns= parameter */
+ CsvReader sRdr; /* A CSV file reader used to store an error
+ ** message and/or to count the number of columns */
+ static const char *azParam[] = {
+ "filename", "data", "schema",
+ };
+ char *azPValue[3]; /* Parameter values */
+# define CSV_FILENAME (azPValue[0])
+# define CSV_DATA (azPValue[1])
+# define CSV_SCHEMA (azPValue[2])
+
+
+ assert( sizeof(azPValue)==sizeof(azParam) );
+ memset(&sRdr, 0, sizeof(sRdr));
+ memset(azPValue, 0, sizeof(azPValue));
+ /* Parse the arguments from index 3 onward. Each one must be a string
+ ** parameter from azParam[], header=, columns= or (test builds only)
+ ** testflags=. Anything else is an error. */
+ for(i=3; i<argc; i++){
+ const char *z = argv[i];
+ const char *zValue;
+ for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){
+ if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break;
+ }
+ if( j<sizeof(azParam)/sizeof(azParam[0]) ){
+ if( sRdr.zErr[0] ) goto csvtab_connect_error;
+ }else
+ if( csv_boolean_parameter("header",6,z,&b) ){
+ if( bHeader>=0 ){
+ csv_errmsg(&sRdr, "more than one 'header' parameter");
+ goto csvtab_connect_error;
+ }
+ bHeader = b;
+ }else
+#ifdef SQLITE_TEST
+ if( (zValue = csv_parameter("testflags",9,z))!=0 ){
+ tstFlags = (unsigned int)atoi(zValue);
+ }else
+#endif
+ if( (zValue = csv_parameter("columns",7,z))!=0 ){
+ if( nCol>0 ){
+ csv_errmsg(&sRdr, "more than one 'columns' parameter");
+ goto csvtab_connect_error;
+ }
+ nCol = atoi(zValue);
+ if( nCol<=0 ){
+ csv_errmsg(&sRdr, "column= value must be positive");
+ goto csvtab_connect_error;
+ }
+ }else
+ {
+ csv_errmsg(&sRdr, "bad parameter: '%s'", z);
+ goto csvtab_connect_error;
+ }
+ }
+ /* Exactly one of filename= and data= must have been given */
+ if( (CSV_FILENAME==0)==(CSV_DATA==0) ){
+ csv_errmsg(&sRdr, "must specify either filename= or data= but not both");
+ goto csvtab_connect_error;
+ }
+
+ /* The input only needs to be opened here when the first row must be
+ ** examined: to count columns, or because it is a header row. */
+ if( (nCol<=0 || bHeader==1)
+ && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA)
+ ){
+ goto csvtab_connect_error;
+ }
+ pNew = sqlite3_malloc( sizeof(*pNew) );
+ *ppVtab = (sqlite3_vtab*)pNew;
+ if( pNew==0 ) goto csvtab_connect_oom;
+ memset(pNew, 0, sizeof(*pNew));
+ if( CSV_SCHEMA==0 ){
+ /* No schema= given: synthesize a CREATE TABLE statement */
+ sqlite3_str *pStr = sqlite3_str_new(0);
+ char *zSep = "";
+ int iCol = 0;
+ sqlite3_str_appendf(pStr, "CREATE TABLE x(");
+ if( nCol<0 && bHeader<1 ){
+ /* No columns= and no header row: count fields in the first row */
+ nCol = 0;
+ do{
+ csv_read_one_field(&sRdr);
+ nCol++;
+ }while( sRdr.cTerm==',' );
+ }
+ if( nCol>0 && bHeader<1 ){
+ /* Generated column names, numbered from zero */
+ for(iCol=0; iCol<nCol; iCol++){
+ sqlite3_str_appendf(pStr, "%sc%d TEXT", zSep, iCol);
+ zSep = ",";
+ }
+ }else{
+ /* header=yes: take column names from the fields of the first row,
+ ** using at most nCol of them when columns= was given. */
+ do{
+ char *z = csv_read_one_field(&sRdr);
+ if( (nCol>0 && iCol<nCol) || (nCol<0 && bHeader) ){
+ sqlite3_str_appendf(pStr,"%s\"%w\" TEXT", zSep, z);
+ zSep = ",";
+ iCol++;
+ }
+ }while( sRdr.cTerm==',' );
+ if( nCol<0 ){
+ nCol = iCol;
+ }else{
+ /* The header had fewer than nCol fields: pad with generated names.
+ ** NOTE(review): ++iCol makes these names one higher than the
+ ** column's zero-based position, unlike the names generated above. */
+ while( iCol<nCol ){
+ sqlite3_str_appendf(pStr,"%sc%d TEXT", zSep, ++iCol);
+ zSep = ",";
+ }
+ }
+ }
+ pNew->nCol = nCol;
+ sqlite3_str_appendf(pStr, ")");
+ CSV_SCHEMA = sqlite3_str_finish(pStr);
+ if( CSV_SCHEMA==0 ) goto csvtab_connect_oom;
+ }else if( nCol<0 ){
+ /* schema= given without columns=: count fields in the first row */
+ do{
+ csv_read_one_field(&sRdr);
+ pNew->nCol++;
+ }while( sRdr.cTerm==',' );
+ }else{
+ /* NOTE(review): with schema=, columns= and header=yes all given, no
+ ** field is read on this path, so the iStart computed below looks like
+ ** it stays at the start of input - confirm the header row is skipped
+ ** as intended. */
+ pNew->nCol = nCol;
+ }
+ /* Ownership of the filename/data strings moves to the table; the
+ ** azPValue[] slots are cleared so the free loops below skip them. */
+ pNew->zFilename = CSV_FILENAME; CSV_FILENAME = 0;
+ pNew->zData = CSV_DATA; CSV_DATA = 0;
+#ifdef SQLITE_TEST
+ pNew->tstFlags = tstFlags;
+#endif
+ /* Record where data rows begin. With a header row this is the first
+ ** unread byte: for a file, the stream position minus the bytes that
+ ** are buffered but not yet consumed. */
+ if( bHeader!=1 ){
+ pNew->iStart = 0;
+ }else if( pNew->zData ){
+ pNew->iStart = (int)sRdr.iIn;
+ }else{
+ pNew->iStart = (int)(ftell(sRdr.in) - sRdr.nIn + sRdr.iIn);
+ }
+ csv_reader_reset(&sRdr);
+ rc = sqlite3_declare_vtab(db, CSV_SCHEMA);
+ if( rc ){
+ /* sRdr is closed by now; it only carries the error message */
+ csv_errmsg(&sRdr, "bad schema: '%s' - %s", CSV_SCHEMA, sqlite3_errmsg(db));
+ goto csvtab_connect_error;
+ }
+ for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
+ sqlite3_free(azPValue[i]);
+ }
+ /* Rationale for DIRECTONLY:
+ ** An attacker who controls a database schema could use this vtab
+ ** to exfiltrate sensitive data from other files in the filesystem.
+ ** And, recommended practice is to put all CSV virtual tables in the
+ ** TEMP namespace, so they should still be usable from within TEMP
+ ** views, so there shouldn't be a serious loss of functionality by
+ ** prohibiting the use of this vtab from persistent triggers and views.
+ */
+ sqlite3_vtab_config(db, SQLITE_VTAB_DIRECTONLY);
+ return SQLITE_OK;
+
+csvtab_connect_oom:
+ rc = SQLITE_NOMEM;
+ csv_errmsg(&sRdr, "out of memory");
+
+csvtab_connect_error:
+ /* NOTE(review): pNew is released here but *ppVtab still holds its
+ ** address; this presumably relies on the caller ignoring *ppVtab
+ ** when an error code is returned. */
+ if( pNew ) csvtabDisconnect(&pNew->base);
+ for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
+ sqlite3_free(azPValue[i]);
+ }
+ if( sRdr.zErr[0] ){
+ sqlite3_free(*pzErr);
+ *pzErr = sqlite3_mprintf("%s", sRdr.zErr);
+ }
+ csv_reader_reset(&sRdr);
+ if( rc==SQLITE_OK ) rc = SQLITE_ERROR;
+ return rc;
+}
+
+/*
+** Discard the row currently held by a CsvCursor: release every column
+** value and clear its recorded allocation size.
+*/
+static void csvtabCursorRowReset(CsvCursor *pCur){
+  CsvTable *pTab = (CsvTable*)pCur->base.pVtab;
+  int iCol = 0;
+  while( iCol<pTab->nCol ){
+    sqlite3_free(pCur->azVal[iCol]);
+    pCur->azVal[iCol] = 0;
+    pCur->aLen[iCol] = 0;
+    iCol++;
+  }
+}
+
+/*
+** xCreate simply forwards to xConnect.  The two must nevertheless be
+** distinct functions so that the virtual table is not an eponymous
+** virtual table.
+*/
+static int csvtabCreate(
+  sqlite3 *db,
+  void *pAux,
+  int argc, const char *const*argv,
+  sqlite3_vtab **ppVtab,
+  char **pzErr
+){
+  int rc = csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr);
+  return rc;
+}
+
+/*
+** Destructor for a CsvCursor.  Releases the current row, closes the
+** reader, then releases the cursor allocation.  Always returns
+** SQLITE_OK.
+*/
+static int csvtabClose(sqlite3_vtab_cursor *cur){
+  CsvCursor *pCsr = (CsvCursor*)cur;
+  csvtabCursorRowReset(pCsr);
+  csv_reader_reset(&pCsr->rdr);
+  sqlite3_free(pCsr);
+  return SQLITE_OK;
+}
+
+/*
+** Constructor for a new CsvTable cursor object.
+**
+** The cursor and its per-column azVal[] and aLen[] arrays share a single
+** allocation.  On success *ppCursor receives the new cursor and SQLITE_OK
+** is returned.  On failure SQLITE_NOMEM or SQLITE_ERROR is returned,
+** *ppCursor is left untouched and nothing remains allocated: xClose is
+** not invoked for a cursor whose xOpen failed, so the cursor must be
+** released here.
+*/
+static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
+  CsvTable *pTab = (CsvTable*)p;
+  CsvCursor *pCur;
+  size_t nByte;
+  nByte = sizeof(*pCur) + (sizeof(char*)+sizeof(int))*pTab->nCol;
+  pCur = sqlite3_malloc64( nByte );
+  if( pCur==0 ) return SQLITE_NOMEM;
+  memset(pCur, 0, nByte);
+  /* The arrays live directly after the cursor structure */
+  pCur->azVal = (char**)&pCur[1];
+  pCur->aLen = (int*)&pCur->azVal[pTab->nCol];
+  if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){
+    /* Save the message before the reader that holds it is released */
+    csv_xfer_error(pTab, &pCur->rdr);
+    csv_reader_reset(&pCur->rdr);
+    sqlite3_free(pCur);
+    return SQLITE_ERROR;
+  }
+  *ppCursor = &pCur->base;
+  return SQLITE_OK;
+}
+
+
+/*
+** Advance a CsvCursor to its next row of input.
+** Set the EOF marker if we reach the end of input.
+**
+** Fields are read until one ends with something other than ','. The
+** first nCol fields are copied into azVal[]; any further fields on the
+** row are read and discarded. Columns missing from a short row are set
+** to NULL. Always returns SQLITE_OK.
+*/
+static int csvtabNext(sqlite3_vtab_cursor *cur){
+ CsvCursor *pCur = (CsvCursor*)cur;
+ CsvTable *pTab = (CsvTable*)cur->pVtab;
+ int i = 0;
+ char *z;
+ do{
+ z = csv_read_one_field(&pCur->rdr);
+ if( z==0 ){
+ /* End of input, or an OOM inside the reader */
+ break;
+ }
+ if( i<pTab->nCol ){
+ /* aLen[i] is the size of the azVal[i] allocation; grow it only
+ ** when this field, plus its terminator, does not fit. */
+ if( pCur->aLen[i] < pCur->rdr.n+1 ){
+ char *zNew = sqlite3_realloc64(pCur->azVal[i], pCur->rdr.n+1);
+ if( zNew==0 ){
+ /* NOTE(review): the message is stored on the table, but the
+ ** function still returns SQLITE_OK after this break. */
+ csv_errmsg(&pCur->rdr, "out of memory");
+ csv_xfer_error(pTab, &pCur->rdr);
+ break;
+ }
+ pCur->azVal[i] = zNew;
+ pCur->aLen[i] = pCur->rdr.n+1;
+ }
+ /* Copy the field text together with its zero terminator */
+ memcpy(pCur->azVal[i], z, pCur->rdr.n+1);
+ i++;
+ }
+ }while( pCur->rdr.cTerm==',' );
+ if( z==0 && i==0 ){
+ /* Nothing was read at all: mark the cursor as being at EOF */
+ pCur->iRowid = -1;
+ }else{
+ pCur->iRowid++;
+ /* Clear the columns that this row did not supply */
+ while( i<pTab->nCol ){
+ sqlite3_free(pCur->azVal[i]);
+ pCur->azVal[i] = 0;
+ pCur->aLen[i] = 0;
+ i++;
+ }
+ }
+ return SQLITE_OK;
+}
+
+/*
+** Report the value of column i for the row the CsvCursor currently
+** points at.  No result is set when i is out of range or when the row
+** has no value for that column.
+*/
+static int csvtabColumn(
+  sqlite3_vtab_cursor *cur,   /* The cursor */
+  sqlite3_context *ctx,       /* First argument to sqlite3_result_...() */
+  int i                       /* Which column to return */
+){
+  CsvCursor *pCsr = (CsvCursor*)cur;
+  CsvTable *pTab;
+  const char *zVal;
+
+  if( i<0 ) return SQLITE_OK;
+  pTab = (CsvTable*)cur->pVtab;
+  if( i>=pTab->nCol ) return SQLITE_OK;
+  zVal = pCsr->azVal[i];
+  if( zVal!=0 ){
+    sqlite3_result_text(ctx, zVal, -1, SQLITE_TRANSIENT);
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Store the rowid of the cursor's current row in *pRowid.
+*/
+static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
+  const CsvCursor *pCsr = (const CsvCursor*)cur;
+  pRowid[0] = pCsr->iRowid;
+  return SQLITE_OK;
+}
+
+/*
+** Return 1 if the cursor has moved past the last row of output, which
+** is recorded as a negative rowid.  Return 0 otherwise.
+*/
+static int csvtabEof(sqlite3_vtab_cursor *cur){
+  const CsvCursor *pCsr = (const CsvCursor*)cur;
+  if( pCsr->iRowid<0 ) return 1;
+  return 0;
+}
+
+/*
+** Only a full table scan is supported, so xFilter ignores its index
+** arguments, rewinds the input to the first data row and loads that
+** row.
+*/
+static int csvtabFilter(
+  sqlite3_vtab_cursor *pVtabCursor,
+  int idxNum, const char *idxStr,
+  int argc, sqlite3_value **argv
+){
+  CsvCursor *pCsr = (CsvCursor*)pVtabCursor;
+  CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab;
+  CsvReader *pRdr = &pCsr->rdr;
+
+  pCsr->iRowid = 0;
+
+  /* Make sure the field buffer exists.  Without it, a zero-length first
+  ** field would come back from the reader as a NULL pointer, which
+  ** csvtabNext() could mistake for an OOM error. */
+  if( csv_append(pRdr, 0) ) return SQLITE_NOMEM;
+
+  if( pRdr->in!=0 ){
+    /* File input: seek to the first data row and empty the buffer */
+    fseek(pRdr->in, pTab->iStart, SEEK_SET);
+    pRdr->iIn = 0;
+    pRdr->nIn = 0;
+  }else{
+    /* In-memory input: just move the read offset */
+    assert( pRdr->zIn==pTab->zData );
+    assert( pTab->iStart>=0 );
+    assert( (size_t)pTab->iStart<=pRdr->nIn );
+    pRdr->iIn = pTab->iStart;
+  }
+  return csvtabNext(pVtabCursor);
+}
+
+/*
+** Only a forward full table scan is supported.  xBestIndex is mostly
+** a no-op that reports a fixed, high estimated cost.  If CSVTEST_FIDX is
+** set (SQLITE_TEST builds only), then the presence of ==, LIKE, or GLOB
+** constraints lowers the estimated cost, which is fiction, but is useful
+** for testing certain kinds of virtual table behavior.
+*/
+static int csvtabBestIndex(
+  sqlite3_vtab *tab,
+  sqlite3_index_info *pIdxInfo
+){
+  pIdxInfo->estimatedCost = 1000000;
+#ifdef SQLITE_TEST
+  if( (((CsvTable*)tab)->tstFlags & CSVTEST_FIDX)!=0 ){
+    /* The usual (and sensible) case is to always do a full table scan.
+    ** The code in this branch only runs when testflags=1.  This code
+    ** generates an artificial and unrealistic plan which is useful
+    ** for testing virtual table logic but is not helpful to real
+    ** applications.
+    **
+    ** Any usable ==, LIKE, or GLOB constraint is marked as used by the
+    ** virtual table (even though it is not) and the cost of running the
+    ** virtual table is reduced from 1 million to just 10.  The constraints
+    ** are *not* marked as omittable, however, so the query planner should
+    ** still generate a plan that gives a correct answer, even if the plan
+    ** is not optimal.
+    */
+    int i;
+    int nConst = 0;
+    for(i=0; i<pIdxInfo->nConstraint; i++){
+      unsigned char op;
+      if( pIdxInfo->aConstraint[i].usable==0 ) continue;
+      op = pIdxInfo->aConstraint[i].op;
+      if( op==SQLITE_INDEX_CONSTRAINT_EQ
+       || op==SQLITE_INDEX_CONSTRAINT_LIKE
+       || op==SQLITE_INDEX_CONSTRAINT_GLOB
+      ){
+        pIdxInfo->estimatedCost = 10;
+        /* aConstraintUsage[] runs parallel to aConstraint[], so the
+        ** entry for constraint i must be the one that is marked.  The
+        ** argvIndex values are handed out consecutively from 1. */
+        pIdxInfo->aConstraintUsage[i].argvIndex = ++nConst;
+      }
+    }
+  }
+#endif
+  return SQLITE_OK;
+}
+
+
+/* Method table for the read-only CSV virtual table, registered under
+** the name "csv" by sqlite3_csv_init(). xDisconnect and xDestroy share
+** one implementation. The xUpdate, transaction, xFindMethod and xRename
+** slots are left as 0. */
+static sqlite3_module CsvModule = {
+ 0, /* iVersion */
+ csvtabCreate, /* xCreate */
+ csvtabConnect, /* xConnect */
+ csvtabBestIndex, /* xBestIndex */
+ csvtabDisconnect, /* xDisconnect */
+ csvtabDisconnect, /* xDestroy */
+ csvtabOpen, /* xOpen - open a cursor */
+ csvtabClose, /* xClose - close a cursor */
+ csvtabFilter, /* xFilter - configure scan constraints */
+ csvtabNext, /* xNext - advance a cursor */
+ csvtabEof, /* xEof - check for end of scan */
+ csvtabColumn, /* xColumn - read data */
+ csvtabRowid, /* xRowid - read data */
+ 0, /* xUpdate */
+ 0, /* xBegin */
+ 0, /* xSync */
+ 0, /* xCommit */
+ 0, /* xRollback */
+ 0, /* xFindMethod */
+ 0, /* xRename */
+};
+
+#ifdef SQLITE_TEST
+/*
+** For virtual table testing, a version of the CSV virtual table is made
+** available that has an xUpdate method.  This is that method: it ignores
+** all of its arguments and always returns SQLITE_READONLY, since the CSV
+** file is not really writable.
+*/
+static int csvtabUpdate(sqlite3_vtab *p,int n,sqlite3_value**v,sqlite3_int64*x){
+  (void)p;
+  (void)n;
+  (void)v;
+  (void)x;
+  return SQLITE_READONLY;
+}
+/* Test-only method table, registered under the name "csv_wr" by
+** sqlite3_csv_init(). It matches CsvModule except that the xUpdate slot
+** is filled with csvtabUpdate. */
+static sqlite3_module CsvModuleFauxWrite = {
+ 0, /* iVersion */
+ csvtabCreate, /* xCreate */
+ csvtabConnect, /* xConnect */
+ csvtabBestIndex, /* xBestIndex */
+ csvtabDisconnect, /* xDisconnect */
+ csvtabDisconnect, /* xDestroy */
+ csvtabOpen, /* xOpen - open a cursor */
+ csvtabClose, /* xClose - close a cursor */
+ csvtabFilter, /* xFilter - configure scan constraints */
+ csvtabNext, /* xNext - advance a cursor */
+ csvtabEof, /* xEof - check for end of scan */
+ csvtabColumn, /* xColumn - read data */
+ csvtabRowid, /* xRowid - read data */
+ csvtabUpdate, /* xUpdate */
+ 0, /* xBegin */
+ 0, /* xSync */
+ 0, /* xCommit */
+ 0, /* xRollback */
+ 0, /* xFindMethod */
+ 0, /* xRename */
+};
+#endif /* SQLITE_TEST */
+
+#endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */
+
+
+#ifdef _WIN32
+__declspec(dllexport)
+#endif
+/*
+** Extension entry point, called when the extension is loaded.
+** Registers the "csv" virtual table module with the database
+** connection db and, in SQLITE_TEST builds, also the "csv_wr" module.
+** Returns the result of the last registration attempted.  When virtual
+** tables are compiled out, nothing is registered and SQLITE_OK is
+** returned.
+*/
+int sqlite3_csv_init(
+  sqlite3 *db,
+  char **pzErrMsg,
+  const sqlite3_api_routines *pApi
+){
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  int rc;
+  SQLITE_EXTENSION_INIT2(pApi);
+  rc = sqlite3_create_module(db, "csv", &CsvModule, 0);
+#ifdef SQLITE_TEST
+  if( rc!=SQLITE_OK ) return rc;
+  rc = sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0);
+#endif
+  return rc;
+#else
+  return SQLITE_OK;
+#endif
+}