//! CSV Virtual Table. //! //! Port of [csv](http://www.sqlite.org/cgi/src/finfo?name=ext/misc/csv.c) C //! extension: `https://www.sqlite.org/csv.html` //! //! # Example //! //! ```rust,no_run //! # use rusqlite::{Connection, Result}; //! fn example() -> Result<()> { //! // Note: This should be done once (usually when opening the DB). //! let db = Connection::open_in_memory()?; //! rusqlite::vtab::csvtab::load_module(&db)?; //! // Assume my_csv.csv //! let schema = " //! CREATE VIRTUAL TABLE my_csv_data //! USING csv(filename = 'my_csv.csv') //! "; //! db.execute_batch(schema)?; //! // Now the `my_csv_data` (virtual) table can be queried as normal... //! Ok(()) //! } //! ``` use std::fs::File; use std::marker::PhantomData; use std::os::raw::c_int; use std::path::Path; use std::str; use crate::ffi; use crate::types::Null; use crate::vtab::{ escape_double_quote, parse_boolean, read_only_module, Context, CreateVTab, IndexInfo, VTab, VTabConfig, VTabConnection, VTabCursor, VTabKind, Values, }; use crate::{Connection, Error, Result}; /// Register the "csv" module. /// ```sql /// CREATE VIRTUAL TABLE vtab USING csv( /// filename=FILENAME -- Name of file containing CSV content /// [, schema=SCHEMA] -- Alternative CSV schema. 'CREATE TABLE x(col1 TEXT NOT NULL, col2 INT, ...);' /// [, header=YES|NO] -- First row of CSV defines the names of columns if "yes". Default "no". /// [, columns=N] -- Assume the CSV file contains N columns. /// [, delimiter=C] -- CSV delimiter. Default ','. /// [, quote=C] -- CSV quote. Default '"'. 0 means no quote. /// ); /// ``` pub fn load_module(conn: &Connection) -> Result<()> { let aux: Option<()> = None; conn.create_module("csv", read_only_module::(), aux) } /// An instance of the CSV virtual table #[repr(C)] struct CsvTab { /// Base class. Must be first base: ffi::sqlite3_vtab, /// Name of the CSV file filename: String, has_headers: bool, delimiter: u8, quote: u8, /// Offset to start of data offset_first_row: csv::Position, } impl CsvTab { fn reader(&self) -> Result, csv::Error> { csv::ReaderBuilder::new() .has_headers(self.has_headers) .delimiter(self.delimiter) .quote(self.quote) .from_path(&self.filename) } fn parse_byte(arg: &str) -> Option { if arg.len() == 1 { arg.bytes().next() } else { None } } } unsafe impl<'vtab> VTab<'vtab> for CsvTab { type Aux = (); type Cursor = CsvTabCursor<'vtab>; fn connect( db: &mut VTabConnection, _aux: Option<&()>, args: &[&[u8]], ) -> Result<(String, CsvTab)> { if args.len() < 4 { return Err(Error::ModuleError("no CSV file specified".to_owned())); } let mut vtab = CsvTab { base: ffi::sqlite3_vtab::default(), filename: "".to_owned(), has_headers: false, delimiter: b',', quote: b'"', offset_first_row: csv::Position::new(), }; let mut schema = None; let mut n_col = None; let args = &args[3..]; for c_slice in args { let (param, value) = super::parameter(c_slice)?; match param { "filename" => { if !Path::new(value).exists() { return Err(Error::ModuleError(format!("file '{value}' does not exist"))); } vtab.filename = value.to_owned(); } "schema" => { schema = Some(value.to_owned()); } "columns" => { if let Ok(n) = value.parse::() { if n_col.is_some() { return Err(Error::ModuleError( "more than one 'columns' parameter".to_owned(), )); } else if n == 0 { return Err(Error::ModuleError( "must have at least one column".to_owned(), )); } n_col = Some(n); } else { return Err(Error::ModuleError(format!( "unrecognized argument to 'columns': {value}" ))); } } "header" => { if let Some(b) = parse_boolean(value) { vtab.has_headers = b; } else { return Err(Error::ModuleError(format!( "unrecognized argument to 'header': {value}" ))); } } "delimiter" => { if let Some(b) = CsvTab::parse_byte(value) { vtab.delimiter = b; } else { return Err(Error::ModuleError(format!( "unrecognized argument to 'delimiter': {value}" ))); } } "quote" => { if let Some(b) = CsvTab::parse_byte(value) { if b == b'0' { vtab.quote = 0; } else { vtab.quote = b; } } else { return Err(Error::ModuleError(format!( "unrecognized argument to 'quote': {value}" ))); } } _ => { return Err(Error::ModuleError(format!( "unrecognized parameter '{param}'" ))); } } } if vtab.filename.is_empty() { return Err(Error::ModuleError("no CSV file specified".to_owned())); } let mut cols: Vec = Vec::new(); if vtab.has_headers || (n_col.is_none() && schema.is_none()) { let mut reader = vtab.reader()?; if vtab.has_headers { { let headers = reader.headers()?; // headers ignored if cols is not empty if n_col.is_none() && schema.is_none() { cols = headers .into_iter() .map(|header| escape_double_quote(header).into_owned()) .collect(); } } vtab.offset_first_row = reader.position().clone(); } else { let mut record = csv::ByteRecord::new(); if reader.read_byte_record(&mut record)? { for (i, _) in record.iter().enumerate() { cols.push(format!("c{i}")); } } } } else if let Some(n_col) = n_col { for i in 0..n_col { cols.push(format!("c{i}")); } } if cols.is_empty() && schema.is_none() { return Err(Error::ModuleError("no column specified".to_owned())); } if schema.is_none() { let mut sql = String::from("CREATE TABLE x("); for (i, col) in cols.iter().enumerate() { sql.push('"'); sql.push_str(col); sql.push_str("\" TEXT"); if i == cols.len() - 1 { sql.push_str(");"); } else { sql.push_str(", "); } } schema = Some(sql); } db.config(VTabConfig::DirectOnly)?; Ok((schema.unwrap(), vtab)) } // Only a forward full table scan is supported. fn best_index(&self, info: &mut IndexInfo) -> Result<()> { info.set_estimated_cost(1_000_000.); Ok(()) } fn open(&mut self) -> Result> { Ok(CsvTabCursor::new(self.reader()?)) } } impl CreateVTab<'_> for CsvTab { const KIND: VTabKind = VTabKind::Default; } /// A cursor for the CSV virtual table #[repr(C)] struct CsvTabCursor<'vtab> { /// Base class. Must be first base: ffi::sqlite3_vtab_cursor, /// The CSV reader object reader: csv::Reader, /// Current cursor position used as rowid row_number: usize, /// Values of the current row cols: csv::StringRecord, eof: bool, phantom: PhantomData<&'vtab CsvTab>, } impl CsvTabCursor<'_> { fn new<'vtab>(reader: csv::Reader) -> CsvTabCursor<'vtab> { CsvTabCursor { base: ffi::sqlite3_vtab_cursor::default(), reader, row_number: 0, cols: csv::StringRecord::new(), eof: false, phantom: PhantomData, } } /// Accessor to the associated virtual table. fn vtab(&self) -> &CsvTab { unsafe { &*(self.base.pVtab as *const CsvTab) } } } unsafe impl VTabCursor for CsvTabCursor<'_> { // Only a full table scan is supported. So `filter` simply rewinds to // the beginning. fn filter( &mut self, _idx_num: c_int, _idx_str: Option<&str>, _args: &Values<'_>, ) -> Result<()> { { let offset_first_row = self.vtab().offset_first_row.clone(); self.reader.seek(offset_first_row)?; } self.row_number = 0; self.next() } fn next(&mut self) -> Result<()> { { self.eof = self.reader.is_done(); if self.eof { return Ok(()); } self.eof = !self.reader.read_record(&mut self.cols)?; } self.row_number += 1; Ok(()) } fn eof(&self) -> bool { self.eof } fn column(&self, ctx: &mut Context, col: c_int) -> Result<()> { if col < 0 || col as usize >= self.cols.len() { return Err(Error::ModuleError(format!( "column index out of bounds: {col}" ))); } if self.cols.is_empty() { return ctx.set_result(&Null); } // TODO Affinity ctx.set_result(&self.cols[col as usize].to_owned()) } fn rowid(&self) -> Result { Ok(self.row_number as i64) } } impl From for Error { #[cold] fn from(err: csv::Error) -> Error { Error::ModuleError(err.to_string()) } } #[cfg(test)] mod test { use crate::vtab::csvtab; use crate::{Connection, Result}; use fallible_iterator::FallibleIterator; #[test] fn test_csv_module() -> Result<()> { let db = Connection::open_in_memory()?; csvtab::load_module(&db)?; db.execute_batch("CREATE VIRTUAL TABLE vtab USING csv(filename='test.csv', header=yes)")?; { let mut s = db.prepare("SELECT rowid, * FROM vtab")?; { let headers = s.column_names(); assert_eq!(vec!["rowid", "colA", "colB", "colC"], headers); } let ids: Result> = s.query([])?.map(|row| row.get::<_, i32>(0)).collect(); let sum = ids?.iter().sum::(); assert_eq!(sum, 15); } db.execute_batch("DROP TABLE vtab") } #[test] fn test_csv_cursor() -> Result<()> { let db = Connection::open_in_memory()?; csvtab::load_module(&db)?; db.execute_batch("CREATE VIRTUAL TABLE vtab USING csv(filename='test.csv', header=yes)")?; { let mut s = db.prepare( "SELECT v1.rowid, v1.* FROM vtab v1 NATURAL JOIN vtab v2 WHERE \ v1.rowid < v2.rowid", )?; let mut rows = s.query([])?; let row = rows.next()?.unwrap(); assert_eq!(row.get_unwrap::<_, i32>(0), 2); } db.execute_batch("DROP TABLE vtab") } }