summaryrefslogtreecommitdiffstats
path: root/third_party/rust/rusqlite/src/vtab/csvtab.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/rusqlite/src/vtab/csvtab.rs')
-rw-r--r--third_party/rust/rusqlite/src/vtab/csvtab.rs396
1 files changed, 396 insertions, 0 deletions
diff --git a/third_party/rust/rusqlite/src/vtab/csvtab.rs b/third_party/rust/rusqlite/src/vtab/csvtab.rs
new file mode 100644
index 0000000000..a65db054af
--- /dev/null
+++ b/third_party/rust/rusqlite/src/vtab/csvtab.rs
@@ -0,0 +1,396 @@
+//! CSV Virtual Table.
+//!
+//! Port of [csv](http://www.sqlite.org/cgi/src/finfo?name=ext/misc/csv.c) C
+//! extension: `https://www.sqlite.org/csv.html`
+//!
+//! # Example
+//!
+//! ```rust,no_run
+//! # use rusqlite::{Connection, Result};
+//! fn example() -> Result<()> {
+//! // Note: This should be done once (usually when opening the DB).
+//! let db = Connection::open_in_memory()?;
+//! rusqlite::vtab::csvtab::load_module(&db)?;
+//! // Assume my_csv.csv
+//! let schema = "
+//! CREATE VIRTUAL TABLE my_csv_data
+//! USING csv(filename = 'my_csv.csv')
+//! ";
+//! db.execute_batch(schema)?;
+//! // Now the `my_csv_data` (virtual) table can be queried as normal...
+//! Ok(())
+//! }
+//! ```
+use std::fs::File;
+use std::marker::PhantomData;
+use std::os::raw::c_int;
+use std::path::Path;
+use std::str;
+
+use crate::ffi;
+use crate::types::Null;
+use crate::vtab::{
+ escape_double_quote, parse_boolean, read_only_module, Context, CreateVTab, IndexInfo, VTab,
+ VTabConfig, VTabConnection, VTabCursor, VTabKind, Values,
+};
+use crate::{Connection, Error, Result};
+
+/// Register the "csv" module.
+/// ```sql
+/// CREATE VIRTUAL TABLE vtab USING csv(
+/// filename=FILENAME -- Name of file containing CSV content
+/// [, schema=SCHEMA] -- Alternative CSV schema. 'CREATE TABLE x(col1 TEXT NOT NULL, col2 INT, ...);'
+/// [, header=YES|NO] -- First row of CSV defines the names of columns if "yes". Default "no".
+/// [, columns=N] -- Assume the CSV file contains N columns.
+/// [, delimiter=C] -- CSV delimiter. Default ','.
+/// [, quote=C] -- CSV quote. Default '"'. 0 means no quote.
+/// );
+/// ```
+pub fn load_module(conn: &Connection) -> Result<()> {
+ let aux: Option<()> = None;
+ conn.create_module("csv", read_only_module::<CsvTab>(), aux)
+}
+
+/// An instance of the CSV virtual table
+#[repr(C)]
+struct CsvTab {
+ /// Base class. Must be first
+ base: ffi::sqlite3_vtab,
+ /// Name of the CSV file
+ filename: String,
+ has_headers: bool,
+ delimiter: u8,
+ quote: u8,
+ /// Offset to start of data
+ offset_first_row: csv::Position,
+}
+
+impl CsvTab {
+ fn reader(&self) -> Result<csv::Reader<File>, csv::Error> {
+ csv::ReaderBuilder::new()
+ .has_headers(self.has_headers)
+ .delimiter(self.delimiter)
+ .quote(self.quote)
+ .from_path(&self.filename)
+ }
+
+ fn parse_byte(arg: &str) -> Option<u8> {
+ if arg.len() == 1 {
+ arg.bytes().next()
+ } else {
+ None
+ }
+ }
+}
+
+unsafe impl<'vtab> VTab<'vtab> for CsvTab {
+ type Aux = ();
+ type Cursor = CsvTabCursor<'vtab>;
+
+ fn connect(
+ db: &mut VTabConnection,
+ _aux: Option<&()>,
+ args: &[&[u8]],
+ ) -> Result<(String, CsvTab)> {
+ if args.len() < 4 {
+ return Err(Error::ModuleError("no CSV file specified".to_owned()));
+ }
+
+ let mut vtab = CsvTab {
+ base: ffi::sqlite3_vtab::default(),
+ filename: "".to_owned(),
+ has_headers: false,
+ delimiter: b',',
+ quote: b'"',
+ offset_first_row: csv::Position::new(),
+ };
+ let mut schema = None;
+ let mut n_col = None;
+
+ let args = &args[3..];
+ for c_slice in args {
+ let (param, value) = super::parameter(c_slice)?;
+ match param {
+ "filename" => {
+ if !Path::new(value).exists() {
+ return Err(Error::ModuleError(format!(
+ "file '{}' does not exist",
+ value
+ )));
+ }
+ vtab.filename = value.to_owned();
+ }
+ "schema" => {
+ schema = Some(value.to_owned());
+ }
+ "columns" => {
+ if let Ok(n) = value.parse::<u16>() {
+ if n_col.is_some() {
+ return Err(Error::ModuleError(
+ "more than one 'columns' parameter".to_owned(),
+ ));
+ } else if n == 0 {
+ return Err(Error::ModuleError(
+ "must have at least one column".to_owned(),
+ ));
+ }
+ n_col = Some(n);
+ } else {
+ return Err(Error::ModuleError(format!(
+ "unrecognized argument to 'columns': {}",
+ value
+ )));
+ }
+ }
+ "header" => {
+ if let Some(b) = parse_boolean(value) {
+ vtab.has_headers = b;
+ } else {
+ return Err(Error::ModuleError(format!(
+ "unrecognized argument to 'header': {}",
+ value
+ )));
+ }
+ }
+ "delimiter" => {
+ if let Some(b) = CsvTab::parse_byte(value) {
+ vtab.delimiter = b;
+ } else {
+ return Err(Error::ModuleError(format!(
+ "unrecognized argument to 'delimiter': {}",
+ value
+ )));
+ }
+ }
+ "quote" => {
+ if let Some(b) = CsvTab::parse_byte(value) {
+ if b == b'0' {
+ vtab.quote = 0;
+ } else {
+ vtab.quote = b;
+ }
+ } else {
+ return Err(Error::ModuleError(format!(
+ "unrecognized argument to 'quote': {}",
+ value
+ )));
+ }
+ }
+ _ => {
+ return Err(Error::ModuleError(format!(
+ "unrecognized parameter '{}'",
+ param
+ )));
+ }
+ }
+ }
+
+ if vtab.filename.is_empty() {
+ return Err(Error::ModuleError("no CSV file specified".to_owned()));
+ }
+
+ let mut cols: Vec<String> = Vec::new();
+ if vtab.has_headers || (n_col.is_none() && schema.is_none()) {
+ let mut reader = vtab.reader()?;
+ if vtab.has_headers {
+ {
+ let headers = reader.headers()?;
+ // headers ignored if cols is not empty
+ if n_col.is_none() && schema.is_none() {
+ cols = headers
+ .into_iter()
+ .map(|header| escape_double_quote(header).into_owned())
+ .collect();
+ }
+ }
+ vtab.offset_first_row = reader.position().clone();
+ } else {
+ let mut record = csv::ByteRecord::new();
+ if reader.read_byte_record(&mut record)? {
+ for (i, _) in record.iter().enumerate() {
+ cols.push(format!("c{}", i));
+ }
+ }
+ }
+ } else if let Some(n_col) = n_col {
+ for i in 0..n_col {
+ cols.push(format!("c{}", i));
+ }
+ }
+
+ if cols.is_empty() && schema.is_none() {
+ return Err(Error::ModuleError("no column specified".to_owned()));
+ }
+
+ if schema.is_none() {
+ let mut sql = String::from("CREATE TABLE x(");
+ for (i, col) in cols.iter().enumerate() {
+ sql.push('"');
+ sql.push_str(col);
+ sql.push_str("\" TEXT");
+ if i == cols.len() - 1 {
+ sql.push_str(");");
+ } else {
+ sql.push_str(", ");
+ }
+ }
+ schema = Some(sql);
+ }
+ db.config(VTabConfig::DirectOnly)?;
+ Ok((schema.unwrap(), vtab))
+ }
+
+ // Only a forward full table scan is supported.
+ fn best_index(&self, info: &mut IndexInfo) -> Result<()> {
+ info.set_estimated_cost(1_000_000.);
+ Ok(())
+ }
+
+ fn open(&mut self) -> Result<CsvTabCursor<'_>> {
+ Ok(CsvTabCursor::new(self.reader()?))
+ }
+}
+
+impl CreateVTab<'_> for CsvTab {
+ const KIND: VTabKind = VTabKind::Default;
+}
+
+/// A cursor for the CSV virtual table
+#[repr(C)]
+struct CsvTabCursor<'vtab> {
+ /// Base class. Must be first
+ base: ffi::sqlite3_vtab_cursor,
+ /// The CSV reader object
+ reader: csv::Reader<File>,
+ /// Current cursor position used as rowid
+ row_number: usize,
+ /// Values of the current row
+ cols: csv::StringRecord,
+ eof: bool,
+ phantom: PhantomData<&'vtab CsvTab>,
+}
+
+impl CsvTabCursor<'_> {
+ fn new<'vtab>(reader: csv::Reader<File>) -> CsvTabCursor<'vtab> {
+ CsvTabCursor {
+ base: ffi::sqlite3_vtab_cursor::default(),
+ reader,
+ row_number: 0,
+ cols: csv::StringRecord::new(),
+ eof: false,
+ phantom: PhantomData,
+ }
+ }
+
+ /// Accessor to the associated virtual table.
+ fn vtab(&self) -> &CsvTab {
+ unsafe { &*(self.base.pVtab as *const CsvTab) }
+ }
+}
+
+unsafe impl VTabCursor for CsvTabCursor<'_> {
+ // Only a full table scan is supported. So `filter` simply rewinds to
+ // the beginning.
+ fn filter(
+ &mut self,
+ _idx_num: c_int,
+ _idx_str: Option<&str>,
+ _args: &Values<'_>,
+ ) -> Result<()> {
+ {
+ let offset_first_row = self.vtab().offset_first_row.clone();
+ self.reader.seek(offset_first_row)?;
+ }
+ self.row_number = 0;
+ self.next()
+ }
+
+ fn next(&mut self) -> Result<()> {
+ {
+ self.eof = self.reader.is_done();
+ if self.eof {
+ return Ok(());
+ }
+
+ self.eof = !self.reader.read_record(&mut self.cols)?;
+ }
+
+ self.row_number += 1;
+ Ok(())
+ }
+
+ fn eof(&self) -> bool {
+ self.eof
+ }
+
+ fn column(&self, ctx: &mut Context, col: c_int) -> Result<()> {
+ if col < 0 || col as usize >= self.cols.len() {
+ return Err(Error::ModuleError(format!(
+ "column index out of bounds: {}",
+ col
+ )));
+ }
+ if self.cols.is_empty() {
+ return ctx.set_result(&Null);
+ }
+ // TODO Affinity
+ ctx.set_result(&self.cols[col as usize].to_owned())
+ }
+
+ fn rowid(&self) -> Result<i64> {
+ Ok(self.row_number as i64)
+ }
+}
+
+impl From<csv::Error> for Error {
+ #[cold]
+ fn from(err: csv::Error) -> Error {
+ Error::ModuleError(err.to_string())
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use crate::vtab::csvtab;
+ use crate::{Connection, Result};
+ use fallible_iterator::FallibleIterator;
+
+ #[test]
+ fn test_csv_module() -> Result<()> {
+ let db = Connection::open_in_memory()?;
+ csvtab::load_module(&db)?;
+ db.execute_batch("CREATE VIRTUAL TABLE vtab USING csv(filename='test.csv', header=yes)")?;
+
+ {
+ let mut s = db.prepare("SELECT rowid, * FROM vtab")?;
+ {
+ let headers = s.column_names();
+ assert_eq!(vec!["rowid", "colA", "colB", "colC"], headers);
+ }
+
+ let ids: Result<Vec<i32>> = s.query([])?.map(|row| row.get::<_, i32>(0)).collect();
+ let sum = ids?.iter().sum::<i32>();
+ assert_eq!(sum, 15);
+ }
+ db.execute_batch("DROP TABLE vtab")
+ }
+
+ #[test]
+ fn test_csv_cursor() -> Result<()> {
+ let db = Connection::open_in_memory()?;
+ csvtab::load_module(&db)?;
+ db.execute_batch("CREATE VIRTUAL TABLE vtab USING csv(filename='test.csv', header=yes)")?;
+
+ {
+ let mut s = db.prepare(
+ "SELECT v1.rowid, v1.* FROM vtab v1 NATURAL JOIN vtab v2 WHERE \
+ v1.rowid < v2.rowid",
+ )?;
+
+ let mut rows = s.query([])?;
+ let row = rows.next()?.unwrap();
+ assert_eq!(row.get_unwrap::<_, i32>(0), 2);
+ }
+ db.execute_batch("DROP TABLE vtab")
+ }
+}