diff options
Diffstat (limited to 'src/lib/util/csv_file.h')
-rw-r--r-- | src/lib/util/csv_file.h | 575 |
1 files changed, 575 insertions, 0 deletions
diff --git a/src/lib/util/csv_file.h b/src/lib/util/csv_file.h new file mode 100644 index 0000000..65b62ba --- /dev/null +++ b/src/lib/util/csv_file.h @@ -0,0 +1,575 @@ +// Copyright (C) 2014-2020 Internet Systems Consortium, Inc. ("ISC") +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef CSV_FILE_H +#define CSV_FILE_H + +#include <exceptions/exceptions.h> +#include <boost/lexical_cast.hpp> +#include <boost/shared_ptr.hpp> +#include <fstream> +#include <ostream> +#include <string> +#include <vector> + +namespace isc { +namespace util { + +/// @brief Exception thrown when an error occurs during CSV file processing. +class CSVFileError : public Exception { +public: + CSVFileError(const char* file, size_t line, const char* what) : + isc::Exception(file, line, what) { }; +}; + +/// @brief Represents a single row of the CSV file. +/// +/// The object of this type can create the string holding a collection of the +/// comma separated values, representing a row of the CSV file. It allows the +/// selection of any character as a separator for the values. The default +/// separator is the comma symbol. +/// +/// The @c CSVRow object can be constructed in two different ways. The first +/// option is that the caller creates an object holding empty values +/// and then adds values one by one. Note that it is possible to either add +/// a string or a number. The number is converted to the appropriate text +/// representation. When all the values are added, the text representation of +/// the row can be obtained by calling @c CSVRow::render function or output +/// stream operator. +/// +/// The @c CSVRow object can be also constructed by parsing a row of a CSV +/// file. In this case, the separator has to be known in advance and passed to +/// the class constructor. The constructor will call the @c CSVRow::parse +/// function internally to tokenize the CSV row and create the collection of +/// values. The class accessors can be then used to retrieve individual values. +/// +/// This class is meant to be used by the @c CSVFile class to manipulate +/// individual rows of the CSV file. +class CSVRow { +public: + + /// @brief Constructor, creates the raw to be used for output. + /// + /// Creates CSV row with empty values. The values should be + /// later set using the @c CSVRow::writeAt functions. When the + /// @c CSVRow::render is called, the text representation of the + /// row will be created using a separator character specified + /// as an argument of this constructor. + /// + /// This constructor is exception-free. + /// + /// @param cols Number of values in the row. + /// @param separator Character used as a separator between values in the + /// text representation of the row. + CSVRow(const size_t cols = 0, const char separator = ','); + + /// @brief Constructor, parses a single row of the CSV file. + /// + /// This constructor should be used to parse a single row of the CSV + /// file. The separator being used for the particular row needs to + /// be known in advance and specified as an argument of the constructor + /// if other than the default separator is used in the row being parsed. + /// An example string to be parsed by this function looks as follows: + /// "foo,bar,foo-bar". + /// + /// This constructor is exception-free. + /// + /// @param text Text representation of the CSV row. + /// @param separator Character being used as a separator in a parsed file. + CSVRow(const std::string& text, const char separator = ','); + + /// @brief Returns number of values in a CSV row. + size_t getValuesCount() const { + return (values_.size()); + } + + /// @brief Parse the CSV file row. + /// + /// This function parses a string containing CSV values and assigns them + /// to the @c values_ private container. These values can be retrieved + /// from the container by calling @c CSVRow::readAt function. + /// + /// This function is exception-free. + /// + /// @param line String holding a row of comma separated values. + void parse(const std::string& line); + + /// @brief Retrieves a value from the internal container. + /// + /// @param at Index of the value in the container. The values are indexed + /// from 0, where 0 corresponds to the left-most value in the CSV file row. + /// + /// @return Value at specified index in the text form. + /// + /// @throw CSVFileError if the index is out of range. The number of elements + /// being held by the container can be obtained using + /// @c CSVRow::getValuesCount. + std::string readAt(const size_t at) const; + + /// @brief Retrieves a value from the internal container, free of escaped + /// characters. + /// + /// Returns a copy of the internal container value at the given index + /// which has had all escaped characters replaced with their unescaped + /// values. Escaped characters embedded using the following format: + /// + /// This function fetches the value at the given index and passes it + /// into CSVRow::unescapeCharacters which replaces any escaped special + /// characters with their unescaped form. + /// + /// @param at Index of the value in the container. The values are indexed + /// from 0, where 0 corresponds to the left-most value in the CSV file row. + /// + /// @return Value at specified index in the text form. + /// + /// @throw CSVFileError if the index is out of range. The number of elements + /// being held by the container can be obtained using + /// @c CSVRow::getValuesCount. + std::string readAtEscaped(const size_t at) const; + + /// @brief Trims a given number of elements from the end of a row + /// + /// @param count number of elements to trim + /// + /// @throw CSVFileError if the number to trim is larger than + /// then the number of elements + void trim(const size_t count); + + /// @brief Retrieves a value from the internal container. + /// + /// This method is reads a value from the internal container and converts + /// this value to the type specified as a template parameter. Internally + /// it uses @c boost::lexical_cast. + /// + /// @param at Index of the value in the container. The values are indexed + /// from 0, where 0 corresponds to the left-most value in the CSV file row. + /// @tparam T type of the value to convert to. + /// + /// @return Converted value. + /// + /// @throw CSVFileError if the index is out of range or if the + /// @c boost::bad_lexical_cast is thrown by the @c boost::lexical_cast. + template<typename T> + T readAndConvertAt(const size_t at) const { + T cast_value; + try { + cast_value = boost::lexical_cast<T>(readAt(at).c_str()); + + } catch (const boost::bad_lexical_cast& ex) { + isc_throw(CSVFileError, ex.what()); + } + return (cast_value); + } + + /// @brief Creates a text representation of the CSV file row. + /// + /// This function iterates over all values currently held in the internal + /// @c values_ container and appends them to a string. The values are + /// separated using the separator character specified in the constructor. + /// + /// This function is exception free. + /// + /// @return Text representation of the CSV file row. + std::string render() const; + + /// @brief Replaces the value at specified index. + /// + /// This function is used to set values to be rendered using + /// @c CSVRow::render function. + /// + /// @param at Index of the value to be replaced. + /// @param value Value to be written given as string. + /// + /// @throw CSVFileError if index is out of range. + void writeAt(const size_t at, const char* value); + + /// @brief Replaces the value at specified index. + /// + /// This function is used to set values to be rendered using + /// @c CSVRow::render function. + /// + /// @param at Index of the value to be replaced. + /// @param value Value to be written given as string. + /// + /// @throw CSVFileError if index is out of range. + void writeAt(const size_t at, const std::string& value) { + writeAt(at, value.c_str()); + } + + /// @brief Replaces the value at the specified index with a value that has + /// had special characters escaped + /// + /// This function first calls @c CSVRow::escapeCharacters to replace + /// special characters with their escaped form. It then sets the value + /// to be rendered using @c CSVRow::render function. + /// + /// @param at Index of the value to be replaced. + /// @param value Value to be written given as string. + /// + /// @throw CSVFileError if index is out of range. + void writeAtEscaped(const size_t at, const std::string& value); + + /// @brief Appends the value as a new column. + /// + /// @param value Value to be written. + /// @tparam T Type of the value being written. + template<typename T> + void append(const T value) { + try { + values_.push_back(boost::lexical_cast<std::string>(value)); + } catch (const boost::bad_lexical_cast& ex) { + isc_throw(CSVFileError, "unable to stringify the value to be " + "appended to the CSV file row."); + } + } + + /// @brief Replaces the value at specified index. + /// + /// This function is used to set values to be rendered using + /// @c CSVRow::render function. + /// + /// @param at Index of the value to be replaced. + /// @param value Value to be written - typically a number. + /// @tparam T Type of the value being written. + /// + /// @throw CSVFileError if index is out of range. + template<typename T> + void writeAt(const size_t at, const T value) { + checkIndex(at); + try { + values_[at] = boost::lexical_cast<std::string>(value); + } catch (const boost::bad_lexical_cast& ex) { + isc_throw(CSVFileError, "unable to stringify the value to be" + " written in the CSV file row at position '" + << at << "'"); + } + } + + /// @brief Equality operator. + /// + /// Two CSV rows are equal when their string representation is equal. This + /// includes the order of fields, separator etc. + /// + /// @param other Object to compare to. + bool operator==(const CSVRow& other) const { + return (render() == other.render()); + } + + /// @brief Unequality operator. + /// + /// Two CSV rows are unequal when their string representation is unequal. + /// This includes the order of fields, separator etc. + /// + /// @param other Object to compare to. + bool operator!=(const CSVRow& other) const { + return (render() != other.render()); + } + + /// @brief Returns a copy of a string with special characters escaped + /// + /// @param orig_str string which may contain characters that require + /// escaping. + /// @param characters list of characters which require escaping. + /// + /// The escaped characters will use the following format: + /// + /// @verbatim + /// &#x{xx} + /// @endverbatim + /// + /// where {xx} is the two digit hexadecimal ASCII value of the character + /// escaped. A comma, for example is: + /// + /// &\#x2c + /// + /// @return A copy of the original string with special characters escaped. + static std::string escapeCharacters(const std::string& orig_str, + const std::string& characters); + + /// @brief Returns a copy of a string with special characters unescaped + /// + /// This function reverses the escaping of characters done by @c + /// CSVRow::escapeCharacters. + /// + /// @param escaped_str string which may contain escaped characters. + /// + /// @return A string free of escaped characters + static std::string unescapeCharacters(const std::string& escaped_str); + +private: + + /// @brief Check if the specified index of the value is in range. + /// + /// This function is used internally by other functions. + /// + /// @param at Value index. + /// @throw CSVFileError if specified index is not in range. + void checkIndex(const size_t at) const; + + /// @brief Separator character specified in the constructor. + /// + /// @note Separator is held as a string object (one character long), + /// because the boost::is_any_of algorithm requires a string, not a + /// char value. If we held the separator as a char, we would need to + /// convert it to string on every call to @c CSVRow::parse. + std::string separator_; + + /// @brief Internal container holding values that belong to the row. + std::vector<std::string> values_; + + /// @brief Prefix used to escape special characters. + static const std::string escape_tag; +}; + +/// @brief Overrides standard output stream operator for @c CSVRow object. +/// +/// The resulting string of characters is the same as the one returned by +/// @c CSVRow::render function. +/// +/// @param os Output stream. +/// @param row Object representing a CSV file row. +std::ostream& operator<<(std::ostream& os, const CSVRow& row); + +/// @brief Provides input/output access to CSV files. +/// +/// This class provides basic methods to access (parse) and create CSV files. +/// The file is identified by its name qualified with the absolute path. +/// The name of the file is passed to the constructor. Constructor doesn't +/// open/create a file, but simply records a file name specified by a caller. +/// +/// There are two functions that can be used to open a file: +/// - @c open - opens an existing file; if the file doesn't exist it creates it, +/// - @c recreate - removes existing file and creates a new one. +/// +/// When the file is opened its header file is parsed and column names are +/// identified. At this point it is already possible to get the list of the +/// column names using appropriate accessors. The data rows are not parsed +/// at this time. The row parsing is triggered by calling @c next function. +/// The result of parsing a row is stored in the @c CSVRow object passed as +/// a parameter. +/// +/// When the new file is created (when @c recreate is called), the CSV header is +/// immediately written into it. The header consists of the column names +/// specified with the @c addColumn function. The subsequent rows are written +/// into this file by calling @c append. +class CSVFile { +public: + + /// @brief Constructor. + /// + /// @param filename CSV file name. + CSVFile(const std::string& filename); + + /// @brief Destructor + virtual ~CSVFile(); + + /// @brief Adds new column name. + /// + /// This column adds a new column but doesn't write it to the file yet. + /// The name of the column will be placed in the CSV header when new file + /// is created by calling @c recreate or @c open function. + /// + /// @param col_name Name of the column. + /// + /// @throw CSVFileError if a column with the specified name exists. + void addColumn(const std::string& col_name); + + /// @brief Writes the CSV row into the file. + /// + /// @param row Object representing a CSV file row. + /// + /// @throw CSVFileError When error occurred during IO operation or if the + /// size of the row doesn't match the number of columns. + void append(const CSVRow& row) const; + + /// @brief Closes the CSV file. + void close(); + + /// @brief Checks if the CSV file exists and can be opened for reading. + /// + /// This method doesn't check if the existing file has a correct file + /// format. + /// + /// @return true if file exists, false otherwise. + bool exists() const; + + /// @brief Flushes a file. + void flush() const; + + /// @brief Returns the number of columns in the file. + size_t getColumnCount() const { + return (cols_.size()); + } + + /// @brief Returns the path to the CSV file. + std::string getFilename() const { + return (filename_); + } + + /// @brief Returns the description of the last error returned by the + /// @c CSVFile::next function. + /// + /// @return Description of the last error during row validation. + std::string getReadMsg() const { + return (read_msg_); + } + + /// @brief Returns the index of the column having specified name. + /// + /// This function is exception safe. + /// + /// @param col_name Name of the column. + /// @return Index of the column. + /// @throw OutOfRange if column with such name doesn't exist. + size_t getColumnIndex(const std::string& col_name) const; + + /// @brief Returns the name of the column. + /// + /// @param col_index Index of the column. + /// + /// @return Name of the column. + /// @throw CSVFileError if the specified index is out of range. + std::string getColumnName(const size_t col_index) const; + + /// @brief Reads next row from CSV file. + /// + /// This function will return the @c CSVRow object representing a + /// parsed row if parsing is successful. If the end of file has been + /// reached, the empty row is returned (a row containing no values). + /// + /// @param [out] row Object receiving the parsed CSV file. + /// @param skip_validation Do not perform validation. + /// + /// @return true if row has been read and validated; false if validation + /// failed. + bool next(CSVRow& row, const bool skip_validation = false); + + /// @brief Opens existing file or creates a new one. + /// + /// This function will try to open existing file if this file has size + /// greater than 0. If the file doesn't exist or has size of 0, the + /// file is recreated. If the existing file has been opened, the header + /// is parsed and column names are initialized in the @c CSVFile object. + /// By default, the data pointer in the file is set to the beginning of + /// the first row. In order to retrieve the row contents the @c next + /// function should be called. If a @c seek_to_end parameter is set to + /// true, the file will be opened and the internal pointer will be set + /// to the end of file. + /// + /// @param seek_to_end A boolean value which indicates if the input and + /// output file pointer should be set at the end of file. + /// + /// @throw CSVFileError when IO operation fails. + + virtual void open(const bool seek_to_end = false); + + /// @brief Creates a new CSV file. + /// + /// The file creation will fail if there are no columns specified. + /// Otherwise, this function will write the header to the file. + /// In order to write rows to opened file, the @c append function + /// should be called. + virtual void recreate(); + + /// @brief Sets error message after row validation. + /// + /// The @c CSVFile::validate function is responsible for setting the + /// error message after validation of the row read from the CSV file. + /// It will use this function to set this message. Note, that the + /// @c validate function can set a message after successful validation + /// too. Such message could say "success", or something similar. + /// + /// @param read_msg Error message to be set. + void setReadMsg(const std::string& read_msg) { + read_msg_ = read_msg; + } + + /// @brief Represents empty row. + static CSVRow EMPTY_ROW() { + static CSVRow row(0); + return (row); + } + +protected: + + /// @brief Adds a column regardless if the file is open or not. + /// + /// This function adds as new column to the collection. It is meant to be + /// called internally by the methods of the base class and derived classes. + /// It must not be used in the public scope. The @c CSVFile::addColumn + /// must be used in the public scope instead, because it prevents addition + /// of the new column when the file is open. + /// + /// @param col_name Name of the column. + /// + /// @throw CSVFileError if a column with the specified name exists. + void addColumnInternal(const std::string& col_name); + + /// @brief Validate the row read from a file. + /// + /// This function implements a basic validation for the row read from the + /// CSV file. It is virtual so as it may be customized in derived classes. + /// + /// This default implementation checks that the number of values in the + /// row corresponds to the number of columns specified for this file. + /// + /// If row validation fails, the error message is noted and can be retrieved + /// using @c CSVFile::getReadMsg. The function which overrides this + /// base implementation is responsible for setting the error message using + /// @c CSVFile::setReadMsg. + /// + /// @param row A row to be validated. + /// + /// @return true if the column is valid; false otherwise. + virtual bool validate(const CSVRow& row); + +protected: + + /// @brief This function validates the header of the CSV file. + /// + /// If there are any columns added to the @c CSVFile object, it will + /// compare that they exactly match (including order) the header read + /// from the file. + /// + /// This function is called internally by @ref CSVFile::open. Derived classes + /// may add extra validation steps. + /// + /// @param header A row holding a header. + /// @return true if header matches the columns; false otherwise. + virtual bool validateHeader(const CSVRow& header); + +private: + /// @brief Sanity check if stream is open. + /// + /// Checks if the file stream is open so as IO operations can be performed + /// on it. This is internally called by the public class members to prevent + /// them from performing IO operations on invalid stream and using NULL + /// pointer to a stream. The @c clear() method is called on the stream + /// after the status has been checked. + /// + /// @throw CSVFileError if stream is closed or pointer to it is NULL. + void checkStreamStatusAndReset(const std::string& operation) const; + + /// @brief Returns size of the CSV file. + std::streampos size() const; + + /// @brief CSV file name. + std::string filename_; + + /// @brief Holds a pointer to the file stream. + boost::shared_ptr<std::fstream> fs_; + + /// @brief Holds CSV file columns. + std::vector<std::string> cols_; + + /// @brief Holds last error during row reading or validation. + std::string read_msg_; +}; + +} // namespace isc::util +} // namespace isc + +#endif // CSV_FILE_H |