summaryrefslogtreecommitdiffstats
path: root/src/lib/util/csv_file.h
blob: 65b62ba144430af036aec58ad700dc0f399f819f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
// Copyright (C) 2014-2020 Internet Systems Consortium, Inc. ("ISC")
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef CSV_FILE_H
#define CSV_FILE_H

#include <exceptions/exceptions.h>
#include <boost/lexical_cast.hpp>
#include <boost/shared_ptr.hpp>
#include <fstream>
#include <ostream>
#include <string>
#include <vector>

namespace isc {
namespace util {

/// @brief Exception thrown when an error occurs during CSV file processing.
class CSVFileError : public Exception {
public:
    CSVFileError(const char* file, size_t line, const char* what) :
        isc::Exception(file, line, what) { };
};

/// @brief Represents a single row of the CSV file.
///
/// The object of this type can create the string holding a collection of the
/// comma separated values, representing a row of the CSV file. It allows the
/// selection of any character as a separator for the values. The default
/// separator is the comma symbol.
///
/// The @c CSVRow object can be constructed in two different ways. The first
/// option is that the caller creates an object holding empty values
/// and then adds values one by one. Note that it is possible to either add
/// a string or a number. The number is converted to the appropriate text
/// representation. When all the values are added, the text representation of
/// the row can be obtained by calling @c CSVRow::render function or output
/// stream operator.
///
/// The @c CSVRow object can be also constructed by parsing a row of a CSV
/// file. In this case, the separator has to be known in advance and passed to
/// the class constructor. The constructor will call the @c CSVRow::parse
/// function internally to tokenize the CSV row and create the collection of
/// values. The class accessors can be then used to retrieve individual values.
///
/// This class is meant to be used by the @c CSVFile class to manipulate
/// individual rows of the CSV file.
class CSVRow {
public:

    /// @brief Constructor, creates the raw to be used for output.
    ///
    /// Creates CSV row with empty values. The values should be
    /// later set using the @c CSVRow::writeAt functions. When the
    /// @c CSVRow::render is called, the text representation of the
    /// row will be created using a separator character specified
    /// as an argument of this constructor.
    ///
    /// This constructor is exception-free.
    ///
    /// @param cols Number of values in the row.
    /// @param separator Character used as a separator between values in the
    /// text representation of the row.
    CSVRow(const size_t cols = 0, const char separator = ',');

    /// @brief Constructor, parses a single row of the CSV file.
    ///
    /// This constructor should be used to parse a single row of the CSV
    /// file. The separator being used for the particular row needs to
    /// be known in advance and specified as an argument of the constructor
    /// if other than the default separator is used in the row being parsed.
    /// An example string to be parsed by this function looks as follows:
    /// "foo,bar,foo-bar".
    ///
    /// This constructor is exception-free.
    ///
    /// @param text Text representation of the CSV row.
    /// @param separator Character being used as a separator in a parsed file.
    CSVRow(const std::string& text, const char separator = ',');

    /// @brief Returns number of values in a CSV row.
    size_t getValuesCount() const {
        return (values_.size());
    }

    /// @brief Parse the CSV file row.
    ///
    /// This function parses a string containing CSV values and assigns them
    /// to the @c values_ private container. These values can be retrieved
    /// from the container by calling @c CSVRow::readAt function.
    ///
    /// This function is exception-free.
    ///
    /// @param line String holding a row of comma separated values.
    void parse(const std::string& line);

    /// @brief Retrieves a value from the internal container.
    ///
    /// @param at Index of the value in the container. The values are indexed
    /// from 0, where 0 corresponds to the left-most value in the CSV file row.
    ///
    /// @return Value at specified index in the text form.
    ///
    /// @throw CSVFileError if the index is out of range. The number of elements
    /// being held by the container can be obtained using
    /// @c CSVRow::getValuesCount.
    std::string readAt(const size_t at) const;

    /// @brief Retrieves a value from the internal container, free of escaped
    /// characters.
    ///
    /// Returns a copy of the internal container value at the given index
    /// which has had all escaped characters replaced with their unescaped
    /// values. Escaped characters embedded using the following format:
    ///
    /// This function fetches the value at the given index and passes it
    /// into CSVRow::unescapeCharacters which replaces any escaped special
    /// characters with their unescaped form.
    ///
    /// @param at Index of the value in the container. The values are indexed
    /// from 0, where 0 corresponds to the left-most value in the CSV file row.
    ///
    /// @return Value at specified index in the text form.
    ///
    /// @throw CSVFileError if the index is out of range. The number of elements
    /// being held by the container can be obtained using
    /// @c CSVRow::getValuesCount.
    std::string readAtEscaped(const size_t at) const;

    /// @brief Trims a given number of elements from the end of a row
    ///
    /// @param count number of elements to trim
    ///
    /// @throw CSVFileError if the number to trim is larger than
    /// then the number of elements
    void trim(const size_t count);

    /// @brief Retrieves a value from the internal container.
    ///
    /// This method is reads a value from the internal container and converts
    /// this value to the type specified as a template parameter. Internally
    /// it uses @c boost::lexical_cast.
    ///
    /// @param at Index of the value in the container. The values are indexed
    /// from 0, where 0 corresponds to the left-most value in the CSV file row.
    /// @tparam T type of the value to convert to.
    ///
    /// @return Converted value.
    ///
    /// @throw CSVFileError if the index is out of range or if the
    /// @c boost::bad_lexical_cast is thrown by the @c boost::lexical_cast.
    template<typename T>
    T readAndConvertAt(const size_t at) const {
        T cast_value;
        try {
            cast_value = boost::lexical_cast<T>(readAt(at).c_str());

        } catch (const boost::bad_lexical_cast& ex) {
            isc_throw(CSVFileError, ex.what());
        }
        return (cast_value);
    }

    /// @brief Creates a text representation of the CSV file row.
    ///
    /// This function iterates over all values currently held in the internal
    /// @c values_ container and appends them to a string. The values are
    /// separated using the separator character specified in the constructor.
    ///
    /// This function is exception free.
    ///
    /// @return Text representation of the CSV file row.
    std::string render() const;

    /// @brief Replaces the value at specified index.
    ///
    /// This function is used to set values to be rendered using
    /// @c CSVRow::render function.
    ///
    /// @param at Index of the value to be replaced.
    /// @param value Value to be written given as string.
    ///
    /// @throw CSVFileError if index is out of range.
    void writeAt(const size_t at, const char* value);

    /// @brief Replaces the value at specified index.
    ///
    /// This function is used to set values to be rendered using
    /// @c CSVRow::render function.
    ///
    /// @param at Index of the value to be replaced.
    /// @param value Value to be written given as string.
    ///
    /// @throw CSVFileError if index is out of range.
    void writeAt(const size_t at, const std::string& value) {
        writeAt(at, value.c_str());
    }

    /// @brief Replaces the value at the specified index with a value that has
    /// had special characters escaped
    ///
    /// This function first calls @c CSVRow::escapeCharacters to replace
    /// special characters with their escaped form.  It then sets the value
    /// to be rendered using @c CSVRow::render function.
    ///
    /// @param at Index of the value to be replaced.
    /// @param value Value to be written given as string.
    ///
    /// @throw CSVFileError if index is out of range.
    void writeAtEscaped(const size_t at, const std::string& value);

    /// @brief Appends the value as a new column.
    ///
    /// @param value Value to be written.
    /// @tparam T Type of the value being written.
    template<typename T>
    void append(const T value) {
        try {
            values_.push_back(boost::lexical_cast<std::string>(value));
        } catch (const boost::bad_lexical_cast& ex) {
            isc_throw(CSVFileError, "unable to stringify the value to be "
                      "appended to the CSV file row.");
        }
    }

    /// @brief Replaces the value at specified index.
    ///
    /// This function is used to set values to be rendered using
    /// @c CSVRow::render function.
    ///
    /// @param at Index of the value to be replaced.
    /// @param value Value to be written - typically a number.
    /// @tparam T Type of the value being written.
    ///
    /// @throw CSVFileError if index is out of range.
    template<typename T>
    void writeAt(const size_t at, const T value) {
        checkIndex(at);
        try {
            values_[at] = boost::lexical_cast<std::string>(value);
        } catch (const boost::bad_lexical_cast& ex) {
            isc_throw(CSVFileError, "unable to stringify the value to be"
                      " written in the CSV file row at position '"
                      << at << "'");
        }
    }

    /// @brief Equality operator.
    ///
    /// Two CSV rows are equal when their string representation is equal. This
    /// includes the order of fields, separator etc.
    ///
    /// @param other Object to compare to.
    bool operator==(const CSVRow& other) const {
        return (render() == other.render());
    }

    /// @brief Unequality operator.
    ///
    /// Two CSV rows are unequal when their string representation is unequal.
    /// This includes the order of fields, separator etc.
    ///
    /// @param other Object to compare to.
    bool operator!=(const CSVRow& other) const {
        return (render() != other.render());
    }

    /// @brief Returns a copy of a string with special characters escaped
    ///
    /// @param orig_str string which may contain characters that require
    /// escaping.
    /// @param characters list of characters which require escaping.
    ///
    /// The escaped characters will use the following format:
    ///
    /// @verbatim
    /// &#x{xx}
    /// @endverbatim
    ///
    /// where {xx} is the two digit hexadecimal ASCII value of the character
    /// escaped. A comma, for example is:
    ///
    /// &\#x2c
    ///
    /// @return A copy of the original string with special characters escaped.
    static std::string escapeCharacters(const std::string& orig_str,
                                        const std::string& characters);

    /// @brief Returns a copy of a string with special characters unescaped
    ///
    /// This function reverses the escaping of characters done by @c
    /// CSVRow::escapeCharacters.
    ///
    /// @param escaped_str string which may contain escaped characters.
    ///
    /// @return A string free of escaped characters
    static std::string unescapeCharacters(const std::string& escaped_str);

private:

    /// @brief Check if the specified index of the value is in range.
    ///
    /// This function is used internally by other functions.
    ///
    /// @param at Value index.
    /// @throw CSVFileError if specified index is not in range.
    void checkIndex(const size_t at) const;

    /// @brief Separator character specified in the constructor.
    ///
    /// @note Separator is held as a string object (one character long),
    /// because the boost::is_any_of algorithm requires a string, not a
    /// char value. If we held the separator as a char, we would need to
    /// convert it to string on every call to @c CSVRow::parse.
    std::string separator_;

    /// @brief Internal container holding values that belong to the row.
    std::vector<std::string> values_;

    /// @brief Prefix used to escape special characters.
    static const std::string escape_tag;
};

/// @brief Overloads the standard output stream operator for the @c CSVRow
/// object.
///
/// The resulting string of characters is the same as the one returned by
/// @c CSVRow::render function.
///
/// @param os Output stream.
/// @param row Object representing a CSV file row.
///
/// @return Reference to an output stream; presumably @c os, so that
/// insertions can be chained (the definition is not part of this header).
std::ostream& operator<<(std::ostream& os, const CSVRow& row);

/// @brief Provides input/output access to CSV files.
///
/// This class provides basic methods to access (parse) and create CSV files.
/// The file is identified by its name qualified with the absolute path.
/// The name of the file is passed to the constructor. Constructor doesn't
/// open/create a file, but simply records a file name specified by a caller.
///
/// There are two functions that can be used to open a file:
/// - @c open - opens an existing file; if the file doesn't exist it creates it,
/// - @c recreate - removes existing file and creates a new one.
///
/// When the file is opened its header is parsed and column names are
/// identified. At this point it is already possible to get the list of the
/// column names using appropriate accessors. The data rows are not parsed
/// at this time. The row parsing is triggered by calling @c next function.
/// The result of parsing a row is stored in the @c CSVRow object passed as
/// a parameter.
///
/// When the new file is created (when @c recreate is called), the CSV header is
/// immediately written into it. The header consists of the column names
/// specified with the @c addColumn function. The subsequent rows are written
/// into this file by calling @c append.
class CSVFile {
public:

    /// @brief Constructor.
    ///
    /// @param filename CSV file name.
    CSVFile(const std::string& filename);

    /// @brief Destructor
    virtual ~CSVFile();

    /// @brief Adds new column name.
    ///
    /// This function adds a new column but doesn't write it to the file yet.
    /// The name of the column will be placed in the CSV header when new file
    /// is created by calling @c recreate or @c open function.
    ///
    /// Unlike the protected @c addColumnInternal, this function prevents
    /// addition of a new column when the file is open.
    ///
    /// @param col_name Name of the column.
    ///
    /// @throw CSVFileError if a column with the specified name exists.
    void addColumn(const std::string& col_name);

    /// @brief Writes the CSV row into the file.
    ///
    /// @param row Object representing a CSV file row.
    ///
    /// @throw CSVFileError When error occurred during IO operation or if the
    /// size of the row doesn't match the number of columns.
    void append(const CSVRow& row) const;

    /// @brief Closes the CSV file.
    void close();

    /// @brief Checks if the CSV file exists and can be opened for reading.
    ///
    /// This method doesn't check if the existing file has a correct file
    /// format.
    ///
    /// @return true if file exists, false otherwise.
    bool exists() const;

    /// @brief Flushes a file.
    void flush() const;

    /// @brief Returns the number of columns in the file.
    ///
    /// @return Number of column names currently held by this object.
    size_t getColumnCount() const {
        return (cols_.size());
    }

    /// @brief Returns the path to the CSV file.
    ///
    /// @return Copy of the file name recorded by the constructor.
    std::string getFilename() const {
        return (filename_);
    }

    /// @brief Returns the description of the last error returned by the
    /// @c CSVFile::next function.
    ///
    /// @return Description of the last error during row validation, i.e.
    /// the message most recently stored with @c CSVFile::setReadMsg.
    std::string getReadMsg() const {
        return (read_msg_);
    }

    /// @brief Returns the index of the column having specified name.
    ///
    /// This function is const and thus does not modify the object, but
    /// it does throw when the requested column is unknown.
    ///
    /// @param col_name Name of the column.
    /// @return Index of the column.
    /// @throw OutOfRange if column with such name doesn't exist.
    size_t getColumnIndex(const std::string& col_name) const;

    /// @brief Returns the name of the column.
    ///
    /// @param col_index Index of the column.
    ///
    /// @return Name of the column.
    /// @throw CSVFileError if the specified index is out of range.
    std::string getColumnName(const size_t col_index) const;

    /// @brief Reads next row from CSV file.
    ///
    /// This function will return the @c CSVRow object representing a
    /// parsed row if parsing is successful. If the end of file has been
    /// reached, the empty row is returned (a row containing no values).
    ///
    /// If validation fails, the description of the problem can be
    /// retrieved using @c CSVFile::getReadMsg.
    ///
    /// @param [out] row Object receiving the parsed CSV row.
    /// @param skip_validation Do not perform validation.
    ///
    /// @return true if row has been read and validated; false if validation
    /// failed.
    bool next(CSVRow& row, const bool skip_validation = false);

    /// @brief Opens existing file or creates a new one.
    ///
    /// This function will try to open existing file if this file has size
    /// greater than 0. If the file doesn't exist or has size of 0, the
    /// file is recreated. If the existing file has been opened, the header
    /// is parsed and column names are initialized in the @c CSVFile object.
    /// By default, the data pointer in the file is set to the beginning of
    /// the first row. In order to retrieve the row contents the @c next
    /// function should be called. If a @c seek_to_end parameter is set to
    /// true, the file will be opened and the internal pointer will be set
    /// to the end of file.
    ///
    /// @param seek_to_end A boolean value which indicates if the input and
    /// output file pointer should be set at the end of file.
    ///
    /// @throw CSVFileError when IO operation fails.

    virtual void open(const bool seek_to_end = false);

    /// @brief Creates a new CSV file.
    ///
    /// The file creation will fail if there are no columns specified.
    /// Otherwise, this function will write the header to the file.
    /// In order to write rows to opened file, the @c append function
    /// should be called.
    virtual void recreate();

    /// @brief Sets error message after row validation.
    ///
    /// The @c CSVFile::validate function is responsible for setting the
    /// error message after validation of the row read from the CSV file.
    /// It will use this function to set this message. Note, that the
    /// @c validate function can set a message after successful validation
    /// too. Such message could say "success", or something similar.
    ///
    /// @param read_msg Error message to be set.
    void setReadMsg(const std::string& read_msg) {
        read_msg_ = read_msg;
    }

    /// @brief Represents empty row.
    ///
    /// @return Copy of a function-local static @c CSVRow constructed with
    /// zero columns.
    static CSVRow EMPTY_ROW() {
        static CSVRow row(0);
        return (row);
    }

protected:

    /// @brief Adds a column regardless if the file is open or not.
    ///
    /// This function adds a new column to the collection. It is meant to be
    /// called internally by the methods of the base class and derived classes.
    /// It must not be used in the public scope. The @c CSVFile::addColumn
    /// must be used in the public scope instead, because it prevents addition
    /// of the new column when the file is open.
    ///
    /// @param col_name Name of the column.
    ///
    /// @throw CSVFileError if a column with the specified name exists.
    void addColumnInternal(const std::string& col_name);

    /// @brief Validate the row read from a file.
    ///
    /// This function implements a basic validation for the row read from the
    /// CSV file. It is virtual so that it may be customized in derived
    /// classes.
    ///
    /// This default implementation checks that the number of values in the
    /// row corresponds to the number of columns specified for this file.
    ///
    /// If row validation fails, the error message is noted and can be retrieved
    /// using @c CSVFile::getReadMsg. The function which overrides this
    /// base implementation is responsible for setting the error message using
    /// @c CSVFile::setReadMsg.
    ///
    /// @param row A row to be validated.
    ///
    /// @return true if the row is valid; false otherwise.
    virtual bool validate(const CSVRow& row);

protected:

    /// @brief This function validates the header of the CSV file.
    ///
    /// If there are any columns added to the @c CSVFile object, it will
    /// check that they exactly match (including order) the header read
    /// from the file.
    ///
    /// This function is called internally by @ref CSVFile::open. Derived classes
    /// may add extra validation steps.
    ///
    /// @param header A row holding a header.
    /// @return true if header matches the columns; false otherwise.
    virtual bool validateHeader(const CSVRow& header);

private:
    /// @brief Sanity check if stream is open.
    ///
    /// Checks if the file stream is open so that IO operations can be
    /// performed on it. This is internally called by the public class members
    /// to prevent them from performing IO operations on invalid stream and
    /// using NULL pointer to a stream. The @c clear() method is called on the
    /// stream after the status has been checked.
    ///
    /// @param operation Presumably the name of the IO operation about to be
    /// performed, used when reporting the error - TODO confirm against the
    /// implementation.
    ///
    /// @throw CSVFileError if stream is closed or pointer to it is NULL.
    void checkStreamStatusAndReset(const std::string& operation) const;

    /// @brief Returns size of the CSV file.
    std::streampos size() const;

    /// @brief CSV file name.
    std::string filename_;

    /// @brief Holds a pointer to the file stream.
    boost::shared_ptr<std::fstream> fs_;

    /// @brief Holds CSV file columns.
    std::vector<std::string> cols_;

    /// @brief Holds last error during row reading or validation.
    std::string read_msg_;
};

} // namespace isc::util
} // namespace isc

#endif // CSV_FILE_H