1 files changed, 1507 insertions, 0 deletions
diff --git a/debian/src/geoip-csv-to-dat.cpp b/debian/src/geoip-csv-to-dat.cpp
new file mode 100644
index 0000000..2e59fa0
--- /dev/null
+++ b/debian/src/geoip-csv-to-dat.cpp
@@ -0,0 +1,1507 @@
+/* geoip-csv-to-dat - convert a country database from CSV to GeoIP binary format
+ *
+ * Copyright (c) 2009 Kalle Olavi Niemitalo.
+ * Copyright (c) 2011 Patrick Matthäi
+ * Copyright (c) 2014 Andrew Moise
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#define _GNU_SOURCE 1
+#include <algorithm>
+#include <arpa/inet.h>
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <error.h>
+#include <fstream>
+#include <getopt.h>
+#include <iostream>
+#include <queue>
+#include <set>
+#include <sstream>
+#include <stack>
+#include <string>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sysexits.h>
+#include <vector>
+#include <GeoIP.h>
+
+// Format of GeoIP Database files
+// ======================================
+//
+// 1. Binary trie mapping IP addresses to locations.
+// 2. Information about each location (only in city DBs).
+// 3. Optional unused data.
+// 4. Optional database-info block.
+// 5. Optional structure-info block.
+//
+// Binary trie
+// -----------
+//
+// The trie treats IP addresses as bit sequences and maps them to
+// location numbers.
+//
+// In the country database format, each such number is a country ID
+// that GeoIP_id_by_ipnum() would return.  The meanings of country IDs
+// are hardcoded in libGeoIP and cannot be overridden by the database.
+//
+// In the city database format, each such number is a position to seek
+// to, within section #2 (information about each location), in order
+// to find a record giving information about the location associated
+// with that IP.
+//
+// The root node of the trie is at the beginning of the file, and the
+// other nodes then follow it.  Each node has the same size and
+// consists of two little-endian pointers that correspond to the two
+// possible values of a bit.  The pointers are 24-bit, and each node
+// is thus 6 bytes long.
+//
+// To traverse the trie, you go one bit at a time through the IP
+// address you're looking up -- starting at bit 0, and starting at the
+// root node of the trie. For each bit, you look at the current node,
+// and take the left branch (the first of the two pointers) if the bit
+// is 0, otherwise take the right branch. It is allowed for a node
+// pointer to refer back to earlier nodes in the file, but loops are
+// not allowed.
+//
+// If the pointer you're looking at is less than the total number of
+// nodes in the trie, it indicates the next node you should
+// examine. If the pointer you're looking at is greater than or equal
+// to the total number of nodes, it indicates a leaf -- the end of
+// your search.
+//
+// The meaning of the leaf pointers depends on the database type:
+//
+// In country databases, 0xFFFF?? indicates that the country ID
+// for that IP address is equal to the ?? part. 0xFFFF00 indicates
+// that the IP address you queried is not in the database.
+//
+// City databases contain an extra segment which contains location
+// information records. A leaf pointer equal to (number of nodes in
+// the trie + X) indicates that the location information for the IP
+// address you queried can be found in the location information
+// segment, at offset X. A leaf pointer exactly equal to the number of
+// nodes in the trie indicates that the location information for the
+// IP address you queried is not stored in this database.
+//
+// Location segment
+// --------------------
+//
+// City databases contain a location segment. Each record in this
+// segment contains information about a single location which IP
+// addresses may be mapped to. Pointers to records in this segment are
+// contained within leaf nodes of the trie; each record contains, in
+// this order:
+//
+// * A country ID, as a single byte
+// * A "region" (generally state or province), as a NULL-terminated string
+// * A city name, as a NULL-terminated string
+// * A postal code, as a NULL-terminated string
+// * Encoded latitude, as a little-endian 3-byte integer. To convert
+//   back to actual latitude, divide by 10000 and subtract 180.
+// * Encoded longitude, as a little-endian 3-byte integer. To convert
+//   back to actual longitude, divide by 10000 and subtract 180.
+// * Area code and metro code (ONLY if the country is US). These are
+//   encoded into a single little-endian 3-byte integer. The area code
+//   is the encoded value modulo 1000, and the metro code is the
+//   encoded value divided by 1000. If the country is not US, this
+//   field is not present.
+//
+// The string fields may be equal to empty strings, but all fields are
+// always included (except area/metro code, which is included if and
+// only if the country is US).
+//
+// All strings seem to be in ISO-8859-1 encoding.
+//
+// Optional unused data
+// --------------------
+//
+// The file format permits any amount of extra data between the binary
+// trie and the optional blocks.
+//
+// Optional database-info block
+// ----------------------------
+//
+// Near the end of the file, there may be a three-byte tag (0x00 0x00
+// 0x00) followed by at most DATABASE_INFO_MAX_SIZE - 1 = 99 bytes of
+// text that describes the database.  GeoIP_database_info() returns
+// this text and appends a terminating '\0'.
+//
+// The GeoLite Country IPv4 database downloadable from MaxMind
+// includes this database-info block.
+//
+// Optional structure-info block
+// -----------------------------
+//
+// At the very end of the file, there may be a three-byte tag (0xFF
+// 0xFF 0xFF) followed by at most STRUCTURE_INFO_MAX_SIZE - 1 = 19
+// bytes.  The first byte is the database type,
+// e.g. GEOIP_COUNTRY_EDITION = 1 or GEOIP_COUNTRY_EDITION_V6 = 12,
+// possibly with 105 added to it.  Type-specific information then
+// follows.  There is no type-specific information for the country
+// editions.
+//
+// The GeoLite Country IPv4 database downloadable from MaxMind does
+// not include this structure-info block.
+
+
+
+/*************************************************
+ * Binary trie
+ *
+ * This section implements a data structure representing the trie
+ * which, within a .dat file, maps IP address ranges to locations.
+ *************************************************/
+
+namespace {
+	class binary_trie	// in-memory trie of two-edge nodes; node 0 is the root
+	{
+	public:
+		typedef uint_fast32_t edge_type;	// a node index if < nodes.size(), otherwise a leaf value
+		struct node
+		{
+			edge_type edges[2];	// edges[0] is taken for a 0 bit, edges[1] for a 1 bit
+		};
+
+		explicit binary_trie(edge_type leaf);
+		void set_range(
+			const uint8_t range_min[],
+			const uint8_t range_max[],
+			std::size_t bit_count,
+			edge_type leaf);
+		void reorder_depth_first();
+		void reorder_in_blocks(std::size_t bytes_per_block);
+		// Mutable iteration over all nodes, in their current numbering.
+		std::vector<node>::iterator nodes_begin() { return nodes.begin(); }
+		std::vector<node>::iterator nodes_end() { return nodes.end(); }
+
+	private:
+		std::vector<node> nodes;	// never empty: the constructor adds the root
+
+		// This could be std::vector<bool> but that seems slower.
+		typedef std::vector<uint8_t> bits_vector;
+		// Recursive helpers behind set_range(); a NULL bound means "unbounded".
+		void set_range_in_node(
+			const bits_vector *min_bits,
+			const bits_vector *max_bits,
+			std::size_t bit_pos,
+			edge_type edit_node,
+			edge_type leaf);
+		void set_range_in_edge(
+			const bits_vector *min_bits,
+			const bits_vector *max_bits,
+			std::size_t bit_pos,
+			edge_type edit_node,
+			bool bit,
+			edge_type leaf);
+		void reorder(	// applies a renumbering computed by one of the public reorder_* functions
+			const std::vector<edge_type> &old_to_new,
+			const std::vector<edge_type> &new_to_old);
+	};
+}
+
+/** Construct a binary trie and its root node.
+ *
+ * \param leaf
+ * Both edges of the root node will initially point to this leaf.
+ * The caller should provide a value that means nothing was found.  */
+binary_trie::binary_trie(edge_type leaf)
+{
+	const node root = {{ leaf, leaf }};	// the lone initial node: both edges end in the given leaf
+	nodes.push_back(root);
+}
+
+/** Edit the trie so it maps a range of bit sequences to the same
+ * leaf.
+ *
+ * \param range_min
+ * The first bit sequence in the range.  Eight bits are packed in each
+ * byte.  The most significant bit of the whole sequence is in the
+ * most significant bit of the first byte.
+ *
+ * \param range_max
+ * The last bit sequence in the range.
+ *
+ * \param bit_count
+ * The number of bits in both sequences.
+ *
+ * \param leaf
+ * The leaf to which all the bit sequences in the range should be
+ * mapped.  */
+void
+binary_trie::set_range(
+	const uint8_t range_min[],
+	const uint8_t range_max[],
+	std::size_t bit_count,
+	edge_type leaf)
+{
+	// Unpack both endpoints into one flag per bit, taking the most
+	// significant bit of each byte first.
+	bits_vector lo(bit_count), hi(bit_count);
+	for (std::size_t pos = 0; pos != bit_count; ++pos) {
+		const unsigned shift = 7 - pos % 8;
+		lo[pos] = (range_min[pos / 8] >> shift) & 1;
+		hi[pos] = (range_max[pos / 8] >> shift) & 1;
+	}
+	set_range_in_node(&lo, &hi, 0, 0, leaf);	// start at the root node, bit 0
+}
+
+/** Edit a node in the trie so it maps a range of bit sequences to the
+ * same leaf.
+ *
+ * \param min_bits
+ * The first bit sequence in the range, or NULL if unbounded.
+ *
+ * \param max_bits
+ * The last bit sequence in the range, or NULL if unbounded.
+ *
+ * \param bit_pos
+ * Which bit in the sequences corresponds to \a edit_node.
+ *
+ * \param edit_node
+ * The node to be modified.
+ *
+ * \param leaf
+ * The leaf to which all the bit sequences in the range should be
+ * mapped.  */
+void
+binary_trie::set_range_in_node(
+	const bits_vector *min_bits,
+	const bits_vector *max_bits,
+	std::size_t bit_pos,
+	edge_type edit_node,
+	edge_type leaf)
+{
+	if (!min_bits || (*min_bits)[bit_pos] == false) {	// the range reaches into the 0 edge
+		set_range_in_edge(min_bits,
+				  (max_bits && (*max_bits)[bit_pos] == false)
+				  ? max_bits : NULL,	// the upper bound stays in force below the 0 edge only if its bit here is 0
+				  bit_pos + 1, edit_node, false, leaf);
+	}
+	if (!max_bits || (*max_bits)[bit_pos] == true) {	// the range reaches into the 1 edge
+		set_range_in_edge((min_bits && (*min_bits)[bit_pos] == true)
+				  ? min_bits : NULL,	// the lower bound stays in force below the 1 edge only if its bit here is 1
+				  max_bits,
+				  bit_pos + 1, edit_node, true, leaf);
+	}
+}
+
+/** Edit an edge in the trie so it maps a range of bit sequences to
+ * the same leaf.
+ *
+ * \param min_bits
+ * The first bit sequence in the range, or NULL if unbounded.
+ *
+ * \param max_bits
+ * The last bit sequence in the range, or NULL if unbounded.
+ *
+ * \param bit_pos
+ * Which bit in the sequences corresponds to \a bit.
+ *
+ * \param edit_node
+ * The node in which the edge to be modified is located.
+ *
+ * \param bit
+ * Which edge of \a edit_node should be modified.
+ *
+ * \param leaf
+ * The leaf to which all the bit sequences in the range should be
+ * mapped.  */
+void
+binary_trie::set_range_in_edge(
+	const bits_vector *min_bits,
+	const bits_vector *max_bits,
+	std::size_t bit_pos,
+	edge_type edit_node,
+	bool bit,
+	edge_type leaf)
+{
+	// Check if the range fills this edge entirely.
+	bool entire = true;
+	if (min_bits
+	    && std::find(min_bits->begin() + bit_pos, min_bits->end(),
+			 true) != min_bits->end())
+		entire = false;	// a 1 bit remains in the lower bound, so it cuts into this subtree
+	if (max_bits
+	    && std::find(max_bits->begin() + bit_pos, max_bits->end(),
+			 false) != max_bits->end())
+		entire = false;	// a 0 bit remains in the upper bound, so it cuts into this subtree
+
+	if (entire) {
+		nodes[edit_node].edges[bit] = leaf;	// overwrites the edge; nodes it used to lead to stay in the vector
+	} else {
+		edge_type next = nodes[edit_node].edges[bit];
+		if (next >= nodes.size()) {	// the edge holds a leaf: replace it with a new node
+			const node new_node = {{ next, next }};	// both edges of the new node keep the old leaf
+			next = nodes.size();
+			nodes.push_back(new_node);
+			nodes[edit_node].edges[bit] = next;
+		}
+
+		set_range_in_node(min_bits, max_bits, bit_pos,
+				  next, leaf);
+	}
+}
+
+/** Renumber the nodes in depth-first order.  */
+void
+binary_trie::reorder_depth_first()
+{
+	const std::size_t node_count = nodes.size();
+	std::vector<edge_type> old_to_new(node_count, edge_type(-1)), new_to_old;
+	new_to_old.reserve(node_count);
+	// Pre-order walk from the root; edge 0 is pushed last so it is popped first.
+	std::stack<edge_type> pending;
+	for (pending.push(0); !pending.empty(); ) {
+		const edge_type current = pending.top();
+		pending.pop();
+		if (current >= node_count)
+			continue;	// a leaf value, not a node index
+		old_to_new[current] = new_to_old.size();
+		new_to_old.push_back(current);
+		pending.push(nodes[current].edges[1]);
+		pending.push(nodes[current].edges[0]);
+	}
+	reorder(old_to_new, new_to_old);
+}
+
+/** Renumber the nodes to make lookups use CPU and disk caches more
+ * effectively.
+ *
+ * First group the nodes into blocks so that each block contains the
+ * root of a subtrie and as many levels of its descendants as will
+ * fit.  This way, after the root is paged in, the next few lookup
+ * steps need not page in anything else.  Then, sort the nodes of each
+ * block in depth-first order.  That should give each lookup almost
+ * 1/2 chance to find the next node immediately adjacent.
+ *
+ * With a block size of 1024 bytes, this renumbering reduces the time
+ * required for random lookups by about 1.1%, compared to a plain
+ * depth-first order.  However, it's still 2.3% slower than the
+ * database optimized by MaxMind.  */
+void
+binary_trie::reorder_in_blocks(
+	std::size_t bytes_per_block)
+{
+	const edge_type none = -1;	// marks a node that has not been placed in the output yet
+	std::vector<edge_type> old_to_new, new_to_old;
+	ssize_t bytes_left = bytes_per_block;	// signed: the last node of a block may overshoot it
+	old_to_new.resize(nodes.size(), none);
+	new_to_old.reserve(nodes.size());
+	for (edge_type subtrie = 0; subtrie < nodes.size(); ++subtrie) {
+		// If subtrie has already been added to the output,
+		// ignore it.
+		if (old_to_new[subtrie] != none)
+			continue;
+
+		// Walk breadth-first from subtrie until we have a
+		// block full of nodes or the subtrie runs out.  Don't
+		// add these nodes immediately to the output, however.
+		// Instead just list them in nodes_in_block.
+		std::set<edge_type> nodes_in_block;
+		std::queue<edge_type> breadth_first;
+		breadth_first.push(subtrie);
+		if (bytes_left <= 0)
+			bytes_left += bytes_per_block;	// NOTE(review): if bytes_per_block were 0 the walk below would never run -- confirm callers never pass 0
+		while (bytes_left > 0 && !breadth_first.empty()) {
+			edge_type edge = breadth_first.front();
+			breadth_first.pop();
+			if (edge >= nodes.size())
+				continue;	// a leaf value, not a node index
+
+			// Let the last node of the block straddle the
+			// block boundary.  That's better than making
+			// the hotter first node do so.
+			bytes_left -= 6;	// one node occupies 6 bytes in the file
+			nodes_in_block.insert(edge);
+
+			breadth_first.push(nodes[edge].edges[0]);
+			breadth_first.push(nodes[edge].edges[1]);
+		}
+
+		// Add the nodes from nodes_in_block to the output in
+		// depth-first order.  This assumes they are all
+		// reachable from subtrie.
+		std::stack<edge_type> depth_first;
+		depth_first.push(subtrie);
+		while (!depth_first.empty()) {
+			edge_type edge = depth_first.top();
+			depth_first.pop();
+			if (nodes_in_block.find(edge)
+			    == nodes_in_block.end())
+				continue;	// a leaf, or a node left for a later block
+
+			old_to_new[edge] = new_to_old.size();
+			new_to_old.push_back(edge);
+
+			depth_first.push(nodes[edge].edges[1]);	// pushed first so that edge 0 is popped first
+			depth_first.push(nodes[edge].edges[0]);
+		}
+	}
+	reorder(old_to_new, new_to_old);
+}
+
+/** Rebuild the node array in a new order.
+ *
+ * \a new_to_old lists, for each new position, the old index of the node
+ * placed there; \a old_to_new is the inverse map and is used to rewrite
+ * edges that point at nodes.  Edges that hold leaves are copied as-is.  */
+void
+binary_trie::reorder(
+	const std::vector<edge_type> &old_to_new,
+	const std::vector<edge_type> &new_to_old)
+{
+	const std::size_t old_count = nodes.size();
+	std::vector<node> renumbered(new_to_old.size());
+	for (std::size_t pos = 0; pos < new_to_old.size(); ++pos) {
+		const node &source = nodes[new_to_old[pos]];
+		for (int side = 0; side < 2; ++side) {
+			const edge_type target = source.edges[side];
+			renumbered[pos].edges[side] =
+				(target < old_count) ? old_to_new[target] : target;
+		}
+	}
+	nodes.swap(renumbered);
+}
+
+/*************************************************
+ * CSV file support
+ *
+ * This section implements reading from .csv files.
+ *************************************************/
+
+namespace {
+	/** Interface for classes interested in .csv data -- this should be
+	 * implemented and then passed to csv_read_file(), which will then
+	 * call read_csv_line(), providing the data in the .csv file. */
+	class csv_data_reader
+	{
+	public:
+		virtual ~csv_data_reader() {}
+		// Called once for every line read from the input, in order.
+		virtual void read_csv_line(const char *csv_file_name,	// name of the input, for messages
+					   int csv_line_number,	// 1-based line number
+					   std::vector<std::string> &fields) = 0;	// that line's fields; the reading code reuses one vector for every line
+	};
+}
+
+namespace {
+	/** Convert a line from a .csv file into a vector of
+	 *  tokens. For internal use by the .csv reading code. */
+	void
+	csv_line_to_vector(
+		const std::string &line,
+		std::vector<std::string> &fields)
+	{
+		// Double quotes toggle quoting and are never copied; a comma
+		// outside quotes ends a field; spaces right after it are dropped.
+		fields.clear();
+		std::string current;
+		bool in_quotes = false, skip_spaces = false;
+		for (std::string::size_type pos = 0; pos < line.size(); ++pos) {
+			const char ch = line[pos];
+			if (ch == ' ' && skip_spaces)
+				continue;
+			skip_spaces = (ch == ',' && !in_quotes);
+			if (ch == '"') {
+				in_quotes = !in_quotes;
+			} else if (skip_spaces) {
+				fields.push_back(current);
+				current.clear();
+			} else {
+				current.push_back(ch);
+			}
+		}
+		fields.push_back(current);
+	}
+
+	/** Load data from a CSV-formatted stream.
+	 *
+	 * \param reader
+	 * The reader to call for each line of the CSV
+	 *
+	 * \param csv_file_name
+	 * The name of the file that \a csv_stream is reading.
+	 * This string is used only for error messages.
+	 *
+	 * \param csv_stream
+	 * The stream to read from.   */
+	void
+	csv_read_stream(
+		csv_data_reader &reader,
+		const char *csv_file_name,
+		std::istream &csv_stream)
+	{
+		std::string text;
+		std::vector<std::string> columns;
+		// The reader sees 1-based line numbers.
+		for (int line_no = 1; getline(csv_stream, text); ++line_no) {
+			csv_line_to_vector(text, columns);
+			reader.read_csv_line(csv_file_name, line_no, columns);
+		}
+		// Running out of input is the normal exit; only a bad
+		// stream is reported as an I/O error.
+		if (csv_stream.bad())
+			error(EX_IOERR, errno, "%s", csv_file_name);
+	}
+
+	/** Load data from a CSV-formatted file or standard input.
+	 *
+	 * \param reader
+	 * The reader to call for each line of the CSV.
+	 *
+	 * \param csv_file_name
+	 * The name of the CSV file that should be read, or "-" for
+	 * standard input.   */
+	void
+	csv_read_file(
+		csv_data_reader &reader,
+		const char *csv_file_name)
+	{
+		const bool use_stdin = (std::strcmp(csv_file_name, "-") == 0);
+		if (use_stdin) {
+			csv_read_stream(reader, csv_file_name, std::cin);
+			return;
+		}
+		std::ifstream input(csv_file_name, std::ios::in);
+		if (!input)
+			error(EX_NOINPUT, errno, "%s", csv_file_name);
+		csv_read_stream(reader, csv_file_name, input);
+	}
+}
+
+/*************************************************
+ * .dat file support
+ *
+ * This section implements support code for writing out .dat files in
+ * MaxMind's GeoIP binary format.
+ *************************************************/
+
+namespace {
+
+	/** .dat file writer class
+	 *
+	 * To write out a .dat file, construct a dat_writer, then call
+	 * (in this order):
+	 *
+	 * write_trie()
+	 * write_database_info() (optional)
+	 * write_structure_info()
+	 *
+	 * Setting dat_file_name to "-" will write to standard output;
+	 * otherwise, a file will be created, and closed when the
+	 * dat_writer is deleted. */
+
+	class dat_writer
+	{
+	public:
+		dat_writer(const char *dat_file_name, GeoIPDBTypes database_type);
+		virtual ~dat_writer();
+
+		void write_trie(binary_trie &trie);
+		void write_database_info(const char *database_info);	// must not be NULL: the text is measured with std::strlen()
+		virtual void write_structure_info();
+
+	protected:
+		std::ostream *dat_stream;	// &std::cout, or a std::ofstream allocated by the constructor
+		bool need_to_delete_stream;	// true only when dat_stream was allocated by the constructor
+		std::string dat_file_name;	// kept for error messages
+		GeoIPDBTypes database_type;	// written out as the type byte of the structure-info block
+	};
+
+}
+
+/** Open the output stream: "-" selects standard output, any other name is created as a binary file.  */
+dat_writer::dat_writer(const char *dat_file_name, GeoIPDBTypes database_type):
+	dat_file_name(dat_file_name),
+	database_type(database_type)
+{
+	need_to_delete_stream = (std::strcmp(dat_file_name, "-") != 0);
+	if (!need_to_delete_stream) {
+		dat_stream = &std::cout;
+		return;
+	}
+	dat_stream = new std::ofstream(dat_file_name, std::ios::out | std::ios::binary);
+	// new never returns NULL, so a failed open shows only in the stream's own state.
+	if (!*dat_stream)
+		error(EX_CANTCREAT, errno, "%s", dat_file_name);
+}
+
+dat_writer::~dat_writer()
+{
+	// Free only a stream the constructor allocated; std::cout is left alone.
+	delete (need_to_delete_stream ? dat_stream : NULL);
+}
+
+/** Write every trie node to the output as two little-endian 24-bit
+ * edge values, i.e. 6 bytes per node.  */
+void dat_writer::write_trie(binary_trie &trie)
+{
+	for (std::vector<binary_trie::node>::iterator it = trie.nodes_begin();
+	     it != trie.nodes_end(); ++it)
+	{
+		// Fill the buffer with explicit casts: brace-initialising
+		// bytes from the wider edge_type is a narrowing conversion.
+		unsigned char binary[6];
+		for (int edge = 0; edge < 2; ++edge) {
+			for (int byte = 0; byte < 3; ++byte) {
+				binary[3 * edge + byte] = static_cast<unsigned char>(
+					(it->edges[edge] >> (8 * byte)) & 0xFF);
+			}
+		}
+		dat_stream->write(reinterpret_cast<const char *>(binary), sizeof binary);
+		if (dat_stream->bad())
+			error(EX_IOERR, errno, "%s", dat_file_name.c_str());
+	}
+}
+
+/** Append the database-info block: a 0x00 0x00 0x00 tag followed by the text, without a terminating NUL.  */
+void dat_writer::write_database_info(const char *database_info)
+{
+	static const char tag[3] = { 0, 0, 0 };
+	dat_stream->write(tag, sizeof tag);
+	dat_stream->write(database_info, std::strlen(database_info));
+	if (dat_stream->bad())
+		error(EX_IOERR, errno, "%s", dat_file_name.c_str());
+}
+
+void dat_writer::write_structure_info()
+{
+	const unsigned char structure_info[4] = { 0xFF, 0xFF, 0xFF, static_cast<unsigned char>(database_type) };	// tag, then the type byte (cast: { } rejects narrowing from the enum)
+	dat_stream->write(reinterpret_cast<const char *>(structure_info), sizeof structure_info);
+}
+
+/*************************************************
+ * .dat file writer class, extended for city DBs
+ *************************************************/
+
+namespace
+{
+
+	class city_dat_writer : public dat_writer
+	{
+	public:
+		// All serialized location information, in one big
+		// undifferentiated block
+		std::stringstream location_stream;
+
+		// Seek offset of each location within
+		// location_stream (relative to the beginning of
+		// location_stream). An offset of -1 means that that
+		// location is not in the table (can happen if the
+		// location info's out of order).
+		std::vector<int> location_pos;
+
+		// Set of location IDs that are actually going to be used;
+		// we'll silently ignore any locations not in this set.
+		std::set<int> needed_locations;
+
+		city_dat_writer(const char *dat_file_name, GeoIPDBTypes database_type);
+
+		// Notify of a location ID we need -- this MUST be
+		// called for every location ID you care about before
+		// the location CSV is read; any ID not explicitly
+		// notified will be discarded.
+		void notify_need_location(int loc_id);
+		// Append one location record to location_stream and note its offset in location_pos.
+		void serialize_location_info(std::vector<std::string> &info,
+					     const char *input_file_name,
+					     int input_line_number);
+		// Turn provisional leaf values in the trie into on-disk values, using location_pos.
+		void finalize_location_offsets(binary_trie &trie);
+		void write_locations();	// copies location_stream to the output stream
+		virtual void write_structure_info(binary_trie &trie);	// NOTE(review): signature differs from dat_writer::write_structure_info(), so this hides the base function instead of overriding it
+	};
+
+}
+
+city_dat_writer::city_dat_writer(const char *dat_file_name, GeoIPDBTypes database_type)
+	: dat_writer(dat_file_name, database_type)	// the base class opens the output stream
+{ }	// nothing else to do: the location containers are default-constructed empty
+
+void city_dat_writer::notify_need_location(int loc_id)
+{
+	needed_locations.insert(loc_id);	// a std::set, so repeated notifications of one ID are harmless
+}
+
+/** Rewrite every leaf edge of \a trie from its provisional value to the
+ * value that is stored on disk.  Must run after all needed locations have
+ * been serialized, since it looks their offsets up in location_pos.  */
+void city_dat_writer::finalize_location_offsets(binary_trie &trie)
+{
+	// We're going to convert the location numbers in the trie
+	// into the final location numbers we're going to want to
+	// write to disk. Previous to this call, leaf nodes in the
+	// trie have the value:
+	//
+	// 0x1000000 + the location number
+	//
+	// After this call, leaf nodes in the trie have the value:
+	//
+	// (total number of nodes in the trie) + (offset of location
+	// record in the location segment)
+	//
+	// Absence of a record is indicated by the value 0x1000000
+	// before this call, and by the value (total number of nodes
+	// in the trie) after this call.
+
+	const binary_trie::edge_type no_data = 0x1000000;
+	const int trie_size = std::distance(trie.nodes_begin(), trie.nodes_end());
+
+	for (std::vector<binary_trie::node>::iterator it = trie.nodes_begin();
+	     it != trie.nodes_end(); ++it)
+	{
+		// Both edges of a node follow the same rules.
+		for (int bit = 0; bit < 2; ++bit) {
+			binary_trie::edge_type &edge = it->edges[bit];
+			if (edge < no_data)
+				continue;	// index of another node, not a leaf
+			if (edge == no_data) {
+				edge = trie_size;
+				continue;
+			}
+
+			// Pointer to a location record.  error() gets errnum 0
+			// below because no errno value is involved; a non-zero
+			// errnum would append an unrelated strerror() text.
+			const int loc_id = edge - no_data;
+			if (static_cast<std::size_t>(loc_id) >= location_pos.size()
+			    || location_pos[loc_id] == -1)
+				error(EX_DATAERR, 0, "Location %d exists in blocks but not in locations", loc_id);
+
+			const int offset = location_pos[loc_id] + trie_size;
+			if (offset > 0xFFFFFF)
+				error(EX_DATAERR, 0, "Overflow! Offset for location %d too large (0x%x > 0xFFFFFF)", loc_id, offset);
+			edge = offset;
+		}
+	}
+}
+
+/** Append all buffered location records to the output stream.  */
+void city_dat_writer::write_locations()
+{
+	std::streambuf *const records = location_stream.rdbuf();
+	*dat_stream << records;
+
+	if (dat_stream->bad())
+		error(EX_IOERR, errno, "%s", dat_file_name.c_str());
+}
+
+/** Write the structure-info block: the 0xFF 0xFF 0xFF tag, the database
+ * type, then the trie's node count as a little-endian 24-bit integer.  */
+void city_dat_writer::write_structure_info(binary_trie &trie)
+{
+	const int trie_size = std::distance(trie.nodes_begin(), trie.nodes_end());
+	// Bytes are stored through explicit casts; listing these int and enum
+	// values inside { } would be a narrowing conversion.
+	unsigned char structure_info[7] = { 0xFF, 0xFF, 0xFF };
+	structure_info[3] = static_cast<unsigned char>(database_type);
+	for (int byte = 0; byte < 3; ++byte)
+		structure_info[4 + byte] = static_cast<unsigned char>((trie_size >> (8 * byte)) & 0xFF);
+	dat_stream->write(reinterpret_cast<const char *>(structure_info), sizeof structure_info);
+}
+
+/** Convert location info into on-disk format and append it to
+ *  location_stream.
+ *
+ *  \param info the location info read from the .csv file:
+ *
+ *  info[0] is the location ID, info[1] the country code,
+ *  info[2] the region, info[3] the city, info[4] the postal code,
+ *  info[5] the latitude and info[6] the longitude; for US
+ *  locations info[7] is the metro code and info[8] the area code.
+ *
+ *  \param input_file_name input file name (for error
+ *  notifications)
+ *
+ *  \param input_line_number input file line number (for error
+ *  notifications)
+ **/
+
+void city_dat_writer::serialize_location_info(std::vector<std::string> &info,
+					      const char *input_file_name,
+					      int input_line_number)
+{
+	// First, we determine the offset of this location block.
+	int loc_id = ::atoi(info[0].c_str());
+
+	if (needed_locations.find(loc_id) == needed_locations.end()) {
+		// We don't need this location, so we skip serializing
+		// it altogether.  This test stays ahead of the field
+		// count check so that skipped lines are never validated.
+		return;
+	}
+
+	// Every record is read up to the longitude (7 fields); US
+	// records also carry the metro and area codes (9 fields).
+	const std::size_t fields_needed =
+		(info.size() > 1 && info[1] == "US") ? 9 : 7;
+	if (info.size() < fields_needed) {
+		error_at_line(EX_DATAERR, 0, input_file_name,
+			      input_line_number,
+			      "Too few fields in location record for ID %d",
+			      loc_id);
+	}
+
+	if (loc_id >= location_pos.size()) {
+		// We need to add to the location table (this is the
+		// usual case).
+
+		while(loc_id > location_pos.size()) {
+			// If some numbers were skipped in the data,
+			// then we need to add some empty locations to
+			// the table before we find our spot.
+			location_pos.push_back(-1);
+		}
+
+		// Now we have our spot, insert this location.
+		location_pos.push_back(location_stream.tellp());
+	} else {
+		// We already have a space in the table for this location --
+		// if it's not empty, then we have two locations with the same
+		// ID, and we print an error.
+		if (location_pos[loc_id] != -1) {
+			error_at_line(EX_DATAERR, 0, input_file_name,
+				      input_line_number,
+				      "Duplicate location info for ID %d",
+				      loc_id);
+		}
+		location_pos[loc_id] = location_stream.tellp();
+	}
+
+	// Country ID ("AN" is looked up as "CW")
+	const char *country_code = (info[1] == "AN") ? "CW" : info[1].c_str();
+	const int country_id = GeoIP_id_by_code(country_code);
+	if (country_id == 0) {
+		// Report against the input position with error_at_line();
+		// errnum is 0 because no errno value is involved.
+		error_at_line(EX_DATAERR, 0, input_file_name,
+			      input_line_number,
+			      "Unrecognized country code: %s", info[1].c_str());
+	}
+	location_stream.put(country_id);
+
+	// Region, city and postal code, each followed by a NUL
+	for (int field = 2; field <= 4; ++field) {
+		location_stream << info[field];
+		location_stream.put('\0');
+	}
+
+	// Latitude, then longitude: degrees + 180, scaled by 10000 and
+	// stored as a little-endian 3-byte integer.  The 0.5 makes the
+	// truncating conversion round to nearest, so a product that lands
+	// just below an integer is not stored one unit too low.
+	for (int field = 5; field <= 6; ++field) {
+		const double degrees = ::atof(info[field].c_str());
+		const int encoded = static_cast<int>((degrees + 180) * 10000 + 0.5);
+		for (int shift = 0; shift < 24; shift += 8)
+			location_stream.put((encoded >> shift) & 0xFF);
+	}
+
+	// Area code and metro code
+	if (info[1] == "US") {
+		const int metro_code = ::atoi(info[7].c_str());
+		const int area_code = ::atoi(info[8].c_str());
+		const int area_metro_combined = metro_code * 1000 + area_code;
+		for (int shift = 0; shift < 24; shift += 8)
+			location_stream.put((area_metro_combined >> shift) & 0xFF);
+	}
+}
+
+/*************************************************
+ * Command line and options
+ *
+ * This section implements the command line parsing and stores the
+ * options for controlling the program's behavior.
+ *************************************************/
+
+namespace {
+
+	struct cmdline {
+		const char *ip_block_csv_file_name;	// defaults to "-"; replaced by the first non-option argument
+		const char *location_csv_file_name;	// set by -l; NULL when not given
+		const char *dat_file_name;		// set by -o; defaults to "-"
+		int address_family;			// AF_INET by default; AF_INET6 after -6 or -t GEOIP_COUNTRY_EDITION_V6
+	        GeoIPDBTypes database_type;		// GEOIP_COUNTRY_EDITION unless -6 or -t changes it
+		const char *database_info;		// set by -i; NULL when not given
+		bool verbose;				// set by -v
+
+		cmdline(int argc, char **argv);
+	};
+}
+
+cmdline::cmdline(int argc, char **argv):	// parses argv with getopt_long(); usage problems are reported through error()
+	ip_block_csv_file_name("-"),
+	location_csv_file_name(NULL),
+	dat_file_name("-"),
+	address_family(AF_INET),
+	database_type(GEOIP_COUNTRY_EDITION),
+	database_info(NULL),
+	verbose(false)
+{
+	enum {
+		OPT_HELP = -2	// not a character, and distinct from the -1 that ends option parsing below
+	};
+
+	static const struct option long_options[] = {
+		{ "inet", no_argument, NULL, '4' },
+		{ "inet6", no_argument, NULL, '6' },
+		{ "info", required_argument, NULL, 'i' },
+		{ "location-csv", required_argument, NULL, 'l' },
+		{ "output", required_argument, NULL, 'o' },
+		{ "type", required_argument, NULL, 't' },
+		{ "verbose", no_argument, NULL, 'v' },
+		{ "help", no_argument, NULL, OPT_HELP },
+		{ NULL, 0, NULL, 0 }
+	};
+	static const char *const usage = "\
+Usage: %s [OPTION] [CSV-FILE]...\n\
+Convert a GeoIP database from CSV to GeoIP binary format.\n\
+\n\
+  -4, --inet              set database type to GEOIP_COUNTRY_EDITION, v4 addresses (default)\n\
+  -6, --inet6             set database type to GEOIP_COUNTRY_EDITION_V6, v6 addresses\n\
+  -t, --type=TYPE         set database type explicitly (e.g. to GEOIP_CITY_EDITION_REV1)\n\
+  -i, --info=TEXT         add copyright or other info TEXT to output\n\
+  -l, --location-csv=FILE set location CSV file name (required for GEOIP_CITY_EDITION_REV1)\n\
+  -o, --output=FILE       write the binary data to FILE, not stdout\n\
+  -v, --verbose           show what is going on\n\
+      --help              display this help and exit\n";
+
+	for (;;) {
+		int optret = getopt_long(argc, argv, "46i:l:o:t:v", long_options, NULL);
+
+		if (optret == -1)
+			break;
+		switch (optret) {
+		case '4':	// NOTE(review): database_type is left untouched here, so "-6 -4" keeps GEOIP_COUNTRY_EDITION_V6 with AF_INET -- confirm that is intended
+			address_family = AF_INET;
+			break;
+		case '6':
+			database_type = GEOIP_COUNTRY_EDITION_V6;
+			address_family = AF_INET6;
+			break;
+		case 'i':
+			database_info = optarg;
+			if (std::strlen(database_info) > 99) {	// 99 = DATABASE_INFO_MAX_SIZE - 1, per the format notes at the top of this file
+				error(EX_USAGE, 0,
+				      "Database info must not be longer than 99 bytes");
+			}
+			break;
+		case 'l':
+			location_csv_file_name = optarg;
+			break;
+		case 'o':
+			dat_file_name = optarg;
+			break;
+		case 't':
+			if (!strcmp(optarg, "GEOIP_COUNTRY_EDITION")) {
+				database_type = GEOIP_COUNTRY_EDITION;
+			} else if (!strcmp(optarg, "GEOIP_COUNTRY_EDITION_V6")) {
+				database_type = GEOIP_COUNTRY_EDITION_V6;
+				address_family = AF_INET6;
+			} else if (!strcmp(optarg, "GEOIP_CITY_EDITION_REV1")) {
+				database_type = GEOIP_CITY_EDITION_REV1;
+			} else {
+				error(EX_USAGE, 0,
+				      "Unrecognized database type (we support GEOIP_COUNTRY_EDITION, GEOIP_COUNTRY_EDITION_V6, \
+GEOIP_CITY_EDITION_REV1)");
+			}
+			break;
+		case 'v':
+			verbose = true;
+			break;
+		case OPT_HELP:
+			std::printf(usage, program_invocation_name);
+			std::exit(EX_OK);
+		case '?':	// getopt_long() found an unknown option or a missing argument
+			std::fprintf(stderr,
+				     "Try `%s --help' for more information.\n",
+				     program_invocation_name);
+			std::exit(EX_USAGE);
+		default:
+			std::abort();	// an option listed in the tables above has no case here
+		}
+	}
+
+	if (optind < argc)	// the first non-option argument names the IP block CSV
+		ip_block_csv_file_name = argv[optind++];
+
+	if (database_type == GEOIP_CITY_EDITION_REV1 && location_csv_file_name == NULL) {
+		error(EX_USAGE, 0,
+		      "Must specify -l option when type is GEOIP_CITY_EDITION_REV1");
+	}
+
+	if (optind < argc) {	// anything left over is one argument too many
+		error(EX_USAGE, 0,
+		      "Only one non-option argument is allowed");
+	}
+}
+
+/*************************************************
+ * Country DB reading and writing
+ *
+ * This section contains the code that converts a country .csv
+ * file to a country .dat file.
+ *************************************************/
+
+namespace {
+	/** Converter for country databases; it receives CSV rows as a csv_data_reader. */
+	class country_db_impl : public csv_data_reader
+	{
+	public:
+		binary_trie trie;        // filled by read_csv_line(), written out by convert_db()
+		struct cmdline &cmdline; // command-line settings, bound by the constructor
+
+		enum {
+			CSV_FIELD_MIN_TEXT,     // minimum address, parsed with inet_pton()
+			CSV_FIELD_MAX_TEXT,     // maximum address, parsed with inet_pton()
+			CSV_FIELD_MIN_DECIMAL,  // not read by read_csv_line()
+			CSV_FIELD_MAX_DECIMAL,  // not read by read_csv_line()
+			CSV_FIELD_COUNTRY_CODE, // looked up with GeoIP_id_by_code()
+			CSV_FIELD_COUNTRY_NAME, // not read by read_csv_line()
+			CSV_FIELDS              // number of columns every row must have
+		};
+
+		country_db_impl(struct cmdline &cmdline);
+		void convert_db(std::ostream *verbose_stream);
+		void read_csv_line(const char *csv_file_name,
+				   int csv_line_number,
+				   std::vector<std::string> &fields);
+	};
+
+}
+
+country_db_impl::country_db_impl(struct cmdline &in_cmdline):
+	trie(0xFFFF00),     // 0xFFFF00 is also the base read_csv_line() adds country ids to
+	cmdline(in_cmdline) // initializers follow member declaration order (trie, cmdline)
+{ }
+
+/** Handle one row of the country CSV (callback, see csv_read_file()):
+ *  map the row's address range to its country leaf in the trie. */
+void country_db_impl::read_csv_line(const char *csv_file_name,
+				    int csv_line_number,
+				    std::vector<std::string> &csv_fields)
+{
+	if (csv_fields.size() != CSV_FIELDS) {
+		error_at_line(EX_DATAERR, 0, csv_file_name, csv_line_number,
+			      "Wrong number of fields");
+	}
+
+	// Substitute country codes before the id lookup: AN -> CW, XK -> RS.
+	std::string &code = csv_fields[CSV_FIELD_COUNTRY_CODE];
+	if (code == "AN")
+		code = "CW";
+	else if (code == "XK")
+		code = "RS";
+
+	const int country_id = GeoIP_id_by_code(code.c_str());
+	if (country_id == 0) {
+		error_at_line(EX_DATAERR, 0, csv_file_name, csv_line_number,
+			      "Unrecognized country code: %s", code.c_str());
+	}
+	const binary_trie::edge_type country_leaf = 0xFFFF00 + country_id;
+
+	// Both range ends are parsed into a union big enough for either family.
+	union {
+		struct in_addr inet;
+		uint8_t inetbytes[4];
+		struct in6_addr inet6;
+	} range_first, range_last;
+	const char *const first_text = csv_fields[CSV_FIELD_MIN_TEXT].c_str();
+	const char *const last_text = csv_fields[CSV_FIELD_MAX_TEXT].c_str();
+	if (inet_pton(cmdline.address_family, first_text, &range_first) <= 0) {
+		error_at_line(EX_DATAERR, 0, csv_file_name, csv_line_number,
+			      "Cannot parse minimum address: %s", first_text);
+	}
+	if (inet_pton(cmdline.address_family, last_text, &range_last) <= 0) {
+		error_at_line(EX_DATAERR, 0, csv_file_name, csv_line_number,
+			      "Cannot parse maximum address: %s", last_text);
+	}
+
+	// The key length is 32 bits for IPv4 and 128 bits for IPv6; any
+	// other address family is a programming error.
+	if (cmdline.address_family == AF_INET) {
+		trie.set_range(range_first.inetbytes, range_last.inetbytes,
+			       32, country_leaf);
+	} else if (cmdline.address_family == AF_INET6) {
+		trie.set_range(range_first.inet6.s6_addr, range_last.inet6.s6_addr,
+			       128, country_leaf);
+	} else {
+		abort();
+	}
+}
+
+/** Run the complete country conversion: read the block CSV named by
+ *  cmdline into the trie, reorder the trie, then write the .dat file.
+ *  Progress messages go to verbose_stream unless it is NULL. */
+
+void country_db_impl::convert_db(std::ostream *verbose_stream)
+{
+	if (verbose_stream != NULL) {
+		std::ostream &out = *verbose_stream;
+		out << program_invocation_name;
+		out << ": Reading CSV and building the trie" << std::endl;
+	}
+	csv_read_file(*this, cmdline.ip_block_csv_file_name);
+
+	if (verbose_stream != NULL) {
+		std::ostream &out = *verbose_stream;
+		out << program_invocation_name << ": Optimizing" << std::endl;
+	}
+	trie.reorder_depth_first();
+	trie.reorder_in_blocks(1024);
+
+	if (verbose_stream != NULL) {
+		std::ostream &out = *verbose_stream;
+		out << program_invocation_name << ": Writing output" << std::endl;
+	}
+
+	dat_writer dat_out(cmdline.dat_file_name, cmdline.database_type);
+	dat_out.write_trie(trie);
+	if (cmdline.database_info != NULL)
+		dat_out.write_database_info(cmdline.database_info);
+	dat_out.write_structure_info();
+}
+
+/*************************************************
+ * City DB conversion
+ *
+ * This section implements converting the two .csv files storing city
+ * data to a city .dat file.
+ *************************************************/
+
+namespace {
+
+	/** Implementation for converting a city DB: the block CSV and the
+	 *  location CSV are both read through read_csv_line(). */
+
+	class city_db_impl : public csv_data_reader
+	{
+	public:
+		// Trie mapping IP ranges to location blocks
+		binary_trie trie;
+
+		// Writer for .dat file
+		city_dat_writer writer;
+
+		enum {
+			STAGE_READING_BLOCKS,   // rows are handled by read_block_line()
+			STAGE_READING_LOCATIONS // rows are handled by read_location_line()
+		};
+
+		// Which stage of CSV reading we're at (out of above
+		// enum)
+		int which_stage;
+
+		struct cmdline &cmdline; // command-line settings, bound by the constructor
+
+		enum {
+			CSV_BLOCK_FIELD_MIN_DECIMAL, // header name "startIpNum"
+			CSV_BLOCK_FIELD_MAX_DECIMAL, // header name "endIpNum"
+			CSV_BLOCK_FIELD_LOC,         // header name "locId"
+			CSV_BLOCK_FIELDS             // number of columns in a block row
+		};
+
+		enum {
+			CSV_LOCATION_FIELD_ID,         // header name "locId"
+			CSV_LOCATION_FIELD_COUNTRY,    // header name "country"
+			CSV_LOCATION_FIELD_REGION,     // header name "region"
+			CSV_LOCATION_FIELD_CITY,       // header name "city"
+			CSV_LOCATION_FIELD_POSTALCODE, // header name "postalCode"
+			CSV_LOCATION_FIELD_LATITUDE,   // header name "latitude"
+			CSV_LOCATION_FIELD_LONGITUDE,  // header name "longitude"
+			CSV_LOCATION_FIELD_METROCODE,  // header name "metroCode"
+			CSV_LOCATION_FIELD_AREACODE,   // header name "areaCode"
+			CSV_LOCATION_FIELDS            // number of columns in a location row
+		};
+
+		city_db_impl(struct cmdline &cmdline);
+		void convert_db(std::ostream *verbose_stream);
+		void read_csv_line(const char *csv_file_name,
+				   int csv_line_number,
+				   std::vector<std::string> &fields);
+
+		void read_location_line(const char *csv_file_name,
+					int csv_line_number,
+					std::vector<std::string> &fields);
+		void read_block_line(const char *csv_file_name,
+				     int csv_line_number,
+				     std::vector<std::string> &fields);
+
+		// Check that a token within the "header" of the CSV
+		// files is what we expect it to be, and cause a data
+		// error if not.
+		void check_csv_header_token(std::vector<std::string> &tokens,
+					    int token_number,
+					    const char *token_expected,
+					    const char *csv_file_name,
+					    int csv_line_number);
+	};
+
+}
+
+city_db_impl::city_db_impl(struct cmdline &in_cmdline):
+	trie(0x1000000), // We use 0x1000000 as the beginning of the
+			 // location information, since we don't know
+			 // the real value and we'll need to remap all
+			 // the offsets later anyway.
+	writer(in_cmdline.dat_file_name, in_cmdline.database_type),
+	which_stage(STAGE_READING_BLOCKS), // members are initialized in declaration
+	cmdline(in_cmdline)                // order, so the list follows that order
+{ }
+
+/** Run the complete city conversion: read the block CSV and then the
+ *  location CSV named by cmdline, reorder the trie, and write the .dat
+ *  file. Progress messages go to verbose_stream unless it is NULL. */
+
+void
+city_db_impl::convert_db(std::ostream *verbose_stream)
+{
+	// Stage 1: block CSV rows are dispatched to read_block_line()
+	if (verbose_stream != NULL) {
+		std::ostream &out = *verbose_stream;
+		out << program_invocation_name;
+		out << ": Reading block CSV and building the trie" << std::endl;
+	}
+
+	which_stage = STAGE_READING_BLOCKS;
+	csv_read_file(*this, cmdline.ip_block_csv_file_name);
+
+	if (verbose_stream != NULL) {
+		const int node_count =
+			std::distance(trie.nodes_begin(), trie.nodes_end());
+		std::ostream &out = *verbose_stream;
+
+		out << program_invocation_name;
+		out << ": Done reading blocks, trie size is " << node_count << std::endl;
+	}
+
+	// Stage 2: location CSV rows are dispatched to read_location_line()
+	if (verbose_stream != NULL) {
+		std::ostream &out = *verbose_stream;
+		out << program_invocation_name;
+		out << ": Reading location CSV" << std::endl;
+	}
+
+	which_stage = STAGE_READING_LOCATIONS;
+	csv_read_file(*this, cmdline.location_csv_file_name);
+
+	// Stage 3: reorder the trie nodes
+	if (verbose_stream != NULL) {
+		std::ostream &out = *verbose_stream;
+		out << program_invocation_name << ": Optimizing" << std::endl;
+	}
+
+	trie.reorder_depth_first();
+	trie.reorder_in_blocks(1024);
+
+	// Stage 4: let the writer finalize the location offsets in the trie
+	if (verbose_stream != NULL) {
+		std::ostream &out = *verbose_stream;
+		out << program_invocation_name << ": Linking location and block data" << std::endl;
+	}
+
+	writer.finalize_location_offsets(trie);
+
+	// Stage 5: emit the trie, the locations and the trailing info blocks
+	if (verbose_stream != NULL) {
+		std::ostream &out = *verbose_stream;
+		out << program_invocation_name << ": Writing output" << std::endl;
+	}
+
+	writer.write_trie(trie);
+	writer.write_locations();
+	if (cmdline.database_info != NULL)
+		writer.write_database_info(cmdline.database_info);
+	writer.write_structure_info(trie);
+}
+
+/** Callback for receiving CSV data (see csv_read_file()). The same
+ *  callback serves the block CSV and the location CSV: which_stage
+ *  selects read_block_line() or read_location_line(). Any other
+ *  which_stage value is reported as an internal error (EX_SOFTWARE). */
+void city_db_impl::read_csv_line(const char *csv_file_name,
+				 int csv_line_number,
+				 std::vector<std::string> &csv_fields)
+{
+	switch(which_stage) {
+	case STAGE_READING_BLOCKS:
+		read_block_line(csv_file_name, csv_line_number, csv_fields);
+		break;
+	case STAGE_READING_LOCATIONS:
+		read_location_line(csv_file_name, csv_line_number, csv_fields);
+		break;
+	default: // errnum is 0: no errno is involved, so no strerror() text is wanted
+		error(EX_SOFTWARE, 0, "Invalid which_stage value: %d", which_stage);
+	}
+}
+
+/** Callback for reading one line of the block CSV. Line 1 is skipped,
+ *  line 2 must carry the expected header, later lines add a range. */
+void city_db_impl::read_block_line(const char *csv_file_name,
+				   int csv_line_number,
+				   std::vector<std::string> &csv_fields)
+{
+	if (csv_line_number == 1)
+		return; // Assume this is copyright information and
+			// skip doing anything to it
+
+	if (csv_fields.size() != CSV_BLOCK_FIELDS) {
+		error_at_line(EX_DATAERR, 0, csv_file_name, csv_line_number,
+			      "Wrong number of fields");
+		return;
+	}
+
+	if (csv_line_number == 2) {
+		// Assume this is header information -- we check it to
+		// make sure we're looking at the right format of file.
+		check_csv_header_token(csv_fields, CSV_BLOCK_FIELD_MIN_DECIMAL, "startIpNum",
+				       csv_file_name, csv_line_number);
+		check_csv_header_token(csv_fields, CSV_BLOCK_FIELD_MAX_DECIMAL, "endIpNum",
+				       csv_file_name, csv_line_number);
+		check_csv_header_token(csv_fields, CSV_BLOCK_FIELD_LOC, "locId",
+				       csv_file_name, csv_line_number);
+
+		// Format checks out, we're now done with this line
+		return;
+	}
+
+	const int loc_id = atoi(csv_fields[CSV_BLOCK_FIELD_LOC].c_str()); // NOTE(review): atoi() cannot report a malformed id
+	const binary_trie::edge_type leaf = 0x1000000 + loc_id;
+
+	if (cmdline.address_family != AF_INET) {
+		error(EX_SOFTWARE, 0, "IPv6 with city database is unimplemented."); // errnum 0: not an errno failure
+	}
+
+	union {
+		struct in_addr inet;
+		uint8_t inetbytes[4];
+	} minaddr, maxaddr;
+	// inet_aton() also accepts one plain number as a whole address (presumably the form of these columns -- confirm).
+	if (inet_aton(csv_fields[CSV_BLOCK_FIELD_MIN_DECIMAL].c_str(), &(minaddr.inet)) == 0) {
+		error_at_line(EX_DATAERR, 0, csv_file_name, csv_line_number, // errnum 0: bad data, not an errno failure
+			      "Invalid min IP address");
+	}
+	if (inet_aton(csv_fields[CSV_BLOCK_FIELD_MAX_DECIMAL].c_str(), &(maxaddr.inet)) == 0) {
+		error_at_line(EX_DATAERR, 0, csv_file_name, csv_line_number, // errnum 0: bad data, not an errno failure
+			      "Invalid max IP address");
+	}
+
+	writer.notify_need_location(loc_id);
+	trie.set_range(minaddr.inetbytes, maxaddr.inetbytes, 32, leaf);
+}
+
+/** Handle one row of the location CSV: skip line 1, verify the column
+ *  names on line 2, and pass every later row on to the writer. */
+
+void city_db_impl::read_location_line(const char *csv_file_name,
+				      int csv_line_number,
+				      std::vector<std::string> &csv_fields)
+{
+	// Column names expected on the header line, indexed by the
+	// CSV_LOCATION_FIELD_* value of each column.
+	static const char *const expected_header[CSV_LOCATION_FIELDS] = {
+		"locId",      // CSV_LOCATION_FIELD_ID
+		"country",    // CSV_LOCATION_FIELD_COUNTRY
+		"region",     // CSV_LOCATION_FIELD_REGION
+		"city",       // CSV_LOCATION_FIELD_CITY
+		"postalCode", // CSV_LOCATION_FIELD_POSTALCODE
+		"latitude",   // CSV_LOCATION_FIELD_LATITUDE
+		"longitude",  // CSV_LOCATION_FIELD_LONGITUDE
+		"metroCode",  // CSV_LOCATION_FIELD_METROCODE
+		"areaCode"    // CSV_LOCATION_FIELD_AREACODE
+	};
+
+	// The first line is assumed to be copyright information.
+	if (csv_line_number == 1)
+		return;
+
+	if (csv_fields.size() != CSV_LOCATION_FIELDS) {
+		error_at_line(EX_DATAERR, 0, csv_file_name, csv_line_number,
+			      "Wrong number of fields");
+		return;
+	}
+
+	if (csv_line_number != 2) {
+		// Ordinary data row.
+		writer.serialize_location_info(csv_fields, csv_file_name, csv_line_number);
+		return;
+	}
+
+	// Header row: make sure every column carries the name we expect, so
+	// that we know we are looking at the right format of file.
+	for (int column = 0; column < CSV_LOCATION_FIELDS; ++column) {
+		check_csv_header_token(csv_fields, column, expected_header[column],
+				       csv_file_name, csv_line_number);
+	}
+}
+
+void city_db_impl::check_csv_header_token(std::vector<std::string> &tokens,
+					  int token_number,           // index into tokens
+					  const char *token_expected, // required header name
+					  const char *csv_file_name,
+					  int csv_line_number)
+{
+	// Exit with a data error unless the field matches; errnum is 0 so no strerror() text is added.
+	if (tokens[token_number] != token_expected)
+		error_at_line(EX_DATAERR, 0, csv_file_name, csv_line_number,
+			      "Incorrect format: field %d is \"%s\", but we expected \"%s\"",
+			      token_number, tokens[token_number].c_str(), token_expected);
+}
+
+/*************************************************
+ * Main program
+ *
+ * This is the entry point.
+ *************************************************/
+
+/** Entry point: parse the command line, pick where progress messages
+ *  go, and run the converter for the selected database type. */
+int
+main(int argc, char **argv)
+{
+	cmdline options(argc, argv);
+
+	// Progress messages are off unless requested; when the .dat file
+	// name is "-" they go to stderr, otherwise to stdout.
+	std::ostream *verbose_stream = NULL;
+	if (options.verbose) {
+		if (strcmp(options.dat_file_name, "-") == 0)
+			verbose_stream = &std::cerr;
+		else
+			verbose_stream = &std::cout;
+	}
+
+	// Pick the converter that matches the requested database type;
+	// for any other type no conversion is run.
+	if (options.database_type == GEOIP_COUNTRY_EDITION
+	    || options.database_type == GEOIP_COUNTRY_EDITION_V6) {
+		country_db_impl country_db(options);
+
+		country_db.convert_db(verbose_stream);
+	} else if (options.database_type == GEOIP_CITY_EDITION_REV1) {
+		city_db_impl city_db(options);
+
+		city_db.convert_db(verbose_stream);
+	}
+
+	if (verbose_stream != NULL) {
+		std::ostream &out = *verbose_stream;
+		out << program_invocation_name << ": All done" << std::endl;
+	}
+
+	return 0;
+}