summaryrefslogtreecommitdiffstats
path: root/src/tools/ceph-diff-sorted.cc
blob: f8e4c28e64789ecc5f0a50205dc4d293a4aec291 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

/*
 * diffsorted -- a utility to compute a line-by-line diff on two
 * sorted input files
 *
 * Copyright © 2019 Red Hat
 *
 * Author: J. Eric Ivancich
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation.
 */


/*
 * SUMMARY
 *
 * The `diffsorted` utility performs a line-by-line diff on two sorted
 * text files, indicating lines that are in one file but not the other
 * using diff-style notation (although line numbers are not indicated).
 *
 * USAGE
 *
 *     rgw-diff-sorted file1.txt file2.txt
 *
 * NOTES
 *
 * Each file should have its lines in sorted order and should have no
 * empty lines.
 *
 * A potential input file can be sorted using the `sort` utility, provided
 * that LANG=C is set to ensure byte lexical order. For example:
 *
 *     LANG=C sort unsorted.txt >sorted.txt
 *
 * or:
 *
 *     export LANG=C
 *     sort unsorted.txt >sorted.txt
 *
 * EXIT STATUS
 *
 *     0 : files same
 *     1 : files different
 *     2 : usage problem (e.g., wrong number of command-line arguments)
 *     3 : problem opening input file
 *     4 : bad file content (e.g., unsorted order or empty lines)
 */


#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>


/*
 * FileOfLines -- forward-only reader over one input file that hands
 * out one line at a time.
 *
 * The reader remembers the current line and the line before it so that
 * each advance() can check the file is in non-decreasing byte order.
 * Problems found while reading are fatal: advance() writes a
 * diagnostic to stderr and terminates the process (status 4 for bad
 * content, status 3 for a stream read error).
 *
 * Typical use: construct, check good(), call advance() once to load
 * the first line, then alternate line() / advance() until eof().
 */
struct FileOfLines {
  const char* filename;   // name used in diagnostics; the pointer is stored as-is
  std::ifstream input;    // declared after filename, which it is opened from
  std::string this_line, prev_line;
  // set once the final line was read without a trailing newline, so
  // the *next* advance() must report end-of-file without reading
  bool next_eof = false;
  bool is_eof = false;

  explicit FileOfLines(const char* _filename) :
    filename(_filename),
    input(filename)
  { }

  /*
   * Print the current line and every remaining line to stdout, each
   * preceded by `prefix`. The current line is printed before eof() is
   * consulted, so call this only while !eof().
   */
  void dump(const std::string& prefix) {
    do {
      std::cout << prefix << this_line << std::endl;
      advance();
    } while (!eof());
  }

  /* True once advance() has moved past the last line. */
  bool eof() const {
    return is_eof;
  }

  /* True while the underlying stream has no error or EOF flag set. */
  bool good() const {
    return input.good();
  }

  /*
   * Move to the next line.
   *
   * Afterwards either eof() is true, or line() holds the next line of
   * the file. Terminates the process with status 4 if the file
   * contains an empty line or a line that sorts before its
   * predecessor, and with status 3 if the stream reports a read error.
   */
  void advance() {
    if (next_eof) {
      is_eof = true;
      return;
    }

    // Keep the old line as prev_line by swapping buffers rather than
    // copying; the explicit clear() keeps this_line empty even when
    // getline() cannot read at all and therefore never resets it.
    prev_line.swap(this_line);
    this_line.clear();
    std::getline(input, this_line);

    // A failed read must not be mistaken for file content: without
    // this check a partial line would be accepted and every later
    // getline() would be a no-op, repeating that line forever.
    if (input.bad()) {
      std::cerr << "Error reading " << filename << "." << std::endl;
      std::exit(3);
    }

    if (this_line.empty()) {
      if (!input.eof()) {
        std::cerr << "Error: " << filename << " has an empty line." <<
          std::endl;
        std::exit(4);
      }
      // nothing left after the final newline
      is_eof = true;
      return;
    } else if (input.eof()) {
      // last line had no trailing newline; it is still a valid line
      next_eof = true;
    }

    if (this_line < prev_line) {
      std::cerr << "Error: " << filename << " is not in sorted order; \"" <<
        this_line << "\" follows \"" << prev_line << "\"." << std::endl;
      std::exit(4);
    }
  }

  /*
   * The current line. The reference stays valid only until the next
   * call to advance(); copy the string if it must outlive that call.
   */
  const std::string& line() const {
    return this_line;
  }
};

/*
 * Entry point: diff two sorted files named on the command line.
 *
 * Lines only in the first file are written to stdout prefixed with
 * "< ", lines only in the second with "> "; lines present in both are
 * not printed.
 *
 * Returns 0 when the files are the same, 1 when they differ, 2 on a
 * usage error and 3 when an input file cannot be opened. Reading is
 * delegated to FileOfLines::advance(), which may terminate the process
 * itself on bad input.
 *
 * The standard `char* argv[]` signature is used, and the function
 * returns instead of calling exit() so that both input objects are
 * destroyed on the way out.
 */
int main(int argc, char* argv[]) {
  if (argc != 3) {
    std::cerr << "Usage: " << argv[0] << " <file1> <file2>" << std::endl;
    return 2;
  }

  FileOfLines input1(argv[1]);
  if (!input1.good()) {
    std::cerr << "Error opening " << argv[1] <<
      "." << std::endl;
    return 3;
  }

  FileOfLines input2(argv[2]);
  if (!input2.good()) {
    std::cerr << "Error opening " << argv[2] <<
      "." << std::endl;
    return 3;
  }

  bool files_same = true;

  // load the first line of each file
  input1.advance();
  input2.advance();

  // Merge-walk both files: always step the side holding the smaller
  // line, reporting it as unique to that side; step both on a match.
  while (!input1.eof() && !input2.eof()) {
    // These are only used before the advance() calls below.
    const std::string& left = input1.line();
    const std::string& right = input2.line();
    const int order = left.compare(right);  // one three-way comparison

    if (order == 0) {
      input1.advance();
      input2.advance();
    } else if (order < 0) {
      files_same = false;
      std::cout << "< " << left << std::endl;
      input1.advance();
    } else {
      files_same = false;
      std::cout << "> " << right << std::endl;
      input2.advance();
    }
  }

  // At most one file still has lines; all of them are unique to it.
  if (!input1.eof()) {
    files_same = false;
    input1.dump("< ");
  } else if (!input2.eof()) {
    files_same = false;
    input2.dump("> ");
  }

  return files_same ? 0 : 1;
}