summaryrefslogtreecommitdiffstats
path: root/tool/stripccomments.c
diff options
context:
space:
mode:
Diffstat (limited to 'tool/stripccomments.c')
-rw-r--r--tool/stripccomments.c228
1 files changed, 228 insertions, 0 deletions
diff --git a/tool/stripccomments.c b/tool/stripccomments.c
new file mode 100644
index 0000000..53933c0
--- /dev/null
+++ b/tool/stripccomments.c
@@ -0,0 +1,228 @@
+/**
+ Strips C- and C++-style comments from stdin, sending the results to
+ stdout. It assumes that its input is legal C-like code, and performs
+ only minimal error handling.
+
+ It treats string literals as anything starting and ending with
+ matching double OR single quotes OR backticks (for use with
+ scripting languages which use those). It assumes that a quote
+ character within a string which uses the same quote type is escaped
+ by a backslash. It should not be used on any code which might
+ contain C/C++ comments inside heredocs, and similar constructs, as
+ it will strip those out.
+
+ Usage: $0 [--keep-first|-k] < input > output
+
+ The --keep-first (-k) flag tells it to retain the first comment in the
+ input stream (which is often a license or attribution block). It
+ may be given repeatedly, each one incrementing the number of
+ retained comments by one.
+
+ License: Public Domain
+ Author: Stephan Beal (stephan@wanderinghorse.net)
+*/
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+
+/*
+  MARKER() is a debugging aid. Call it with doubled parentheses so
+  that a printf-style argument list can be passed through a
+  fixed-arity macro:
+
+    MARKER(("state=%d\n", state));
+
+  The enabled variant prints the source file and line followed by the
+  formatted message. It uses printf(), i.e. it writes to stdout, so its
+  output is mixed into anything else the program sends there.
+
+  Both variants expand to a single do{...}while(0) statement so that
+  MARKER((...)); is safe as the body of an unbraced if/else. The
+  disabled variant keeps the printf call behind if(0): it is never
+  executed, but its arguments are still compiled.
+*/
+#if 1
+#define MARKER(pfexp) \
+  do{ printf("MARKER: %s:%d:\t",__FILE__,__LINE__); \
+    printf pfexp; \
+  } while(0)
+#else
+#define MARKER(pfexp) do{ if(0){ printf pfexp; } }while(0)
+#endif
+
+/*
+  Global application state, shared by main() and do_it_all().
+*/
+struct {
+  /* Stream the source text is read from. */
+  FILE * input;
+  /* Stream the filtered text is written to. */
+  FILE * output;
+  /* Result code: stays 0 unless an error is recorded. */
+  int rc;
+  /* Number of leading comments which are still to be kept. */
+  int keepFirst;
+} App = { NULL, NULL, 0, 0 };
+
+/**
+   Copies App.input to App.output, leaving out C-style and C++-style
+   comments. The first App.keepFirst comments encountered are copied
+   through unchanged, and App.keepFirst is decremented for each one
+   kept.
+
+   Text between matching single quotes, double quotes or backticks is
+   treated as a literal and copied verbatim, so comment markers inside
+   a literal are left alone. A backslash escapes the character which
+   follows it inside a literal.
+
+   If the input ends inside a literal, a message is written to stderr,
+   App.rc is set to 1 and the function returns immediately.
+
+   An elided C-style comment is dropped entirely, including any
+   newlines inside it. The newline which ends a C++-style comment is
+   always emitted, even when the comment itself is elided.
+*/
+void do_it_all(void){
+  enum states {
+    S_NONE = 0 /* not in comment */,
+    S_SLASH1 = 1 /* slash - possibly comment prefix */,
+    S_CPP = 2 /* in C++ comment */,
+    S_C = 3 /* in C comment */
+  };
+  int ch, prev = EOF;
+  FILE * out = App.output;
+  int const slash = '/';
+  int const star = '*';
+  int line = 1;
+  int col = 0;
+  enum states state = S_NONE /* current state */;
+  int elide = 0 /* true if currently eliding output */;
+  /* Column of the slash which opened the current C-style comment, or
+     -99 when there is none on the current line. It exists for one
+     corner case: a comment which opens with slash-star-slash. There
+     the star belongs to the opener, so the second slash must not be
+     taken as the end of the comment. */
+  int state3Col = -99;
+  for( ; EOF != (ch = fgetc(App.input)); prev = ch, ++col ){
+    switch(state){
+      case S_SLASH1: /* previous character was a held-back slash */
+        if(star == ch || slash == ch){
+          /* Comment opener: keep it while keep-first credit remains,
+             otherwise elide it. */
+          if(App.keepFirst>0){
+            elide = 0;
+            --App.keepFirst;
+          }else{
+            elide = 1;
+          }
+          if(star == ch){
+            state = S_C;
+            state3Col = col-1;
+          }else{
+            state = S_CPP;
+          }
+          if(!elide){
+            fputc(slash, out);
+            fputc(ch, out);
+          }
+          break;
+        }
+        /* It wasn't a comment after all. Emit the held-back slash and
+           let the S_NONE case handle the current character, so that a
+           quote directly after a slash still opens a literal. */
+        fputc(slash, out);
+        state = S_NONE;
+        /* fall through */
+      case S_NONE:
+        if('\''==ch || '"'==ch || '`'==ch){
+          /* Read string literal...
+             needed to properly catch comments in strings. */
+          int const quote = ch,
+            startLine = line, startCol = col;
+          int ch2, escaped = 0, endOfString = 0;
+          fputc(ch, out);
+          for( ++col; !endOfString && EOF != (ch2 = fgetc(App.input));
+               ++col ){
+            switch(ch2){
+              case '\\': escaped = !escaped;
+                break;
+              case '`':
+              case '\'':
+              case '"':
+                /* Only an unescaped quote of the opening kind closes
+                   the literal. */
+                if(!escaped && quote == ch2) endOfString = 1;
+                escaped = 0;
+                break;
+              default:
+                escaped = 0;
+                break;
+            }
+            if('\n'==ch2){
+              ++line;
+              col = 0;
+            }
+            fputc(ch2, out);
+          }
+          if(EOF == ch2){
+            fprintf(stderr, "Unexpected EOF while reading %s literal "
+                    "on line %d column %d.\n",
+                    ('\''==ch) ? "char" : "string",
+                    startLine, startCol);
+            App.rc = 1;
+            return;
+          }
+          break;
+        }
+        if(slash == ch){
+          /* Hold the slash back until the next character shows
+             whether it opens a comment. */
+          state = S_SLASH1;
+          break;
+        }
+        fputc(ch, out);
+        break;
+      case S_CPP: /* C++ comment */
+        if('\n' == ch){
+          /* End of comment. Clearing elide first means the newline
+             itself is always emitted. */
+          state = S_NONE;
+          elide = 0;
+        }
+        if(!elide){
+          fputc(ch, out);
+        }
+        break;
+      case S_C: /* C comment */
+        if(!elide){
+          fputc(ch, out);
+        }
+        /* A slash preceded by a star closes the comment, except when
+           it sits at column state3Col+2: then the star is the one
+           from the comment opener (slash-star-slash). */
+        if(slash == ch && star == prev && col != state3Col+2){
+          state = S_NONE;
+          elide = 0;
+          state3Col = -99;
+        }
+        break;
+      default:
+        assert(!"impossible!");
+        break;
+    }
+    if('\n' == ch){
+      ++line;
+      col = 0;
+      state3Col = -99;
+    }
+  }
+  if(S_SLASH1 == state){
+    /* The input ended on a slash which was still being held back.
+       It cannot open a comment any more, so emit it. */
+    fputc(slash, out);
+  }
+}
+
+/**
+   Writes a one-line description of the program, followed by a usage
+   line, to stderr. zAppName is the name shown for the program in the
+   usage line.
+*/
+static void usage(char const *zAppName){
+  fputs("Strips C- and C++-style comments from stdin and sends "
+        "the results to stdout.\n", stderr);
+  fprintf(stderr,
+          "Usage: %s [--keep-first|-k] < input > output\n",
+          zAppName);
+}
+
+/**
+   Program entry point. Each k or keep-first argument increments
+   App.keepFirst by one. All leading dashes are stripped from an
+   argument before it is compared, so -k, --k and k are equivalent.
+   Any other argument prints the usage text and exits with code 1.
+
+   The input is read from stdin and the result is written to stdout.
+
+   Returns 0 on success, or 1 if do_it_all() recorded an error, the
+   input could not be read or the output could not be written.
+*/
+int main( int argc, char const * const * argv ){
+  int i;
+  for(i = 1; i < argc; ++i){
+    char const * zArg = argv[i];
+    while( '-'==*zArg ) ++zArg;
+    if( 0==strcmp(zArg,"k")
+        || 0==strcmp(zArg,"keep-first") ){
+      ++App.keepFirst;
+    }else{
+      usage(argv[0]);
+      return 1;
+    }
+  }
+  App.input = stdin;
+  App.output = stdout;
+  do_it_all();
+  /* fgetc() reports a read error the same way as end of input, so
+     check the stream's error flag explicitly. */
+  if( ferror(App.input) ){
+    fprintf(stderr, "Error reading input.\n");
+    App.rc = 1;
+  }
+  /* Flush now so that a failed write (full disk, closed pipe, ...)
+     is reflected in the exit code instead of being silently lost. */
+  if( 0!=fflush(App.output) || ferror(App.output) ){
+    fprintf(stderr, "Error writing output.\n");
+    App.rc = 1;
+  }
+  return App.rc ? 1 : 0;
+}