#! /usr/bin/perl

# Copyright (c) 2021-2022, PostgreSQL Global Development Group

#
# This script looks for symbols that are referenced in #ifdef or defined()
# tests without having #include'd the file that defines them.  Since this
# situation won't necessarily lead to any compiler message, it seems worth
# having an automated check for it.  In particular, use this to audit the
# results of pgrminclude!
#
# Usage: configure and build a PG source tree (non-VPATH), then start this
# script at the top level.  It's best to enable as many configure options
# as you can, especially --enable-cassert which is known to affect include
# requirements.  NB: you MUST use gcc, unless you have another compiler that
# can be persuaded to spit out the names of referenced include files.
#
# The results are necessarily platform-dependent, so use care in interpreting
# them.  We try to process all .c files, even those not intended for the
# current platform, so there will be some phony failures.
#
# src/tools/pginclude/pgcheckdefines
#

use strict;
use warnings;

use Cwd;
use File::Basename;

# Remember where we started; every per-file iteration chdir's away and back.
my $topdir = cwd();

# Programs to use
my $FIND = "find";
my $MAKE = "make";

#
# Build arrays of all the .c and .h files in the tree
#
# We ignore .h files under src/include/port/, since only the one exposed as
# src/include/port.h is interesting.  (XXX Windows ports have additional
# files there?)  Ditto for .h files in src/backend/port/ subdirectories.
# Including these .h files would clutter the list of define'd symbols and
# cause a lot of false-positive results.
#
my (@cfiles, @hfiles);

open my $pipe, '-|', "$FIND * -type f -name '*.c'"
  or die "can't fork: $!";
while (<$pipe>)
{
    chomp;
    push @cfiles, $_;
}
close $pipe or die "$FIND failed: $!";

open $pipe, '-|', "$FIND * -type f -name '*.h'"
  or die "can't fork: $!";
while (<$pipe>)
{
    chomp;
    push @hfiles, $_
      unless m|^src/include/port/|
      || m|^src/backend/port/\w+/|;
}
close $pipe or die "$FIND failed: $!";

#
# For each .h file, extract all the symbols it #define's, and add them to
# a hash table.  To cover the possibility of multiple .h files defining
# the same symbol, we make each hash entry a hash of filenames.
#
my %defines;

foreach my $hfile (@hfiles)
{
    open my $fh, '<', $hfile
      or die "can't open $hfile: $!";
    while (<$fh>)
    {
        if (m/^\s*#\s*define\s+(\w+)/)
        {
            $defines{$1}{$hfile} = 1;
        }
    }
    close $fh;
}

#
# For each file (both .h and .c), run the compiler to get a list of what
# files it #include's.  Then extract all the symbols it tests for defined-ness,
# and check each one against the previously built hashtable.
#
foreach my $file (@hfiles, @cfiles)
{
    my ($fname, $fpath) = fileparse($file);
    chdir $fpath or die "can't chdir to $fpath: $!";

    #
    # Ask 'make' to parse the makefile so we can get the correct flags to
    # use.  CPPFLAGS in particular varies for each subdirectory.  If we are
    # processing a .h file, we might be in a subdirectory that has no
    # Makefile, in which case we have to fake it.  Note that there seems
    # no easy way to prevent make from recursing into subdirectories and
    # hence printing multiple definitions --- we keep the last one, which
    # should come from the current Makefile.
    #
    my $MAKECMD;

    if (-f "Makefile" || -f "GNUmakefile")
    {
        $MAKECMD = "$MAKE -qp";
    }
    else
    {
        # $fpath ends with "/"; strip it to get the subdir name.
        my $subdir = $fpath;
        chop $subdir;

        # Build a "../.." chain with one ".." per directory level of $fpath.
        my $top_builddir = "..";
        my $tmp          = $fpath;
        while (($tmp = dirname($tmp)) ne '.')
        {
            $top_builddir = $top_builddir . "/..";
        }
        $MAKECMD =
          "$MAKE -qp 'subdir=$subdir' 'top_builddir=$top_builddir' -f '$top_builddir/src/Makefile.global'";
    }

    # Start every variable as an empty string: make might not print all of
    # them, and an undef here would only produce "uninitialized value"
    # warnings when expanded below (undef interpolates as '' anyway).
    my ($CPPFLAGS, $CFLAGS, $CFLAGS_SL, $PTHREAD_CFLAGS, $CC) = ('') x 5;

    open $pipe, '-|', "$MAKECMD"
      or die "can't fork: $!";
    while (<$pipe>)
    {
        if (m/^CPPFLAGS :?= (.*)/)
        {
            $CPPFLAGS = $1;
        }
        elsif (m/^CFLAGS :?= (.*)/)
        {
            $CFLAGS = $1;
        }
        elsif (m/^CFLAGS_SL :?= (.*)/)
        {
            $CFLAGS_SL = $1;
        }
        elsif (m/^PTHREAD_CFLAGS :?= (.*)/)
        {
            $PTHREAD_CFLAGS = $1;
        }
        elsif (m/^CC :?= (.*)/)
        {
            $CC = $1;
        }
    }

    # If make exits with status 1, it's not an error, it just means make
    # thinks some files may not be up-to-date.  Only complain on status 2.
    # The close must be on the handle the pipe was opened with: that is what
    # waits for the child and stores its wait status in $?.  The return
    # value of close is deliberately ignored since exit status 1 is fine.
    close $pipe;
    die "$MAKE failed in $fpath\n" if $? != 0 && $? != 256;

    # Expand out stuff that might be referenced in CFLAGS
    $CFLAGS =~ s/\$\(CFLAGS_SL\)/$CFLAGS_SL/;
    $CFLAGS =~ s/\$\(PTHREAD_CFLAGS\)/$PTHREAD_CFLAGS/;

    #
    # Run the compiler (which had better be gcc) to get the inclusions.
    # "gcc -H" reports inclusions on stderr as "... filename" where the
    # number of dots varies according to nesting depth.
    #
    my @includes = ();
    my $COMPILE  = "$CC $CPPFLAGS $CFLAGS -H -E $fname";
    open $pipe, '-|', "$COMPILE 2>&1 >/dev/null"
      or die "can't fork: $!";
    while (<$pipe>)
    {
        if (m/^\.+ (.*)/)
        {
            my $include = $1;

            # Ignore system headers (absolute paths); but complain if a
            # .c file includes a system header before any PG header.
            if ($include =~ m|^/|)
            {
                warn "$file includes $include before any Postgres inclusion\n"
                  if $#includes == -1 && $file =~ m/\.c$/;
                next;
            }

            # Strip any "./" (assume this appears only at front)
            $include =~ s|^\./||;

            # Make path relative to top of tree
            my $ipath = $fpath;
            while ($include =~ s|^\.\./||)
            {
                $ipath = dirname($ipath) . "/";
            }
            $ipath =~ s|^\./||;
            push @includes, $ipath . $include;
        }
        else
        {
            warn "$CC: $_";
        }
    }

    # The compiler might fail, particularly if we are checking a file that's
    # not supposed to be compiled at all on the current platform, so don't
    # quit on nonzero status.  Closing the real pipe handle makes the
    # warning reflect the compiler's actual exit status.
    close $pipe or warn "$COMPILE failed in $fpath\n";

    #
    # Scan the file to find #ifdef, #ifndef, and #if defined() constructs
    # We assume #ifdef isn't continued across lines, and that defined(foo)
    # isn't split across lines either
    #
    open my $fh, '<', $fname
      or die "can't open $file: $!";
    my $inif = 0;    # true while inside a (possibly continued) #if line
    while (<$fh>)
    {
        my $line = $_;
        if ($line =~ m/^\s*#\s*ifdef\s+(\w+)/)
        {
            checkit($file, $1, @includes);
        }
        if ($line =~ m/^\s*#\s*ifndef\s+(\w+)/)
        {
            checkit($file, $1, @includes);
        }
        if ($line =~ m/^\s*#\s*if\s+/)
        {
            $inif = 1;
        }
        if ($inif)
        {
            # Consume each defined(...) test in turn; the substitution
            # removes it from $line so the loop terminates.
            while ($line =~ s/\bdefined(\s+|\s*\(\s*)(\w+)//)
            {
                checkit($file, $2, @includes);
            }

            # A trailing backslash continues the #if onto the next line.
            if (!($line =~ m/\\$/))
            {
                $inif = 0;
            }
        }
    }
    close $fh;

    chdir $topdir or die "can't chdir to $topdir: $!";
}

exit 0;

# Check an is-defined reference
#
# Arguments:
#   $file     - tree-relative path of the file containing the reference
#   $symbol   - the symbol being tested for defined-ness
#   @includes - tree-relative paths of the files $file includes
#
# Prints a report line on stdout if $symbol is #define'd by some scanned
# header, but none of the defining headers is $file itself or one of its
# inclusions (with an exemption for .h files relying on the base headers).
# Returns nothing.
sub checkit
{
    my ($file, $symbol, @includes) = @_;

    # Ignore if symbol isn't defined in any PG include files
    if (!defined $defines{$symbol})
    {
        return;
    }

    #
    # Try to match source(s) of symbol to the inclusions of the current file
    # (including itself).  We consider it OK if any one matches.
    #
    # Note: these tests aren't bulletproof; in theory the inclusion might
    # occur after the use of the symbol.  Given our normal file layout,
    # however, the risk is minimal.
    #
    foreach my $deffile (keys %{ $defines{$symbol} })
    {
        return if $deffile eq $file;
        foreach my $reffile (@includes)
        {
            return if $deffile eq $reffile;
        }
    }

    #
    # If current file is a .h file, it's OK for it to assume that one of the
    # base headers (postgres.h or postgres_fe.h) has been included.
    #
    if ($file =~ m/\.h$/)
    {
        foreach my $deffile (keys %{ $defines{$symbol} })
        {
            return if $deffile eq 'src/include/c.h';
            return if $deffile eq 'src/include/postgres.h';
            return if $deffile eq 'src/include/postgres_fe.h';
            return if $deffile eq 'src/include/pg_config.h';
            return if $deffile eq 'src/include/pg_config_manual.h';
        }
    }

    #
    my @places = keys %{ $defines{$symbol} };
    print "$file references $symbol, defined in @places\n";

    # print "includes: @includes\n";

    return;
}