diff options
Diffstat (limited to 'src/include/catalog/reformat_dat_file.pl')
-rwxr-xr-x | src/include/catalog/reformat_dat_file.pl | 312 |
1 files changed, 312 insertions, 0 deletions
diff --git a/src/include/catalog/reformat_dat_file.pl b/src/include/catalog/reformat_dat_file.pl new file mode 100755 index 0000000..1cadbfd --- /dev/null +++ b/src/include/catalog/reformat_dat_file.pl @@ -0,0 +1,312 @@ +#!/usr/bin/perl +#---------------------------------------------------------------------- +# +# reformat_dat_file.pl +# Perl script that reads in catalog data file(s) and writes out +# functionally equivalent file(s) in a standard format. +# +# In each entry of a reformatted file, metadata fields (if present) +# come first, with normal attributes starting on the following line, +# in the same order as the columns of the corresponding catalog. +# Comments and blank lines are preserved. +# +# Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group +# Portions Copyright (c) 1994, Regents of the University of California +# +# src/include/catalog/reformat_dat_file.pl +# +#---------------------------------------------------------------------- + +use strict; +use warnings; + +use FindBin; +use Getopt::Long; + +# If you copy this script to somewhere other than src/include/catalog, +# you'll need to modify this "use lib" or provide a suitable -I switch. +use lib "$FindBin::RealBin/../../backend/catalog/"; +use Catalog; + +# Names of the metadata fields of a catalog entry. +# Note: oid is a normal column from a storage perspective, but it's more +# important than the rest, so it's listed first among the metadata fields. +# Note: line_number is also a metadata field, but we never write it out, +# so it's not listed here. +my @METADATA = + ('oid', 'oid_symbol', 'array_type_oid', 'descr', 'autogenerated'); + +# Process command line switches. +my $output_path = ''; +my $full_tuples = 0; + +GetOptions( + 'output=s' => \$output_path, + 'full-tuples' => \$full_tuples) || usage(); + +# Sanity check arguments. +die "No input files.\n" unless @ARGV; + +# Make sure output_path ends in a slash. +if ($output_path ne '' && substr($output_path, -1) ne '/') +{ + $output_path .= '/'; +} + +# Read all the input files into internal data structures. +# We pass data file names as arguments and then look for matching +# headers to parse the schema from. +my %catalogs; +my %catalog_data; +my @catnames; +foreach my $datfile (@ARGV) +{ + $datfile =~ /(.+)\.dat$/ + or die "Input files need to be data (.dat) files.\n"; + + my $header = "$1.h"; + die "There in no header file corresponding to $datfile" + if !-e $header; + + my $catalog = Catalog::ParseHeader($header); + my $catname = $catalog->{catname}; + my $schema = $catalog->{columns}; + + push @catnames, $catname; + $catalogs{$catname} = $catalog; + + $catalog_data{$catname} = Catalog::ParseData($datfile, $schema, 1); +} + +######################################################################## +# At this point, we have read all the data. If you are modifying this +# script for bulk editing, this is a good place to build lookup tables, +# if you need to. In the following example, the "next if !ref $row" +# check below is a hack to filter out non-hash objects. This is because +# we build the lookup tables from data that we read using the +# "preserve_formatting" parameter. +# +##Index access method lookup. +#my %amnames; +#foreach my $row (@{ $catalog_data{pg_am} }) +#{ +# next if !ref $row; +# $amnames{$row->{oid}} = $row->{amname}; +#} +######################################################################## + +# Write the data. +foreach my $catname (@catnames) +{ + my $catalog = $catalogs{$catname}; + my @attnames; + my $schema = $catalog->{columns}; + + foreach my $column (@$schema) + { + my $attname = $column->{name}; + + # We may have ordinary columns at the storage level that we still + # want to format as a special value. Exclude these from the column + # list so they are not written twice. + push @attnames, $attname + if !(grep { $_ eq $attname } @METADATA); + } + + # Write output files to specified directory. + my $datfile = "$output_path$catname.dat"; + open my $dat, '>', $datfile + or die "can't open $datfile: $!"; + + foreach my $data (@{ $catalog_data{$catname} }) + { + + # Hash ref representing a data entry. + if (ref $data eq 'HASH') + { + my %values = %$data; + + ############################################################ + # At this point we have the full tuple in memory as a hash + # and can do any operations we want. As written, it only + # removes default values, but this script can be adapted to + # do one-off bulk-editing. + ############################################################ + + if (!$full_tuples) + { + # If it's an autogenerated entry, drop it completely. + next if $values{autogenerated}; + # Else, just drop any default/computed fields. + strip_default_values(\%values, $schema, $catname); + } + + print $dat "{"; + + # Separate out metadata fields for readability. + my $metadata_str = format_hash(\%values, @METADATA); + if ($metadata_str) + { + print $dat $metadata_str; + + # User attributes start on next line. + print $dat ",\n "; + } + + my $data_str = format_hash(\%values, @attnames); + print $dat $data_str; + print $dat " },\n"; + } + + # Preserve blank lines. + elsif ($data =~ /^\s*$/) + { + print $dat "\n"; + } + + # Preserve comments or brackets that are on their own line. + elsif ($data =~ /^\s*(\[|\]|#.*?)\s*$/) + { + print $dat "$1\n"; + } + } + close $dat; +} + +# Remove column values for which there is a matching default, +# or if the value can be computed from other columns. +sub strip_default_values +{ + my ($row, $schema, $catname) = @_; + + # Delete values that match defaults. + foreach my $column (@$schema) + { + my $attname = $column->{name}; + + # It's okay if we have no oid value, since it will be assigned + # automatically before bootstrap. + die "strip_default_values: $catname.$attname undefined\n" + if !defined $row->{$attname} and $attname ne 'oid'; + + if (defined $column->{default} + and ($row->{$attname} eq $column->{default})) + { + delete $row->{$attname}; + } + } + + # Delete computed values. See AddDefaultValues() in Catalog.pm. + # Note: This must be done after deleting values matching defaults. + if ($catname eq 'pg_proc') + { + delete $row->{pronargs} if defined $row->{proargtypes}; + } + + # If a pg_type entry has an auto-generated array type, then its + # typarray field is a computed value too (see GenerateArrayTypes). + if ($catname eq 'pg_type') + { + delete $row->{typarray} if defined $row->{array_type_oid}; + } + + return; +} + +# Format the individual elements of a Perl hash into a valid string +# representation. We do this ourselves, rather than use native Perl +# facilities, so we can keep control over the exact formatting of the +# data files. +sub format_hash +{ + my $data = shift; + my @orig_attnames = @_; + + # Copy attname to new array if it has a value, so we can determine + # the last populated element. We do this because we may have default + # values or empty metadata fields. + my @attnames; + foreach my $orig_attname (@orig_attnames) + { + push @attnames, $orig_attname + if defined $data->{$orig_attname}; + } + + # When calling this function, we ether have an open-bracket or a + # leading space already. + my $char_count = 1; + + my $threshold; + my $hash_str = ''; + my $element_count = 0; + + foreach my $attname (@attnames) + { + $element_count++; + + # To limit the line to 80 chars, we need to account for the + # trailing characters. + if ($element_count == $#attnames + 1) + { + # Last element, so allow space for ' },' + $threshold = 77; + } + else + { + # Just need space for trailing comma + $threshold = 79; + } + + if ($element_count > 1) + { + $hash_str .= ','; + $char_count++; + } + + my $value = $data->{$attname}; + + # Escape single quotes. + $value =~ s/'/\\'/g; + + # Include a leading space in the key-value pair, since this will + # always go after either a comma or an additional padding space on + # the next line. + my $element = " $attname => '$value'"; + my $element_length = length($element); + + # If adding the element to the current line would expand the line + # beyond 80 chars, put it on the next line. We don't do this for + # the first element, since that would create a blank line. + if ($element_count > 1 and $char_count + $element_length > $threshold) + { + + # Put on next line with an additional space preceding. There + # are now two spaces in front of the key-value pair, lining + # it up with the line above it. + $hash_str .= "\n $element"; + $char_count = $element_length + 1; + } + else + { + $hash_str .= $element; + $char_count += $element_length; + } + } + return $hash_str; +} + +sub usage +{ + die <<EOM; +Usage: reformat_dat_file.pl [options] datafile... + +Options: + --output PATH output directory (default '.') + --full-tuples write out full tuples, including default values + +Non-option arguments are the names of input .dat files. +Updated files are written to the output directory, +possibly overwriting the input files. + +EOM +} |