#!/usr/bin/env perl # (C) 2011 by Argonne National Laboratory. # See COPYRIGHT in top-level directory. # This script scavenges the MPICH source tree looking for "subconfigure.m4" # files. It then analyzes the dependencies between them and emits a # "subsys_include.m4" file that "m4_include"s them all in the correct order and # writes an autoconf macro that is suitable for expanding subsys macros in the # correct order. # # Finding a "dnl MPICH_SUBCFG_BEFORE=BAR" statement inside of # FOO/subconfigure.m4 means that FOO depends on BAR and that FOO's macros should # be emitted *before* BAR's macros so that FOO can influence BAR's # configuration. # # Finding a "dnl MPICH_SUBCFG_AFTER=QUUX" statement inside of # BAZ/subconfigure.m4 means that BAZ uses QUUX and that BAZ's macros be emitted # *after* QUUX's macros so that BAZ can utilize information exported by QUUX's # configuration macros. # # We have both forms of macros because some subsystems know their consumers # explicitly and some packages only know what they consume explicitly, and some # packages are a blend. For example, ch3 depends on the mpid/common/sched code # in order to support NBC ops so a BEFORE statement is used in the # ch3/subconfigure.m4 in order to "enable_mpid_common_sched=yes". The nemesis # netmods all depend on the core nemesis configuration happening first, so they # use an AFTER statement in each of their subconfigure.m4 files. ################################################################################ use strict; use warnings; use Getopt::Long; use Data::Dumper; ################################################################################ # Default global settings and constants # unlikely to change, since this name is assumed in lots of other pieces of the # build system my $SUBCFG_NAME = "subconfigure.m4"; my $OUTPUT_FILE = "subsys_include.m4"; # if true, add a dependency (ancestor-->child) when $SUBCFG_NAME files are # encountered and they have an ancestor $SUBCFG_NAME file in an enclosing # directory my $USE_IMPLICIT_EDGES = 1; # the existence of this file means we should stop recursing down # the enclosing directory tree my $stop_sentinel = ".no_subcfg_recursion"; # which directories should be recursively searched for "subconfigure.m4" files my @root_dirs = qw( src ); # coloring constants for the graph algorithms later on my ($WHITE, $GRAY, $BLACK) = (1, 2, 3); ################################################################################ # Option processing: here's a great place to permit overriding the default # global settings from above if we ever need to in the future. my $do_bist = 0; GetOptions( "--help" => \&print_usage, "--bist" => \$do_bist, ) or die "unable to process options, stopped"; sub print_usage { print <B indicates that # A should come before B in a topo sort). We concretely express this DAG as an # adjacency list stored in a hash. Keys of the hash are filenames, values are # refs to hashes whose keys are outbound edge filenames. # IOW: # ( a => {b=>1,c=>1}, b => {c=>1}, c => {} ) # represents the following (crudely drawn) graph: # a-->b-->c # \------^ my %dag = (); # Helper routine, adds a new edge to the given dag hash (passed by reference), # automatically creating src or dst vertices as necessary. A cute bit of calling # syntax is that the src and dst args can be separated by "=>" because it's just # a fancy comma in perl. sub add_edge { my $dag_ref = shift; my $src = shift; my $dst = shift; die "\$dag_ref is invalid, stopped" unless ref($dag_ref) eq "HASH"; die "\$src is invalid, stopped" unless $src; die "\$dst is invalid, stopped" unless $dst; $dag_ref->{$src} = {} unless exists $dag_ref->{$src}; $dag_ref->{$src}->{$dst} = 1; } foreach my $k (keys %found_files) { # add the vertex to the graph with no edges to start $dag{$k} = {} unless exists $dag{$k}; my $anc = $found_files{$k}; if ($anc and $USE_IMPLICIT_EDGES) { # need to add the implicit edge from the ancestor to $k add_edge(\%dag, $anc => $k); } # now process the file and add any explicit edges open FILE, '<', $k; while (my $line = ) { if ($line =~ m/^\s*dnl +MPICH_SUBCFG_([A-Z_]*)=(.*)\s*$/) { my $bef_aft = $1; my $arg = $2; # users can set GEN_SUBCFG_NO_ERROR=1 in the environment to prevent # this script from complaining about missing '/' chars if ($arg !~ m|/| and not $ENV{GEN_SUBCFG_NO_ERROR}) { print "ERROR: no '/' characters detected in '$arg', possible old-style structured comment still present\n"; exit 1; } # normalize the $arg to match our DAG $arg .= "/$SUBCFG_NAME"; if ($bef_aft eq "BEFORE") { add_edge(\%dag, $k => $arg); } elsif ($bef_aft eq "AFTER") { add_edge(\%dag, $arg => $k); } else { die "unrecognized structured comment ('MPICH_SUBCFG_${bef_aft}')\n". "at $k:$., possible typo? Stopped"; } } } close FILE; } ################################################################################ # We now have a DAG expressing the dependency information between the various # subconfigure.m4 files. Now we need to topologically sort it. # # We use the topo sort algorithm given in "Introduction to Algorithms" (1st # ed.), page 486 with a small modification to detect cycles in the digraph. We # perform a DFS on the DAG, coloring vertices as we go. We could compute # discovery and finishing times, as well as predecessors, but we don't need that # information for topological sorting or cycle detection. As each vertex is # finished (colored BLACK) we prepend it to an array. The resulting array is # sorted in "ascending" topological order (a,b,c in our previous example). # the output array in which the sorted results will be stored my @tsorted = (); topo_sort(\%dag, \@tsorted); ################################################################################ # Now just emit the $OUTPUT_FILE in the correct format. open OUTFD, '>', $OUTPUT_FILE; my $datestamp = scalar(localtime); print OUTFD <{$u} = $GRAY; foreach my $v (keys %{$dag_ref->{$u}}) { # detect cycles in the graph at this point, see ("Classification of edges" # in CLR) if ((exists $colors_ref->{$v}) && $colors_ref->{$v} == $GRAY) { # We are already exploring the tree from $v, so this is a "back edge", # indicating a cycle is present in the digraph. This is erroneous in # our usage, since we cannot topologically sort a cyclic graph. die "A back edge was found in the digraph but a DAG is required.\n". "The back edge was from\n". " $u\n". "to\n". " $v\n". "Stopped"; } elsif ((exists $colors_ref->{$v}) && $colors_ref->{$v} == $WHITE) { dfs_visit($dag_ref, $colors_ref, $v, $out_arr_ref); } } $colors_ref->{$u} = $BLACK; # append $u to the output unshift @$out_arr_ref, $u; } # Takes two arguments, a ref to a DAG hash and a reference to an output array. # Returns in the output array a valid topological sort of the given DAG. sub topo_sort { my $dag_ref = shift; my $out_arr_ref = shift; # helper hash that is indexed by vertex name in order to avoid building a # complicated set of nested structures inside the main DAG my $colors_ref = {}; # values are one of $WHITE, $GRAY, or $BLACK # a simplified version of the DFS(G) algorithm foreach my $u (keys %$dag_ref) { $colors_ref->{$u} = $WHITE; } foreach my $u (keys %$dag_ref) { if ($colors_ref->{$u} == $WHITE) { dfs_visit($dag_ref, $colors_ref, $u, $out_arr_ref); } } } ################################################################################ # self tests # run this subroutine to self-test portions of this script sub bist { bist_topo_sort(); print "all self-tests PASSED\n"; exit 0; } sub bist_topo_sort { my $dag; my $out_arr; my $expected; $dag = { a => {b=>1,c=>1}, b => {c=>1}, c => {} }; $out_arr = []; $expected = [ qw(a b c) ]; topo_sort($dag, $out_arr); cmp_arrays($out_arr, $expected); $dag = { a => {}, b => {}, c => {} }; $out_arr = []; topo_sort($dag, $out_arr); # this DAG has no single expected result, so just check lengths unless (scalar @$out_arr eq scalar @$expected) { die "\$out_arr and \$expected differ in length, stopped\n"; } my $routine = (caller(0))[3]; print "$routine PASSED\n"; } sub cmp_arrays { my $out_arr = shift; my $expected = shift; #print "out_arr=".Dumper($out_arr)."\n"; #print "expected=".Dumper($expected)."\n"; unless (scalar @$out_arr eq scalar @$expected) { die "\$out_arr and \$expected differ in length, stopped\n"; } for (my $i = 0; $i < @$out_arr; ++$i) { unless ($out_arr->[$i] eq $expected->[$i]) { die "element $i of \$out_arr differs from the expected value (". $out_arr->[$i]." ne ".$expected->[$i]."), stopped\n"; } } }