#!/usr/bin/perl
#
# cfman v2.0: man page cross-referencer
# author: Tom Christiansen <tchrist@convex.com>
# date: 15 November 89
#
# usage: cfman [ -d debug-devel ] [ -s sub-sections ] 
#	       [ -p manpath ] [ -x xrefpath ] 

($iam = $0) =~ s%.*/%%;
 
$] =~ /(\d+\.\d+).*\nPatch level: (\d+)/;
die "$iam: requires at least perl version 3.0, patchlevel 1 to run correctly\n"
	if $1 < 3.0 || ($1 == 3.0 && $2 < 1);
    

require 'getopts.pl';

&Getopts('d:s:p:P:x:') || &usage;

$manpath = $opt_p if defined $opt_p;
$manpath = $opt_P if defined $opt_P;
$manpath = $ENV{'MANPATH'} unless $manpath;
$manpath = "/usr/man" unless $manpath;
@manpath = split(/:/,$manpath);

$opt_x =~ /^:/ && ( $opt_x = $manpath . $opt_x );
@xrefpath = $opt_x ? split(/:/,$opt_x) : @manpath;

$debug = $opt_d;

@sections = $opt_s ? split(/ */,$opt_s) : 1..8;

if ($debug) {
    $" = ':';
    print "manpath is @manpath\n";
    print "xrefpath is @xrefpath\n";
    $" = ' ';
} 

file:    foreach $file ( $#ARGV >= $[ ? @ARGV : '*.*' ) {
	     printf STDERR "considering %s\n", $file if $debug & 1;
	     $bingo = 0;
tree:        foreach $tree ( @manpath ) {
		 print "ROOT is $tree\n" if $debug;
		 if (!chdir $tree) {
		    warn "cannot chdir to $tree: $!";
		    next tree;
		 } 
		 $rootdir = $tree;
		 if ( $file =~ m#^/# ) {
		    &read_manpages($file); 
		    next file;
		 } 
section:         foreach $section ( @sections ) {
		    &scan_section($tree,$section,$file);
		 }
	     } 
	     print "no man pages matched \"$file\"\n" unless $bingo;
	  }


exit 0;

############################################################################
#
# scan_section()
#
#	checks a given man tree (like /usr/local/man) in a 
#	certain subsection (like '1'), checking for a certain
#	file, like 'tty' (which mean 'tty.*', 'system.3*', or '*.*'.
#
#	will recurse on a subsection name contaning a shell meta-character
#
############################################################################

sub scan_section {
    local ( $manroot, $subsec, $files ) = @_;
    local ( $mandir );

    $mandir = "man" . $subsec;


    # subsec may have been ? or *; if so, recurse!
    if ( &has_meta($mandir) ) {  
	for (<${mandir}>) {
	    if (&has_meta($_)) { 
		warn "bad glob of $mandir"; 
		last; 
	    } 
	    s/^man//;
	    &scan_section($manroot,$_,$files);
	} 
	return;
    } 

    $files = "$files.*" unless $files =~ /\./;

    if (!chdir $mandir) {
	warn "couldn't chdir to $mandir: $!\n" if $debug;
	return;
    } 

    printf STDERR "chdir to %s of %s\n", $mandir, $manroot if $debug & 1;

    &read_manpages ( &has_meta($files) ? &glob($files) : ($files));

    chdir('..');
} 

############################################################################
#
# read_manpages()
#
#	passed a list of filename, which are man pages.  opens each one
#	verifying that the file really is in the place that the .TH line.
#	skips to SEE ALSO section and then verifies existence of each 
#	referenced man page.
############################################################################


sub read_manpages {
    local (@pages) = @_;

    local ($junk, $sopage, $basename, $line, $page, $pname, $pext, $gotTH);
    local(%seen);


page:
    foreach $page ( @pages ) {
	next page if $page =~ /\.(BAK|OLD)$/i;

	if ($seen{$page}++) {
	    print "already saw $page\n" if $debug & 1;
	    next page;
	}

	if (!open page) {
	    warn "couldn't open $page: $!\n";
	    next page;
	}

	$bingo = 1; # global var

	print "checking $page\n" if $debug & 1;

	$gotTH = 0;
	$line = 0;
	$sopage = '';

line:   while (<page>) {
	    print if $debug & 16;
	    next line if /^'''/ || /^\.\\"/;

	    # deal with .so's on the first line.
	    # /usr/ucb/man uses this instead of links.
	    if (!($line++) && /^\.so\s+(.*)/) {
		$sopage = $1;
		print "$page -> $sopage\n" if $debug & 1;
		($basename = $sopage) =~ s%.*/%%;
		if ($seen{$basename}++) {
		    print "already saw $basename\n" if $debug & 1;
		    next page;
		} 
		if (!open(page,"../$sopage")) {
		    print "$page: cannot open $sopage: $!\n";
		    next page;
		} 
		$page = $basename;
		next line;
	    } 

	    # check for internally consistent .TH line
	    if ( /^\.(TH|SC)/ ) { # SC is for mh
		 $gotTH++;
		 printf STDERR "TH checking %s", $_ if $debug & 4;
		 do flush();
		 s/"+//g;
		 ($junk, $pname, $pext) = split;
		 if (&macro($pname)) {
			printf STDERR "%s: can't resolve troff macro in .TH: %s\n",
			    $page, $pname;
			next line;
		 } 
		 $pext =~ y/A-Z/a-z/;
		 $pname =~ s/\\-/-/g;
		 $pname =~ y/A-Z/a-z/ if $pname =~ /^[\$0-9A-Z_\055]+$/;
		 ($pexpr = $page) =~ s/([.+])/\\$1/g;
		 $pexpr =~ s%.*/%%;
		 if ( "$pname.$pext" !~ /^$pexpr$/i) {
		      printf "%s: thinks it's in %s(%s)\n", 
			  $page, $pname, $pext;
		 } 
		 next line;
	    }

	    next line unless /^\.S[Hh]\s+"*SEE ALSO"*/ 
		|| /^\.S[Hh]\s+REFERENCES/	# damn posix
		|| /^\.Sa\s*$/; 		# damn mh

	    # finally found the cross-references
xref:       while (<page>) {
		print if $debug & 16;
		last line if /^\.(S[Hh]|Co|Hi|Bu)/; # i really hate mh macros
		next xref unless /\(/;
		next xref if /^.PP/;
		chop;
		s/\\f[RIPB]//g;
		s/\\\|//g;
		s/\\-/-/g;
entry:          foreach $entry ( split(/,/) ) {
		    #print "got entry $entry\n";
		    next entry unless $entry =~ /\(.*\)/;
		    $pname = ''; $pext = '';
		    $1 = ''; $2 = '';
		    ($pname, $pext) = 
			($entry =~ /([A-Za-z0-9\$._\-]+)\s*\(([^)]+)\).*$/); 
		    if ($debug & 8) {
			printf STDERR "entry was %s, pname is %s, pext is %s\n",
			    $entry, $pname, $pext;
		    }     
		    if (&macro($pname)) {
			printf "%s: can't resolve troff macro in SEE ALSO: %s\n",
			    $page, $pname;
			next entry;
		    } 
		    next entry if !$pname || !$pext || $pext !~ /^\w+$/;
		    $pext =~ y/A-Z/a-z/;
		    $pname =~ y/A-Z/a-z/ if $pname =~ /^[A-Z_0-9\-]+$/;
		    #($psect = $pext) =~ s/^(.).*/$1/;
		    do check_xref($page,$pname,$pext);

		}	# entry: foreach $entry ( split(/,/) ) 
	    }		# xref:  while (<page>)
	}		# line:  while (<page>) 
	printf "%s: missing .TH\n", $page if (!$gotTH);
    }  			# page:  foreach $page ( @pages )
}     			# sub    read_manapages


###########################################################################
#
# check_xref()
#
#	given the name of the page we're looking for, check for a
#	cross reference of a given man page and its assumed subsection
#
###########################################################################

sub check_xref {
    local ($name, $target, $section) = @_;
    local ($basesec, $subsec, $newsec );

    printf STDERR " xref of %s(%s)\n", $target, $section if $debug & 2;

    return if &pathcheck($target,$section);


    # if we get this far, something's wrong, so begin notify
    printf "%s: %s(%s)", $name, $target, $section;

    ($basesec, $subsec) = ($section =~ /^(\d)(.*)$/);

    if ($name =~ /\.\d*([nlp])$/ && ($section == 1 || $section == 8)
	    && ($newsec = &pathcheck($target,$1))) { # hack for manl idiocy
	&really($target,$newsec);
	return;
    }

    # first check if page.Xn is really in page.X
    if ( $subsec && ($newsec = &pathcheck($target,$basesec))) {
	&really($target,$newsec);
	return;
    } 

    if ( $basesec == 1 && &pathcheck($target,8))  {
	&really($target,8);
	return;
    }

    if ( $basesec == 8 && &pathcheck($target,1))  {
	&really($target,1);
	return;
    }

    # maybe it thinks it's in 8 but got erroneously in 1
    if ( $basesec =~ /[18]/ && ($newsec = &pathcheck($target,'l')))  {
	&really($target,$newsec);
	return;
    } 

    # maybe page.X is really in page.Xn; this is expensive
    if ( !$subsec && ($newsec = &pathcheck($target,$basesec.'*'))) {
	&really($target,$newsec);
	return;
    } 

    printf " missing\n";
    do flush();
}

###########################################################################
#
# pathcheck()
#
#	takes a name (like 'tty') and a section (like '1d')
#	and looks for 'tty.1d' first in the current root, 
#	then in all other elements of @xrefpath.  the section
#	may have a meta-character in it (like '8*').
#
#	returns the subsection in which we found the page, or
#	null if we failed.
#
###########################################################################

sub pathcheck {
    local ( $name, $section ) = @_;
    local ( $basesec, $metasec, $fullpath, @expansion, $tree, %checked  ); 
    local ( $return ) = 0;

    $metasec = &has_meta($section);

    ($basesec) = ($section =~ /^(.)/);

    foreach $tree ( $rootdir, @xrefpath ) {
	next if !$tree || $checked{$tree}++;  # only check each tree once

	$fullpath = "$tree/man$basesec/$name.$section";  

	print "   testing $fullpath\n" if $debug & 8;

	if (!$metasec) {
	    if (-e $fullpath) {
		$return = $section;
	    }
	} else {
	    open(SAVERR, '>&STDERR');  # csh globbing brain damage
	    close STDERR;
	    if ((@expansion = <${fullpath}>) && !&has_meta($expansion[0])) {
	    			# redundant meta check due to sh brain-damage
		#for (@expansion) { s/.*\.//; } 
		#$section = join(' or ',@expansion);
		($section) = ($expansion[0] =~ /([^.]+)$/);
		$return = $section;
	    }
	    open(STDERR, '>&SAVERR');  # csh globbing brain damage
	    close SAVERR;
	}
    } 
    printf STDERR "   pathcheck returns $section\n" if $debug & 8;
    $return;
} 

#---------------------------------------------------------------------------

sub flush {
    $| = 1; 
    print ''; 
    $| = 0;
}

sub has_meta {
    $_[0] =~ /[[*?]/;
} 

sub macro {
    @_[0] =~ /^\\\*\(/;
} 

sub really {
    local($was,$is) = @_;
    print " really in $was($is)\n";
}

sub usage {
    die "usage: $iam [-d debug-level] [-s sub-sections] [-p manpath] 
    	[-x xrefpath] [pattern ...] \n";
}

sub glob {
    local($expr) = @_;
    local(@retlist) = ();
    local(*METADIR);				# paranoia

    die "glob: null expr" unless $expr;		# assert

    if ($expr =~ /\//) {
	warn "glob: \"$expr\" has slashes, punting...";
	return <${expr}>;
    } 

    $expr =~ s/\*/.*/g;
    $expr =~ s/\?/./g;

    unless (opendir(METADIR, '.')) {
	warn "glob: can't opendir ".": $!\n";
    } else {
	@retlist = sort grep(/$expr/o, grep(!/^\./, readdir(METADIR)));
	closedir METADIR;
    }
    return @retlist;
} 
