#!/usr/bin/perl
#
# makewhatis: perl rewrite for makewhatis
# author: tom christiansen <tchrist@convex.com>
#
# Copyright 1990 Convex Computer Corporation.
# All rights reserved.

eval "exec /usr/bin/perl -S $0 $*"    # some bozo called us with 'sh foo'
    if $running_under_some_shell;     #   'catman -w' likes to do this; sigh


&source('stat.pl');

($program = $0) =~ s,.*/,,;

$GUNZIP = "gunzip";

$MAXWHATISLEN =  300;   
$MAXDATUM     = 1024; 	# DBM is such a pain

umask 022;

&source('getopts.pl');

do Getopts('ynvdP:M:') || &usage;

$opt_P = shift if $#ARGV >= 0;

&usage if $#ARGV > -1;

sub usage { die "usage: $program [-n] [-y] [-v] [[-M] manpath]\n"; } 

$nflag = $opt_n;
$yflag = $opt_y;

$manpath = $opt_M if $opt_M;
$manpath = $opt_P if $opt_P;		# backwards contemptibility
$manpath = "/usr/man" unless $manpath;
@manpath = split(/:/,$manpath);

$| = $debug = ($opt_d || $opt_v);

$SIG{'INT'}  = 'CLEANUP';
$SIG{'TERM'} = 'CLEANUP';

$SIG{'HUP'}  = 'IGNORE';

# originally was:  chop($cwd = `pwd`);
    chop($cwd = $ENV{'PWD'});

$WHATIS = "whatis";

# ---------------------------------------------------------------------------
# main loop
#
# chdir to each root in man path.  save mtime of dbase for later compares
# with files in case of nflag or yflag.   
# ---------------------------------------------------------------------------

$| = 1;

foreach $root ( @manpath ) {
    local($dbtime, $filecount, $entries);

    $root = "$cwd/$root" if $root !~ m:^/:;  # normalize to fullpathname
    chdir $root || (warn "can't chdir to $root: $!", next);

    print "$program: processing man tree $root...\n";

    if ($nflag || $yflag) { 
	unless (&Stat('whatis.pag')) {
	    print "couldn't stat $root/whatis DBM file\n" if $debug;
	    &rebuild(0, 0) if $yflag;
	    next;
	}
	$dbtime = $st_mtime;
    }
    &rebuild($nflag, $yflag);
}

exit $status;

# ---------------------------------------------------------------------------
# rebuild -- open a new whatis database, store all references in files in 
# 	     this root to it.  if dont_touch or test_stale parms set, just
#	     do the checks.  if test_stale, recurse on a real rebuild.
# ---------------------------------------------------------------------------

sub rebuild {
    local($dont_touch, $test_stale) = @_;

    local(%seen);		# {dev,ino} pairs of files seen
    local(%so);			# the .so references seen
    local(@WHATIS);		# whatis list
    local($entries, $filecount) = (0,0);

    unless ($dont_touch || $test_stale) {
	if (!open (WHATIS, "> $WHATIS.$$")) {
	    warn "can't open $root/$WHATIS.$$: $!\n";
	    $status = 1;
	    return;;
	}
	if (!dbmopen(WHATIS, "$WHATIS.$$", 0644)) {
	    warn "can't dbmopen $root/$WHATIS: $!\n";
	    $status = 1;
	    return;
	}
    }

    foreach $mandir ( <man?*> ) {
	next if $mandir =~ /man0.*/;
	next if $mandir =~ /\.(old|bak)$/i;
	next if $mandir =~ /~$/;
	next unless -d $mandir;

	if (!chdir $mandir) {
	    warn "can't chdir to $root/$mandir: $!\n";
	    next;
	}

	($dirext) = $mandir =~ /man(.*)$/;
	$dirext =~ s/\.z$//;

	print "subdir is $mandir\n" if $debug;

	if (!opendir(mandir,'.')) {
	    warn "can't opendir('$root/$mandir'): $!\n";
	    next;
	}

	# read each file in directory.  use readdir not globbing
	# because we don't want to blow up on huge directories
FILE:	while ($FILE = readdir(mandir)) {
	    $gziped = $mandir =~ m:.*\.z:;
	    next FILE if $FILE =~ /^\.{1,2}/;

	    if ($FILE !~ /\S\.[^z\s]/) {
		print STDERR "Skipping non-man file: $root/$mandir/$FILE\n";
		next FILE;
	    } 

	    # this will be optimized into a case statement
	    if      ($FILE =~ /\.old(\.z)?$/i) {
		next;
	    } elsif ($FILE =~ /\.bak(\.z)?$/i) {
		next;
	    } elsif ($FILE =~ /\.out(\.z)?$/i) {
		next;
	    } elsif ($FILE =~ /~(\.z)?$/) {
		next;
	    }

	    ($tmpfile = $FILE) =~ s/\.z$//;

	    ($filenam, $filext) = 
		$tmpfile =~ /^(\S+)\.([^.]+)$/;

	    #if ($filext eq '.z') {
		#($filenam, $filext) = $filenam =~ /^(\S+)\.([^.]+)(\.z)?$/;
	    #}

	    if ($filext !~ /^${dirext}.*/ && $mandir ne 'mano') {
		print STDERR "$FILE has a funny extension to be in $root/$mandir\n";
	    }

	    unless (&Stat($FILE)) {
		warn "can't stat $root/$mandir/$FILE: $!\n";
		next FILE;
	    } 

	    if ($dont_touch || $test_stale) {
		next unless $st_mtime > $dbtime;
		print "$root/$mandir/$FILE newer than its dbm whatis file\n";
		closedir mandir;
		chdir $root;
		&rebuild(0,0) if $test_stale;
		return;
	    }

	    if ($apage = $seen{$st_dev,$st_ino}) {
		printf "already saw %s, linked to %s\n", $FILE, $apage
		    if $debug;
		&chopext($page = $FILE);
		unless ($WHATIS{$page}) {
		    print "forgot $page\n" if $debug;
		    $apage =~ s/\.z$//;
		    &store_indirect($page, $apage);
		}
		next FILE;
	    } 
	    $seen{$st_dev,$st_ino} = $FILE;

	    $gziped |= $FILE =~ /\.z$/;
	    
	    if (!open(FILE, $gziped ? "$GUNZIP < $FILE |" : $FILE)) {
		warn "can't open $FILE: $!\n";
		next FILE; 
	    }

	    $filecount++;
	    print "opened $root/$mandir/$FILE\n" if $debug;

	    &extract_names;
	} 
	closedir mandir;
	chdir $root || die "can't chdir back to $root: $!";
    } 

    unless ($dont_touch || $test_stale) {
	$, = "\n";
	print WHATIS (sort @WHATIS),'';
	$, = '';
	close WHATIS || warn "can't close $WHATIS.$$: $!";
	rename ("$WHATIS.$$", $WHATIS) 
	    || warn "can't rename $WHATIS.$$ to $WHATIS: $!";
	&check_sos();
	dbmclose(WHATIS) || warn  "can't dbmclose $WHATIS: $!";
	for $ext ( 'pag', 'dir' ) {
	    unlink "$WHATIS.$ext"; 
	    rename("$WHATIS.$$.$ext", "$WHATIS.$ext")
		|| warn "can't rename $WHATIS.$$.$ext:  $!";
	} 
	print "$program: $root: found $entries entries in $filecount files\n";
    } 
} 


# in case we get interrupted
#
sub CLEANUP {
    print stderr "<<INTERRUPTED>> reading $FILE\n";
    chdir $root;
    unlink "$WHATIS.$$", "$WHATIS.$$.pag", "$WHATIS.$$.dir";
    exit 1;
} 

# get next line from FILE, honoring escaped newlines
#
sub getline {
    local ($_);

    $_ = <FILE>;
    {
        chop;
        if (/\\$/) {
            chop;
            $_ .= ' ';
            $_ .= <FILE>;
            redo;
        }
    }
    $_;
}

sub extract_names {
    local($_);
    local($needcmdlist) = 0;
    local($foundname) = 0;
    local(@lines);
    local($page, $page2, $indirect, $foundname, @lines, $nameline);
    local($cmdlist, $ocmdlist, $tmpfile, $section);
    local($prototype, $seenpage);

# _FIND_OUT_WHY_THIS_IS_BAD_!!! -JimC
#    unless (-T FILE) {
#	print STDERR "$FILE: not a text file\n";
#	next;
#    } 


    $_ = <FILE>; 	#   first check for leading .so reference
    if (/^\.so\s+(man.+\/\S+)/) {
	local($indirect, $indirect2);
	$indirect = $1;
	($page)  = $FILE     =~ m:([^.]+)\.[^.]*$:;
	($page2) = $indirect =~ m:.*/([^/]+)$:;
	($indirect2 = $indirect) =~ s!/!.z/!;
	if (-e "../$indirect" || -e "../$indirect.z" || -e $indirect2) {
	    $so{$page} = $page2;
	    print "$FILE: .so alias for $indirect\n" if $debug;
	} else {
	    print STDERR "$FILE .so references non-existent $indirect\n";
	}
	return;
    } else {
	/^\.TH\s+(\S*)\s+(\S+)/ && &doTH($1, $2);
	/^\.Dt\s+(\S*)\s+(\S+)/ && &doDt($1, $2);
    } 

LINE: while (<FILE>) {
	/^\.TH\s+(\S*)\s+(\S+)/ && &doTH($1, $2);
	/^\.Dt\s+(\S*)\s+(\S+)/ && &doDt($1, $2);
	next LINE unless /^\.S[Hh]\s+"?NAME"?/i || /^\.NA\s?/;
	$foundname = 1;
	@lines = ();
	$nameline = '';
NAME:	while ($_ = &getline()) {
	    last NAME if /^\.(S[hHYS])\s?/;  # MH support
	    if ( $_ eq '.br' ) {
		push(@lines, $nameline) if $nameline;
		$nameline = '';
		next NAME;
	    } 
		else {
		s/^\.Nm\s+// || s/^\.Nd\s+/- /;
	    }
	    s/^\.[IB]\b//;	# Kill Bold and Italics
	    next if /^\./;
	    $nameline .= ' ' if $nameline;
	    $nameline .= $_;
	} 

	push(@lines, $nameline);

	for ( @lines ) {
	    next unless ord;
	    s/\\f([PBIR0-4]|\(..)//g;	# kill font changes
	    s/\\s[+-]?\d+//g;		# kill point changes
	    s/\\&//g;			# and \&
	    s/\\\((ru|ul)/_/g;		# xlate to '_'
	    s/\\\((mi|hy|em)/-/g;	# xlate to '-'
	    s/\\\*\(..//g  &&		# no troff strings
		print STDERR "trimmed troff string macro in NAME section of $FILE\n";
	    s/\\//g;		   	# kill all remaining backslashes 
	    s/^\.\\"\s*//;		# comments
	    if (!/\s*-+\s+/) {
		#        ^ otherwise L-devices would be L
		printf STDERR "$FILE: no separated dash in \"%s\"\n", $_;
		$needcmdlist = 1;   	# forgive their braindamage
		s/.*-//;
		$desc = $_;
	    } else {
		($cmdlist, $desc) = ( $`, $' );
		$cmdlist =~ s/^\s+//;
	    }

	    # need this for two reasons: sprintf might blow up and so 
	    # might the dbm store due to 1k limit
	    #
	    $ocmdlist = $cmdlist;  # before truncation
	    if (length($cmdlist) > $MAXWHATISLEN) {
		printf STDERR "$FILE: truncating cmdlist from %d to %d bytes for DBM's sake\n",
			length($cmdlist), $MAXWHATISLEN;
		$cmdlist = substr($cmdlist,0,$MAXWHATISLEN) . "...";
	    } 

	    ($tmpfile = $FILE) =~ s/\.z$//;
	    ($page, $section) = $tmpfile =~ /^(\S+)\.(\S+)$/;
	    $cmdlist = $page if $needcmdlist; 

	    $prototype = ''; $seenpage = 0;

	    foreach $cmd (split(/\s*,\s*/,$ocmdlist)) {
		next unless $cmd;
		$seenpage |= ($cmd eq $page);
		if (! $prototype) {
		    &store_direct($cmd, $cmdlist, $tmpfile, $dirext, $desc);
		    $prototype = $cmd;
		} else {
		    &store_indirect($cmd, "$prototype.$filext");
		} 
	    } 
	    unless ($seenpage) {
		print "$FILE: forgot my own name!\n" if $debug;
		if ($prototype) {
		    &store_indirect($page, "$prototype.$filext");
		} else {
		    &store_direct($page, $page, $FILE, $dirext, '');
		    #&store_direct($page, $page, $FILE, $dirext, $desc);
		}
	    }
	}
    }  
    unless ($foundname) {
	print STDERR "$FILE: no NAME lines, so has no whatis description!\n";
	($tmpfile = $FILE) =~ s/\.z$//;
	($page, $section) = $tmpfile =~ /^(\S+)\.(\S+)$/;
	&store_direct($page, $page, $tmpfile, $dirext, 'NO DESCRIPTION');
    } 
}

# --------------------------------------------------------------------------
sub source {
    local($file) = @_;
    local($return) = 0;


    $return = do $file;
    die  "couldn't parse \"$file\": $@" if $@;
    die  "couldn't do \"$file\": $!" unless defined $return;
    warn "couldn't run \"$file\"" unless $return;
}


sub chopext {
    $_[0] =~ s/\.z$//;
    $_[0] =~ s/\.[^.]+$//;
} 

sub check_sos {
    local($key);

    foreach $key (keys %so) {
	unless (defined $WHATIS{$key}) {
	    printf STDERR 
		"%s was a .so alias for %s, but %s's NAME section doesn't know it!\n",
		$key, $so{$key}, $so{$key};
	    &store_indirect($key, $so{$key});
	} 
    } 
} 

sub store_direct {
    local($cmd, $list, $page, $section, $desc) = @_;
    local($datum);

    push(@WHATIS,sprintf("%-20s - %s", "$list ($filext)", $desc));

    $datum = join("\001", $list, $page, $section, $desc);

    if (defined $WHATIS{$cmd}) {
	if (length($WHATIS{$cmd}) + length($datum) + 1 > $MAXDATUM) {
	    print STDERR "can't store $page -- would break DBM\n";
	    return;
	} 
	$WHATIS{$cmd} .= "\002";
    } 

    print "storing $cmd\n" if $debug;
    $WHATIS{$cmd} .= $datum;
    $entries++;
} 

sub store_indirect {
    local($indirect, $real) = @_;

    print "storing $indirect as reference to $real\n"
	if $debug;

    $WHATIS{$indirect} .= "\002" if $WHATIS{$indirect};
    $WHATIS{$indirect} .= $real;
    $entries++;
} 

sub doTH {
    local($THname, $THext) = @_;
    local($int_name, $ext_name);

    ($int_name = "$THname.$THext") =~ tr/A-Z/a-z/;
    ($ext_name = "$filenam.$filext") =~ tr/A-Z/a-z/;

    if ($int_name ne $ext_name && $debug) {
	print STDERR "${FILE}'s .TH thinks it's in $int_name\n";
    } 
} 
 
# same as doTH but for 4.4 bsd style man pages (-mdoc)
sub doDt {
    local($Dtname, $Dtext) = @_;
    local($int_name, $ext_name);

    ($int_name = "$Dtname.$Dtext") =~ tr/A-Z/a-z/;
    ($ext_name = "$filenam.$filext") =~ tr/A-Z/a-z/;

    if ($int_name ne $ext_name && $debug) {
	print STDERR "${FILE}'s .Dt thinks it's in $int_name\n";
    } 
} 
