#!/usr/bin/perl -w
#
# test_inst: test the installation routines for ispell.  This focuses
# mostly on manual pages, since they're what I changed just before
# writing this script.
#
# Installs in a temporary directory and checks that the expected files
# have been installed.  Some files (man pages) will be opened and
# parsed to check their content.
#
# Bugs: works only for english dictionaries (those that install the
# english.5 manual page).
#
# $Log: test_inst,v $
# Revision 1.6  2003/08/24 11:11:14  ed
# To avoid warnings during the test suite, add '#undef foo' before each
# occurrence of '#define foo'.
#
# Revision 1.5  2003/08/23 10:56:27  ed
# Purging my old email address.
#
# Revision 1.4  2002/09/03 21:05:20  ed
# Removed dependency on DB_File: now we just read all of local.h into a
# big array, modify the array, and write it out again when done.  It
# turns out that this is all that the DB_RECNO mode did anyway.
#
# Revision 1.3  2002/05/19 14:53:35  ed
# Whitespace fixes.
#
# Revision 1.2  2001/10/16 16:55:43  ed
# Merged some more of SuSE's ispell-3.2.06-config.patch: the wordlist is
# in /usr/share/dict/ rather than /usr/dict/.  I changed this in all
# documentation, but the manual page ought to use the WORDS
# configuration variable rather than anything hardcoded.  Extended
# test_inst to check filenames mentioned in manual pages.
#
# Revision 1.1  2001/10/16 15:14:14  ed
# Merged some more patches from SuSE's ispell-3.2.06 package: parts of
# ispell-3.2.06-languages.patch.  This is to make the manual pages have
# the correct section (file formats go in section 5 not 4) and have
# correct cross-references (no xref to tib (1) or spell (1), which are
# not installed on SuSE).  I parameterized these changes as things in
# config.X.
#
# Makefile: Fixed bug creating directories by using mkdir -p instead of plain
# mkdir.  But -p isn't supported on all Unixes, so this is parameterized
# as the MKDIR variable.  Perhaps this should be a variable in config.X
# rather than the Makefile itself - but I don't know how to do that.
# Added ispell.5 as a target, it is generated from ispell.5X by applying
# substitutions.  This replaces the old ispell.4 which was installed
# as-is.
#
# Two new substitution variables TIB_XREF and SPELL_XREF are used in
# manual pages for making cross-references to tib and spell.  They
# should be defined as a fragment of troff code (ie, text) to be
# included in manual pages.  For example, TIB_XREF might be defined as
# 'tib (1)'.
#
# The section for file format manual pages is controlled by MANFFSECT,
# previously it was hardcoded as 4.  MANFFDIR and MANFFEXT replace
# MAN4DIR and MAN4EXT respectively.
#
# The target install-deformatters no longer calls a recursive Makefile
# inside the deformatters/ directory.  Instead the commands to build
# these two programs are included in the top-level Makefile.  Ditto
# clean-deformatters.
#
# To test the changes to installation, there is a new program test_inst
# which sets some variables in local.h, runs make install, and checks
# the installed files.  It concentrates on making sure the manual pages
# are okay, reading them and checking cross-references.  test_inst is
# now the last target of 'make test'.
#
# Dropped support for manual section 1L (local); now the executable
# commands go in plain old section 1.  It could be added back in using a
# similar mechanism to the section 4 / 5 choice if it is still needed.
#
# The Makefiles for the individual languages have also been changed to
# use MKDIR, and bugfixed to work when LIBDIR is a relative path.
# Previously, Makefiles in subdirectories would do 'cd $$LIBDIR', but
# that will fail if LIBDIR is something like '../install'.  They now go
# through some contortions to keep working whether LIBDIR is relative or
# absolute.  There is a lot of duplicated code across the language
# directories, it would be better factored out.  The english Makefile
# now builds the english.5 manual page by applying substitutions from
# english.5X, and its variants (altamer, american and british) now call
# english/Makefile to make this page and for 'make clean'.
#

use strict;
use File::Find;
if (@ARGV) {
    print STDERR "usage: $0\nbut best called from 'make test'\n";
    # A usage error is a failure; exit nonzero so that a caller (make,
    # a shell script) does not mistake it for a successful test run.
    exit 2;
}

# Regexp to match a manual page xref (and set $1 and $2 or whatever)
my $xref_re = '([^() ]+)\s*\(([^() ]+)\)';

# Trap warnings and set this to keep track of whether anything went
# wrong - if it did, we won't delete our working area.
#
my $gone_wrong = 0; $SIG{__WARN__} = sub { ++$gone_wrong; warn @_ };

# Place to install under for test
my $prefix = 'test_inst.tmp';

# Binaries expected to install under BINDIR
my @bins = qw[ispell deformat-c deformat-sh buildhash icombine ijoin munchlist findaffix tryaffix];
my %in_bin; ++$in_bin{$_} foreach @bins;

# Section 1 manual pages.  @man1s_cp is a second copy of the list which
# known_manpages() reads instead of @man1s.
my @man1s = qw[ispell buildhash munchlist findaffix tryaffix];
my @man1s_cp = @man1s;
foreach (@man1s) {
    warn "installing section 1 manpage for $_ but no executable"
      if not $in_bin{$_};
}

# File format manual pages (normally section 5), again with a second
# copy for known_manpages().
my @manffs = qw[ispell english];
my @manffs_cp = @manffs;

# 'Library' files.  At present this seems to be the wordlist data.
#
my @libs = qw[english.aff americanmed.hash american.hash english.hash];

# Configuration data to be written to local.h.
my %conf;
$conf{BINDIR} = "$prefix/bin";
$conf{LIBDIR} = "$prefix/lib";

# Test different values for the section of the manual used for file
# formats.
#
$conf{MANFFSECT} = 'ff';

# We make sure that the directory and extension of the installed
# manpages matches their section.
#
@conf{qw(MAN1DIR MAN1EXT)} = ("$prefix/man/man1", '.1');
@conf{qw(MANFFDIR MANFFEXT)} = ("$prefix/man/man$conf{MANFFSECT}", ".$conf{MANFFSECT}");

# System wordlist.
$conf{WORDS} = '/usr/share/dict/words';

# First test this standard configuration.
test_conf(\%conf);
if ($gone_wrong) {
    print "standard config failed\n";
    # Exit statuses are taken modulo 256, so a raw warning count of
    # exactly 256 would look like success.  Clamp it.
    exit($gone_wrong > 255 ? 255 : $gone_wrong);
}

# Now test some variants of it.  We could exhaustively run various
# combinations, but it wouldn't gain much.  It should suffice to
# change all these variables at once.
#
my %variants = (BINDIR => "$prefix/bin_a", LIBDIR => "$prefix/lib_a",
                MANFFSECT => 'ff_a', MANFFDIR => "$prefix/manffdir_a",
                MANFFEXT => '.manffext_a', MAN1DIR => "$prefix/man1dir_a",
                MAN1EXT => '.man1ext_a',
                # We also want to make unusual cross-references.
                TIB_XREF => 'tib_a (1_a)', SPELL_XREF => 'spell_a (1_a)',
                # And the wordlist file (used in source and manpages)
                WORDS => 'words_a',
               );
%conf = (%conf, %variants);
test_conf(\%conf);
if ($gone_wrong) {
    print "variant config failed\n";
    # Clamped for the same reason as above.
    exit($gone_wrong > 255 ? 255 : $gone_wrong);
}

print "$0: all tests successful\n";

# set_special_local_h()
#
# Edit the local.h file in the current directory so that the region
# between the two test-suite marker comments holds exactly one
#   #undef NAME
#   #define NAME "value"
# pair per given variable, in sorted order of name.  Whatever was
# between the markers before is discarded, so passing an empty hash
# empties the region again.  If neither marker is present yet, both
# are appended to the end of the file first.
#
# Parameters: hashref of (variable name => value).
#
# Dies if local.h cannot be read or written, if only one of the two
# markers is present, if the end marker comes before the start marker,
# or if a value contains a backslash, a double quote or a newline
# (none of which can be written inside the quoted #define as-is).
# Warns if a marker appears more than once (the last one wins).
#
sub set_special_local_h {
    my $vars = shift;

    open(my $in, '<', 'local.h') or die "cannot open local.h: $!";
    chomp (my @lh = <$in>);
    close $in or die "cannot close local.h: $!";

    my $start = '/**** MODIFIED BY TEST SUITE ****/';
    my $end = '/**** END MODIFIED BY TEST SUITE ****/';
    my ($start_l, $end_l);
    foreach my $i (0 .. $#lh) {
        # Markers are recognized regardless of surrounding whitespace.
        # Trim a copy so the line itself is written back untouched.
        my $trimmed = $lh[$i];
        $trimmed =~ s/^\s+//;
        $trimmed =~ s/\s+$//;
        if ($trimmed eq $start) {
            warn "$start seen twice" if defined $start_l;
            $start_l = $i;
        }
        elsif ($trimmed eq $end) {
            warn "$end seen twice" if defined $end_l;
            $end_l = $i;
        }
    }

    if (not defined $start_l and not defined $end_l) {
        # First use on this file: add an empty marked region at the end.
        push @lh, $start; $start_l = $#lh;
        push @lh, $end; $end_l = $#lh;
    }
    elsif (not defined $start_l) {
        die "found $end but not $start";
    }
    elsif (not defined $end_l) {
        die "found $start but not $end";
    }
    elsif ($end_l <= $start_l) {
        die "$end must be later in file than $start";
    }

    my @lines;
    foreach my $name (sort keys %$vars) {
        print "doing var $name\n";
        my $value = $vars->{$name};
        die "value of $name contains funny chars: $value"
          if $value =~ tr/\\\"\n//;
        # The #undef avoids compiler warnings when local.h (or a file
        # it includes) has already defined the same name.
        push @lines, "#undef $name";
        push @lines, qq[\#define $name "$value"];
    }
    # Replace everything strictly between the two markers.
    splice @lh, $start_l + 1, ($end_l - $start_l - 1), @lines;

    open(my $out, '>', 'local.h') or die "cannot write to local.h: $!";
    foreach my $line (@lh) {
        print $out "$line\n" or die "cannot write to local.h: $!";
    }
    close $out or die "cannot close local.h: $!";
}


# check_man()
#
# Read a manual page (troff source) and check it:
#   - the section on the .TH line matches the expected section;
#   - the line after '.SH NAME' lists both the .TH name and the
#     expected name;
#   - cross-references ('.IR name (sect)' lines and the SEE ALSO list)
#     point at manual pages returned by known_manpages();
#   - '.IP filename' entries in the FILES section satisfy known_file().
# Problems are reported with warn().  Relies on warn() being caught
# so it can be used to set an error flag somewhere.  Uses some global
# variables (via known_manpages()) to know about what manual pages are
# part of the ispell package.
#
# Parameters:
#   reference to config hash
#   file to read
#   expected name of manual page
#   expected section
#   hash of installed filenames (to check FILES section)
#
# Dies if the file cannot be opened, or if '.SH NAME' is not followed
# by a well-formed 'name[, name...] \- summary' line.
#
sub check_man {
    my ($conf, $f, $expd_name, $expd_section, $files) = @_;
    # Sanity check: no entry of the page list may have been clobbered.
    foreach (@man1s) { die if not defined }
    my $known_manpages = known_manpages($conf);
    die if not defined $expd_name or not defined $expd_section;
    open(my $fh, '<', $f) or die "cannot open $f: $!";
    my ($seen_th, $th_name);

    # A '.SH' line read ahead by the FILES handler, which still has to
    # be dispatched as an ordinary line on the next iteration.
    my $pushed_back;

    # Lines are read into a lexical rather than with while (<FH>):
    # that form assigns to the global $_, which the caller may have
    # aliased to an element of one of its lists.
    while (1) {
        my $line;
        if (defined $pushed_back) {
            ($line, $pushed_back) = ($pushed_back, undef);
        }
        else {
            $line = <$fh>;
            last if not defined $line;
            chomp $line;
        }

        next if $line =~ /^\.\\\"/;   # troff comment
        if ($line =~/^\.TH\s+(\S+)\s+(\S+)/) {
            # Top-level heading.
            my ($name, $section) = ($1, $2);
            warn "$f:$.: seen two .TH lines" if $seen_th++;
            $th_name = lc $name; # assume real name is lowercase
            warn "$f:$.: section $section doesn't match $expd_section"
              if $section ne $expd_section;
        }
        elsif ($line =~ /^\.SH\s+"?NAME"?\s*$/) {
            # '.SH NAME' followed by a one-line summary.
            my $next = <$fh>;
            die "$f: unexpected end of file after .SH NAME"
              if not defined $next;
            chomp $next;
            $next =~ /^(\S+(?:,\s*\S+)*)\s+\\-\s+\S/
              or die "$f:$.: bad line following .SH NAME: $next";
            my $name_list = $1;
            my %names;
            foreach my $name (split /,\s*/, $name_list) {
                if ($name !~ /\S/) {
                    warn "$f:$.: NAME section contains name '$name'";
                    next;
                }
                $names{$name}++
                  && warn "$f:$.: name $name listed twice in NAME section";
            }

            if (not defined $th_name) {
                warn "$f:$.: found NAME section before seeing .TH";
            }
            else {
                warn "$f:$.: name $th_name not listed in NAME section"
                  if not $names{$th_name};
            }

            if (not $names{$expd_name}) {
                warn "$f:$.: expected name $expd_name not listed";
            }
        }
        elsif ($line =~ /^\.IR\s+$xref_re/o) {
            # Looks like a cross-reference to another manpage.
            unless ($known_manpages->{$1}{$2}) {
                warn "$f:$.: reference to unknown $1 ($2)";
            }
        }
        elsif ($line =~ /^\.SH\s+"?SEE ALSO"?/) {
            # 'SEE ALSO' section followed by one or more lines, each a
            # comma-separated list of references.  A line which does
            # not end in a comma is the last line of the list.
            while (defined (my $entry = <$fh>)) {
                $entry =~ s/^\s+//; $entry =~ s/\s+$//;
                $entry =~ s/^\.IR\s+//;
                foreach my $xref (split /,\s*/, $entry) {
                    if ($xref =~ /^$xref_re$/o) {
                        unless ($known_manpages->{$1}{$2}) {
                            warn "$f:$.: reference to unknown $1 ($2)";
                        }
                    }
                    elsif ($xref =~ /^(\S+)$/) {
                        # Reference without a section number - for
                        # things that don't have an installed
                        # manpage.
                        #
                    }
                    else { warn "$f:$.: bad cross-reference: '$xref'" }
                }
                last if $entry !~ /,\s*$/; # list ends
            }
        }
        elsif ($line =~ /^\.SH\s+"?FILES"?/) {
            # 'FILES' section followed by some lines.  Each line
            # should either be '.IP ' followed by a filename, or some
            # textual description (which we ignore).
            #
            while (defined (my $entry = <$fh>)) {
                $entry =~ s/^\s+//; $entry =~ s/\s+$//;
                if ($entry =~ /^\.SH\s/) {
                    # End of FILES section.  This line starts the next
                    # section, so hand it back to the main loop to be
                    # dispatched there instead of dropping it.
                    #
                    $pushed_back = $entry;
                    last;
                }
                elsif ($entry =~ /^\.IP\s+(\S+)$/) {
                    # Found filename.
                    unless (known_file($conf, $files, $1)) {
                        warn "$f:$.: reference to unknown $1";
                    }
                }
                else {
                    # Description, ignore.
                }
            }
        }
        else {
            # We don't try to parse every single line, just ignore it.
        }
    }
    close $fh or warn "cannot close filehandle opened to $f: $!";
}


# test_conf()
#
# Install ispell with a particular configuration and test the
# installed files: writes the configuration into local.h, runs
# 'make install', empties the test-suite region of local.h again, and
# then checks that exactly the expected binaries, manual pages and
# library files exist under the install prefix.  Each manual page
# found is also passed to check_man().  Uses some global variables
# for the lists of files to expect.  Assumes that the warn() handler
# with $gone_wrong is set: every problem is reported with warn().
#
# Parameters:
#   configuration hash
#   (optional) whether to leave the installed directory around
#     afterwards, even if tests passed.  You use this for testing the
#     test suite.
#
# Dies if a configuration value is undefined or if the install
# directory cannot be removed afterwards.  The directory is only
# removed when nothing has gone wrong so far.
#
sub test_conf {
    my $conf = shift;
    my $leave = @_ ? shift : 0;
    foreach my $key (keys %$conf) {
        die "no value for key $key" if not defined $conf->{$key};
    }

    set_special_local_h($conf);
    if (system('make', 'install') != 0) {
        # $! is only meaningful when the command could not be started.
        if ($? == -1) {
            warn "make install could not be run: $!";
        }
        else {
            warn "make install failed: wait status $?";
        }
    }
    set_special_local_h({});

    # Now check the desired files were installed.
    my %seen;
    find(sub {
             $seen{$File::Find::name}++
               && warn "$File::Find::name seen twice"
         }, $prefix);
    foreach my $path (keys %seen) {
        next if -f $path;
        # '_' reuses the stat information from the -f test above.
        if (-d _) {
            delete $seen{$path};   # directories are not interesting
        }
        else {
            warn "unexpected special file $path";
        }
    }
    my %all_files = %seen; # make a copy

    # Tick off each expected file; whatever is left in %seen at the
    # end was installed unexpectedly.
    #
    # The loops below use named variables, not $_, because they call
    # other subs: a callee which assigns to the global $_ would
    # otherwise overwrite the list element $_ is aliased to.
    #
    foreach my $bin (@bins) {
        (delete $seen{"$conf->{BINDIR}/$bin"})
          or warn "failed to find binary $bin";
    }
    foreach my $page (@man1s) {
        die if not defined $page;
        my $file = "$conf->{MAN1DIR}/${page}$conf->{MAN1EXT}";
        if (delete $seen{$file}) {
            check_man($conf, $file, $page, '1', \%all_files);
        }
        else {
            warn "failed to find man 1 page $file";
        }
    }
    foreach my $page (@manffs) {
        die if not defined $page;
        my $file = "$conf->{MANFFDIR}/${page}$conf->{MANFFEXT}";
        if (delete $seen{$file}) {
            check_man($conf, $file, $page, $conf->{MANFFSECT}, \%all_files);
        }
        else {
            warn "failed to find man ff page $file";
        }
    }
    foreach my $lib (@libs) {
        (delete $seen{"$conf->{LIBDIR}/$lib"})
          or warn "failed to find library $lib";
    }
    foreach my $extra (keys %seen) {
        warn "unexpected file $extra";
    }

    unless ($gone_wrong or $leave) {
        system('rm', '-rfv', $prefix)
          && die "cannot rm -rf $prefix: exit status $?";
    }
}


# known_manpages()
#
# Manual pages which it is okay to reference, given a particular
# configuration.  Returns a reference to a hash mapping name ->
# section -> boolean (or undef).  The result covers:
#   - the section 1 pages and the file format pages (the latter in
#     section $conf->{MANFFSECT}) expected to be installed, taken from
#     the globals @man1s_cp and @manffs_cp;
#   - a fixed list of pages assumed to be present already;
#   - the pages named by TIB_XREF and SPELL_XREF, for whichever of
#     those is defined in the configuration.
#
# Parameters: reference to config hash.  The hash is only read.
#
# Warns if the same page is listed twice.  Dies if an xref value is
# not of the form 'name (section)', or if the configuration contains
# a *_XREF key other than the two handled here.
#
sub known_manpages {
    my $conf = shift;
    my %known_manpages;
    # Sanity check: no entry of the page list may have been clobbered.
    foreach (@man1s) { die if not defined }
    foreach my $page (@man1s_cp) {
        die if not defined $page;
        $known_manpages{$page}{1}++ && warn "$page (1) listed twice";
    }
    foreach my $page (@manffs_cp) {
        die if not defined $page;
        $known_manpages{$page}{$conf->{MANFFSECT}}++
          && warn "$page ($conf->{MANFFSECT}) listed twice";
    }

    # Manpages which we can assume are present already.
    foreach my $std ([ 'join', 1 ], [ 'sort', 1 ], [ 'egrep', 1 ], [ 'look', 1 ]) {
        my ($name, $section) = @$std;
        $known_manpages{$name}{$section}++
          && warn "trying to install another manual page for $name ($section)";
    }

    # And the manual pages for tib and spell, if they are known about
    # in this configuration.
    #
    my %seen_xref_var;
    foreach my $var (qw[TIB_XREF SPELL_XREF]) {
        ++ $seen_xref_var{$var};
        # Copy the value instead of aliasing the hash element in a
        # foreach: aliasing would create the key (with an undefined
        # value) in the caller's hash when it is absent.
        my $xref = $conf->{$var};
        next if not defined $xref;
        if ($xref =~ /^$xref_re$/o) {
            my ($name, $section) = ($1, $2);
            $known_manpages{$name}{$section}++
              && warn "trying to install another manual page for $name ($section)";
        }
        else {
            die "badly formed xref: $xref";
        }
    }
    foreach my $key (keys %$conf) {
        next unless $key =~ /_XREF$/;
        die "unknown _XREF conf var: $key" unless $seen_xref_var{$key};
    }

    return \%known_manpages;
}

# known_file()
#
# Decide whether a manual page may mention a given filename.  A file
# is acceptable when it is one of the files installed as part of
# ispell, when it is the system wordlist named by the WORDS
# configuration variable, or when its name starts with '$' or '.'
# (something in the home directory or another place we cannot check).
#
# Parameters:
#   configuration hash
#   ref to hash whose keys are the files installed as part of ispell
#   filename to test
#
# Returns the (true) value stored for an installed file, otherwise 1
# for an acceptable file and 0 for an unknown one.
#
sub known_file {
    my ($conf, $installed, $filename) = @_;

    # Installed as part of ispell: hand back whatever the hash holds.
    if (my $entry = $installed->{$filename}) {
        return $entry;
    }

    my $acceptable = $filename eq $conf->{WORDS}   # the system wordlist
                  || $filename =~ /^[\$.]/;        # home dir or other weird place
    return $acceptable ? 1 : 0;
}
