#!/usr/bin/perl -w
use strict;
my ($e3, $e4, $e5);
if ($ARGV[0] eq '-e3') {
    $e3 = 1;
    shift @ARGV;
}
elsif ($ARGV[0] eq '-e4') {
    $e4 = 1;
    shift @ARGV;
}
elsif ($ARGV[0] eq '-e5') {
    $e5 = 1;
    shift @ARGV;
}

my ($old, $new) = @ARGV; die if not defined $new;
open (OLD, $old) or die "cannot open $old: $!";
open (NEW, $new) or die "cannot open $old: $!";

if (not $e3 and not $e4 and not $e5) {
    while (<OLD>) {
	my @oldbits = split;
	$_ = <NEW>;
	my @newbits = split;
	check(\@oldbits, \@newbits);
    }
    while (<NEW>) {
	die "extra lines in $new\n";
    }
}
else {
    local *old_expns = get_expns(\*OLD); use vars '%old_expns';
    local *new_expns = get_expns(\*NEW); use vars '%new_expns';

    foreach (keys %old_expns) {
	my $oe = delete $old_expns{$_};
	my $ne = delete $new_expns{$_};
	die if not defined $oe;
	die "no expansion found for $_ in new file" if not defined $ne;
	my @oldbits = ($_, sort @$oe);
	my @newbits = ($_, sort @$ne);
	check(\@oldbits, \@newbits);
    }
    foreach (keys %new_expns) {
	die "expansion of $_ in new output but not mentioned in old";
    }
}

sub check {
    local *oldbits = shift; use vars '@oldbits';
    local *newbits = shift; use vars '@newbits';
    my $oldf = shift @oldbits;
    my $newf = shift @newbits;
    
    if ($oldf ne $newf) {
	die "differing input word $oldf vs $newf\n";
    }
    # Ignore capitalization for now
    $_ = lc foreach @oldbits, @newbits;
    @oldbits = sort @oldbits;
    @newbits = sort @newbits;
    if ("@oldbits" ne "@newbits") {
	die "differing expansion of $oldf: @oldbits vs @newbits\n";
    }
}
    
sub get_expns {
    local *FH = shift;
    my (%expns, %lastseen, %ratio, %no_expns);

    while (<FH>) {
	chomp;
	my ($in, $e, $root_word);
	if (not $e5) {
	    s/^(\S+)\s+(\S+)// or die "bad line $_";
	    ($in, $e) = ($1, $2);
	    if ($in =~ m!^([a-zA-Z]+)/[A-Z]+$!) {
		$root_word = $1;
	    }
	    elsif ($in =~ m!^[a-zA-Z]+$!) {
		$root_word = $in;
	    }
	    else { die "bad word/flags: $in" }
	}
	else {
	    # -e5 expansion.
	    # 
	    # The word printed by itself without any flags applied
	    # doesn't get the corresponding expansion printed on the
	    # same line (that would just be repeating oneself).
	    # 
	    s/^(\S+)\s*// or die "bad line $_";
	    $in = $1;
	    if ($in =~ tr!+!!) {
		s/(\S+)// or die "bad remainder of line $_";
		$e = $1;

		# Rearrange the flags into alphabetical order.
		$in =~ /^([A-Za-z]+)\+([A-Z]+)$/ or die;
		$in = "$1+" . join('', sort(split(//, $2)));
		$root_word = $1;
	    }
	    else {
		$root_word = $e = $in;
	    }
	}
	(defined($in) && defined($e) && defined($root_word)) || die;
	if ($e4) {
	    if ($in =~ tr!/!!) {
		# The input had some flags, it should get expanded,
		# expect to get a ratio.
		#
		s/\s+(\d+\.\d+)// or die "no ratio";
		if (defined $ratio{$in} and $ratio{$in} != $1) {
		    die "ratio for $in has changed from $in to $1";
		}
		$ratio{$in} = $1;
	    }
	    else {
		# Don't expect a ratio, and don't expect any further
		# expansions.
		# 
		die "line without flags has ratio" if /\d$/;
		die "line without flags has differing expansion"
		  if $in ne $e;
		$no_expns{$in}++ && die "word without flags expands twice";
	    }
	}
	die "bad line ($_ remains)" if length;
	
 	# Check that neither a root word not root-and-flags is
 	# scattered across the file.  All the expansions should be
 	# listed consecutively.
	# 
	foreach ($in, $root_word) {
	    if (defined $lastseen{$_} and $lastseen{$_} != $.-1) {
		die "$_ was last seen at line $lastseen{$_}, now line $.";
		$lastseen{$_} = $.;
	    }
	}
	push @{$expns{$in}}, $e;
    }
    return \%expns;
}
