#!/usr/bin/perl -w
# 
# This program, called rand_gen, generated the test data for fix8bit.
# The -h option makes it include high-bit-set characters (such data
# cannot pass unchanged through fix8bit -8 | fix8bit -7).  The -u
# option makes it include unnecessary escape sequences, escapes for
# 7-bit characters which would not be generated by fix8bit itself.
# The -x option makes it (very occasionally) include hex escape
# sequences.  Proper random generation of hex escapes is not done
# because I got fed up at that point.
# 

use strict;
use Getopt::Std; use vars qw($opt_h $opt_u $opt_x); getopts('hux');

# The generated data is a stream of arbitrary byte values (including
# newline and, with -h, values 128..255).  Put STDOUT into binary mode
# so that neither CRLF translation nor a UTF-8 output layer (perl -C,
# PERL_UNICODE) can alter the bytes on their way out.
binmode(STDOUT)
    or die "rand_gen: cannot set binary mode on STDOUT: $!\n";

# Emit 1000 random items.  Each item is, with equal probability,
# either a backslash-introduced sequence or one random character.
foreach (0 .. 1000-1) {
    if (rand() < .5) {
	print '\\';
	# Occasionally pile up extra backslashes before the body.
	print '\\' while rand() < .1;
	if (rand() < .5) {
	    # Normal three-digit octal sequence
	    print rand_first_octal();
	    print rand_octal() foreach 0, 1;
	}
	else {
	    # Something a bit more interesting: a first character that
	    # may or may not be octal, followed by one to five more
	    # (the range truncates rand(5) to an integer 0..4).
	    print rand_first_funny();
	    foreach (0 .. rand(5)) {
		print rand_funny();
	    }
	}
    }
    else {
	print rand_char();
    }
}

# We assume that you can just generate 8-bit character values and Perl
# will output them as is.  Hopefully even with the locale support this
# is still true, since we didn't 'use locale'.
# 
sub rand_char {
    # With -h any 8-bit value may be produced; otherwise stay within
    # the 7-bit range.
    my $limit = $opt_h ? 256 : 128;
    my $code = int(rand($limit));
    return chr($code);
}

sub rand_octal {
    # Pick one of the eight octal digit characters uniformly.
    my $octal_digits = '01234567';
    return substr($octal_digits, int(rand(8)), 1);
}

sub rand_first_octal {
    # With -u the whole range 000 up to (but not including) 400 is
    # allowed, so the leading digit is any of 0..3.
    return chr(ord('0') + int(rand(4))) if $opt_u;

    # Otherwise only high-bit-set characters are allowed, i.e. the
    # sequence must start with 2 or 3.
    return rand() < .5 ? '2' : '3';
}

# Generate something that could be an octal digit, but it might not
# be. 
sub rand_funny {
    # Most of the time (70%) this really is an octal digit.
    return rand_octal() if rand() < .7;

    # Of the remainder, 40% are decimal digits that are not octal.
    if (rand() < .4) {
	my @non_octal = ('8', '9');
	return $non_octal[int(rand(2))];
    }

    # What is left splits evenly between a backslash and an
    # arbitrary random character.
    return rand() < .5 ? '\\' : rand_char();
}

# Ditto but obeying the requirement that octal sequences for 7-bit
# characters should not be included.
# 
sub rand_first_funny {
    # Most of the time (70%) start with a permitted leading octal digit.
    return rand_first_octal() if rand() < .7;

    # Of the remainder, 40% are decimal digits that are not octal.
    if (rand() < .4) {
	my @non_octal = ('8', '9');
	return $non_octal[int(rand(2))];
    }

    # What is left is a "safe" random character, preceded by a
    # backslash half of the time.
    my $prefix = rand() < .5 ? '\\' : '';
    return $prefix . rand_char_safe();
}

# Need to make sure that \ is not followed by 0 or 1
# (conservatively make sure that \0.. or \1.. octal
# sequences cannot be output).
# 
# Also avoid \x and \X.
# 
sub rand_char_safe {
    # Rejection sampling: keep drawing random characters until one is
    # acceptable.  The loop has no upper bound, so a rigged random
    # generator could keep it spinning forever.
    while (1) {
	my $candidate = rand_char();
	return $candidate if char_is_safe($candidate);
    }
}

# Unless -u or -x are given, we want to conservatively avoid some
# characters after backslash (or before an unknown character) to not
# get \0.. \1.. (without -u) or \x.. \X.. (without -x) escape sequences.
# 
sub char_is_safe {
    my ($c) = @_;

    # A backslash is rejected unless both -u and -x are in effect.
    return 0 if $c eq '\\' && !($opt_u && $opt_x);

    # Without -u, '0' and '1' are rejected.
    return 0 if !$opt_u && ($c eq '0' || $c eq '1');

    # Without -x, 'x' and 'X' are rejected.
    return 0 if !$opt_x && ($c eq 'x' || $c eq 'X');

    return 1;
}
