#!/usr/bin/perl -w

eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}'
    if 0; # not running under some shell
# Copyright 2002 Ed Avis.  See the file COPYING.

=head1 NAME

pip - wrap programs to use them as filters

=head1 SYNOPSIS

C<pip [-i|-o|-b]... PROG ARGUMENTS>

where for every C<-i> (input), C<-o> (output), or C<-b> (both) switch, there is
one argument of the form '-', or '-.foo' for some string 'foo'.

=head1 DESCRIPTION

The '-' or '-.foo' arguments are placeholders and are matched up
left-to-right with the C<-i>, C<-o> and C<-b> switches.  Each placeholder is
replaced by the name of a temporary file.  For a C<-i> switch this is an
input file, which is created by pip before the command runs and
contains data read from pipE<39>s standard input.  For C<-o> it is an
output file, whose contents are printed after the command runs.  For
C<-b> the temporary file is both an input file and an output file.

If a placeholder argument has the form '-.foo' then the temporary
filename will end in '.foo'.  This is useful for programs which change
behaviour based on filename, such as the C compiler.  But note that an
argument of '-foo' would not be changed.

If there are multiple input files then each input file gets the same
data.  If there are multiple output files then each output file is
printed in turn.  If there are no input files then standard input is
passed through unchanged to the program.  The programE<39>s standard
output is untouched, so if the program prints anything then this
appears before the content of any output files.

=head1 EXAMPLES

=over

=item C<pip -i mozilla ->

will read a file from standard input, and give it to mozilla to
display.  The final commandline might be C<mozilla /tmp/pip123.tmp>.

=item C<pip -io cc -.c -o ->

will pipe a C program through the compiler, giving an executable on
standard output.  The temporary input filename given to cc will end in
'.c'.

=item C<pip -b emacs ->

will read data, give the user the chance to modify it in emacs, and
then print it.

=back

=head1 COMPARISON WITH REAL PIPES

When using pip you must wait for all input to be consumed before the
command is run, and for the command to exit before seeing any of the
output.  You donE<39>t get partial output as you would with pipes.
But pip works with programs that seek backwards and forwards in their
input, and which may write their output in a strange order too.

For some programs you may find the devices /dev/stdin and /dev/stdout
a better option than pip, since these allow partial input and partial
output.  However they are not seekable, so will not work with all
programs.

=head1 BUGS

Because pip unlinks temporary files before exiting, and exits as soon
as running the program returns, any command which puts itself
into the background before opening its input files will not work.

Not related to the CP/M command of the same name.

=head1 SEE ALSO

L<pip_tex(1)>.

=head1 AUTHOR

Ed Avis, ed@membled.com

=cut

use strict;
use IO::Handle;
sub tmpnam();

sub usage() {
    print STDERR <<END
usage: $0 [-i|-o|-b]... PROG ARGS...
where some ARGS are - or -.foo
-i replaces the placeholder with a file containing standard input,
-o replaces it with a file whose contents are output after running,
-b does both.
Use the placeholder -.foo to make a temporary file ending '.foo'.

Report bugs to <ed\@membled.com>.
END
  ;
}
if (@ARGV < 1) {
    usage();
    exit(1);
}

# Split the arguments into flags, and the rest.
my $flags = '';
my @rest = ();

# Get the flags into a big lump.
while (my $arg = shift @ARGV) {
    if (index('--help', $arg) == 0) {
	usage();
	exit(0);
    }
    elsif (index('--version', $arg) == 0) {
	print STDERR <<END
pip 1.0
Written by Ed Avis.

Copyright (C) 2002 Ed Avis.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
END
  ;
	exit(0);
    }
    elsif ($arg =~ /^--/) {
	print STDERR "unknown option $arg\n";
	usage();
	exit(1);
    }
    elsif ($arg =~ /^-(.*)/) {
	local $_ = $1;
	
	if (tr/iob//c) {
	    die "bad flag $_, expected -i, -o, or -b";
	}
	elsif ($_ eq '') {
	    die "argument '-' must come after name of program to run";
	}
	else {
	    $flags .= $_;
	}
    }
    else {
	# Not a flag, push it back and finish.
	unshift @ARGV, $arg;
	last;
    }
}

@rest = @ARGV;
die "no program specified, usage: $0 [-i|-o]... PROG ARGS..."
    if @rest == 0;

my ($prog, @args) = @rest;
my @tmpfiles;
my @infiles;
my @outfiles;
my %is_outfile;

# Go through all the flags, substituting filenames for '-'
# arguments.
#
my $flag;
foreach $flag (split(//, $flags)) {
    my $found = 0;
    my $n;
    foreach $n (0 .. $#args) {
	if ($args[$n] =~ /^-(.*)/) {
	    my $ext = $1;
	    next if ($ext ne '' and $ext !~ /^\./);
	    my $tmpfile = tmpnam() . $ext;
	    push @tmpfiles, $tmpfile;
	    if ($flag eq 'i') {
		push @infiles, $tmpfile;
	    }
	    elsif ($flag eq 'o') {
		push @outfiles, $tmpfile;
		$is_outfile{$tmpfile} = 1;
	    }
	    elsif ($flag eq 'b') {
		push @infiles, $tmpfile;
		push @outfiles, $tmpfile;
		$is_outfile{$tmpfile} = 1;
	    }
	    else { die }

	    $args[$n] = $tmpfile;
	    $found = 1;
	    last;
	}
    }
    die "no '-' argument found for flag $flag" if not $found;
}

# Get stdin if necessary.  Each input file gets the same data.
if (@infiles) {
    my @handles = ();
    foreach (@infiles) {
	my $fh = new IO::Handle;
	open ($fh, ">$_") or die "can't write to $_: $!";
	push @handles, $fh;
    }

    while (<STDIN>) {
	my $handle;
	foreach $handle (@handles) {
	    print $handle $_;
	}
    }

    foreach (@handles) {
	close $_;
    }
}

# Run the program.
system($prog, @args);

# Remove input files.
foreach (@infiles) {
    next if $is_outfile{$_};
    (not -e $_) or unlink or die "cannot unlink $_: $!";
}

# Print output if necessary, and remove files.
my $outfile;
foreach $outfile (@outfiles) {
    unless (open (OUTFILE, $outfile)) {
	if ($! =~ /^No such file or directory/
	    and $outfile !~ m!\.[^/]*$!)
        {
	    # Sometimes DOSish programs add an extension to the output
	    # filename without being asked.  Sniff around and see if
	    # we can find any evidence of this.
	    #
	    if (-e $outfile) {
		die "open() said $outfile doesn't exist, but it does";
	    }
	    my @poss = <$outfile.*>;
	    if (@poss == 0) {
		# Nope, nothing.
		die "$prog didn't create $outfile or any $outfile.*\n";
	    }
	    elsif (@poss == 1) {
		# It looks like the program has indeed created an
		# output file with a silly name.
		#
		my $o = $poss[0];
		$o =~ /^$outfile(\..*)$/ or die;
		my $ext = $1;
		warn <<END;
$prog has created the file '$o' instead of '$outfile'.  Perhaps you
should have given the output placeholder as '-$1' instead of '-'?
END
                open (OUTFILE, $o) or die "cannot open $o: $!";
		$outfile = $o;
	    }
	    else {
		my $s = "$prog did not create $outfile, "
		  . "but it did create: " . join(', ', @poss) . "\n"
		    . "I can't handle this sort of thing, giving up";
		die $s;
	    }
	}
	else {
	    die "cannot open $outfile: $!";
	}
    }

    while (<OUTFILE>) {
	print;
    }
    close OUTFILE;
    unlink $outfile or die "cannot unlink $outfile: $!";
}


# tmpnam()
#
# Return a name for a temporary file.  I would use the tmpnam()
# included with Perl's POSIX module, but some programs from MS-DOS
# backgrounds truncate any leafname with more than eight characters
# before the dot.
#
# I believe this is secure because it creates a directory with 0700
# permissions and then uses that.  Hopefully mkdir() is atomic and
# will not return true unless it created a directory with the perms
# specified.  There is a DoS attack from not being able to create that
# directory in the first place; perhaps I should switch to a
# less-predictable name generator.  But whatever is chosen must use
# 8.3 filenames.
#
my $num; # number within our temporary directory;
sub tmpnam() {
    die 'usage: tmpnam()' if @_;

    # Max. tries to think of a directory name before giving up.
    my $MAX_TRIES = 1000;

    my $tmpdir = $ENV{TMP} || $ENV{TMPDIR} || $ENV{TEMP} || '/tmp';
    die "bad temp directory $tmpdir" if not -d $tmpdir;

    use vars '$made'; # global, so can remove later
    for (my $n = 0; $n < $MAX_TRIES; $n++) {
	my $dir = "$tmpdir/$$.$n";
	if (mkdir $dir, 0700) {
	    $made = $dir;
	    last;
	}
    }
    if (not $made) {
	die "failed to make a directory in $tmpdir, even after $MAX_TRIES attempts\n";
    }

    # Pick an unused filename in this directory.  Since we created the
    # dir ourselves we don't need to look at what it contains.
    #
    $num = 0 if not defined $num;
    return "$made/" . $num++;
}
# And here's some code to clear up the mess tmpnam() leaves behind.
END {
    (not defined $made)
      or (not -e $made)
	or (rmdir $made)
	  or warn "cannot rmdir $made: $!";
}
