#!/usr/bin/perl -w
#   FauBackup - Backup System, using a Filesystem for Storage
#   Copyright (C) 2000-2003 Martin Waitz, Dr. Volkmar Sieh
#   $Id: faubackup-find.in,v 1.2 2003/12/11 00:32:01 tali Exp $
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

use strict;

my $verbose;
my $useregexp = 1;

# little bit more shell like matching
# '*' is standard wildcard
# '**' even crosses directory boundaries
# '?' for exactly one char
# all other characters get escaped (e.g. '.')
# you can use full regexps when preceding the line with 'REGEXP:'
sub match2regexp($)
{
	my $regexp = shift;

	print STDERR "match2regexp: '$regexp' -> " if $verbose;
	if( $regexp =~ /^REGEXP:\s*(.*)$/ ) {
		$regexp = $1;
		if( !$useregexp ) {
			# replace with known invalid one, check_ignore will warn
			$regexp += "/ :-(";
		}
	} else {
		$regexp =~ s/([^\?\*a-zA-Z0-9\/])/\\$1/g; # escape everything
		$regexp =~ s/\?/./g;
		$regexp =~ s/\*\*/{}/g;			# unescaped '{}' for '**'
		$regexp =~ s/\*/[^\/]*/g;		# to not let '**' match here
		$regexp =~ s/\{\}/.*/g;
	}
	print STDERR "'$regexp'\n" if $verbose;

	return $regexp;
}

# escape all characters that could be special in a regexp
sub escape($)
{
	my $str = shift;
	$str =~ s/([^a-zA-Z0-9\/])/\\$1/g;
	return $str;
}

sub check_ignore($$)
{
	my( $ignore, $conf ) = @_;

	if( $ignore =~ /^\// ) {
		print STDERR "Warning: using absolute path in $conf\n";
		return 0;
	}
	eval { "foobar" =~ /^$ignore$/; };
	if( $@ ) {
		print STDERR "Warning: invalid ignore pattern /$ignore/" .
			" in $conf: $@\n";
		return 0;
	}

	1;
}

# read a list of patterns to ignore from the directory specified
# one entry per line, which gets prefixed by the directory name
# (to allow to specify to ignore things in subdirs
sub get_ignorelist($)
{
	my $dir = shift;
	my $ignore = "$dir/.faubackuprc";
	my @ignorelist;
	return unless -f $ignore;

	open IGNORE, $ignore or print STDERR "could not open $ignore: $!\n";
	while(<IGNORE>) {
		chomp;
		if( /^#/ ) { next; }
		if( /^\s*NoBackup:\s*(.+)\s*$/ ) {
			check_ignore($1, $ignore) or next;
			push @ignorelist, escape("$dir/") . $1;
			next;
		}
		if( /^\s*NoBackup\s*$/ ) {
			push @ignorelist, ".*";
			next;
		}
		if( /^\s*Ignore\s+(.+)\s*$/ ) {
			my $regexp = match2regexp($1);
			check_ignore($regexp, $ignore) or next;
			push @ignorelist, escape("$dir/") . $regexp;
			next;
		}
		# more things to come...
	}
	close IGNORE or print STDERR "could not close $ignore: $!";

	return @ignorelist;
}

# search one directory and print the path of every item contained therein
# recurses on subdirectories
sub process_dir($@)
{
	my( $dir, @parentlist ) = @_;
	my( $file, $ignore );
	my @entries;

	# merge list from parent with this one:
	my @ignorelist = get_ignorelist($dir);
	#push @ignorelist, @parentlist;
	foreach (@parentlist) { push @ignorelist, $_ };

	# traverse all directory entries
	unless( opendir DIR, $dir ) {
		print STDERR "could not open $dir/: $!";
		return;
	}
	@entries = readdir DIR;
	closedir DIR or die "could not close $dir/: $!";
	FILE:
	foreach $file (@entries) {
		next if $file eq '.' || $file eq '..';
		$file = "$dir/$file";
		# check ignore lists
		foreach (@ignorelist) {
			next FILE if $file =~ /^$_$/;
		}
		# not matched any filter
		# recurse if this is a directory we can cd into
		if( !-l $file && -d _ && -x _ ) {
			&process_dir( $file, @ignorelist );
		}
		# output file name
		print "$file\000";
	}

}


# command line arguments are used as wildcards of files to ignore
my @ignorelist;
foreach (@ARGV) {
	if( /^--noregexp$/ ) { #check for the only option
		$useregexp = 0;
		next;
	}
	# interpret argument as an ignore-match
	my $regexp = match2regexp $_;
	check_ignore($regexp, "command line ignore list") or next;
	push @ignorelist, "\\./$regexp";
}
# start search
process_dir( ".", @ignorelist );
print ".\000";
