#! /usr/bin/perl
###############################################################################
#
# $Id: shakespeare,v 1.1.1.1 2003/04/21 15:30:49 bcwhite Exp $
#
# Example on how to index on something other than a filename or url.
#
# This example will index Shakespeare's plays based on act and scene number.
# The document name actually stored is "file-act-scene" (the play's title is
# recorded separately with DBPutUser) and will have to be parsed by the
# retrieve program.
#
# Written by Brian C. White <bcwhite@pobox.com>.
# This example code has been placed in the public domain.
#
###############################################################################


# Load the search-engine module.
use Ferret;

# Create the search engine.  $search is deliberately a package global:
# WriteData and the main loop both reach it by name.
$search = Ferret->new;

# Open the index file (for update)
$search->Update("../shakespeare.index");

# Index every word, stop words included, so that searching for quotes works.
# If stop words were filtered out, the phrase "to be or not to be" would
# become an empty query, since every word in it is a stop word.  In most
# instances people search by content words, but for something like this
# every word is important.
$search->SetOption(Ferret::OPT_NOSTOPPERS());


# WriteData flushes the scene collected so far.
#
# It takes no arguments; everything is passed through package globals (poor
# style, as the original author readily admitted):
#   $file   - name of the play being processed (".txt.gz" already removed)
#   $act    - numeral captured from the current ACT heading
#   $scene  - numeral captured from the current SCENE heading
#   $words  - text accumulated since the last heading (trimmed in place here)
#   $search - the Ferret object the text is added to
#
# When all of $act, $scene and $words are set, the text is written through
# gzip to "$file-$act-$scene.gz" and handed to $search->AddDocument under the
# name "$file-$act-$scene".  Otherwise the call does nothing.
#
# Dies if the gzip pipe cannot be started or does not finish cleanly.  The
# return value is not used by the callers in this file.
sub WriteData {
	# Nothing to flush until an act heading, a scene heading and some text
	# have all been seen.
	return unless ($act && $scene && $words);

	my $filename = "$file-$act-$scene";

	# Drop leading blank lines and collapse trailing whitespace to a single
	# newline.
	$words =~ s/^\s+\n//s;
	$words =~ s/\s+$/\n/s;

	print "  - Act $act, Scene $scene\n";

	# The output redirect needs a shell, so protect the file name with single
	# quotes (embedded single quotes are escaped) to keep spaces and shell
	# metacharacters in it from being interpreted.
	(my $target = "$filename.gz") =~ s/'/'\\''/g;
	open(my $out, "|-", "gzip -9 >'$target'")
		|| die "Error: Could not write '$filename.gz' -- $!\n";
	print $out $words;

	# With a pipe, a failing redirect or a failing gzip is only reported when
	# the handle is closed: $! is set for I/O errors, $? for a bad exit status.
	unless (close($out)) {
		my $reason = $! ? "$!" : "gzip exited with status " . ($? >> 8);
		die "Error: Could not write '$filename.gz' -- $reason\n";
	}

	$search->AddDocument($filename,$words);
}


# Parse each file given on the command line.
#
# Every argument is decompressed with "gzip -dc".  The first non-blank line is
# taken as the play's title and recorded with DBPutUser; the remaining lines
# are split at "ACT <numeral>" and "SCENE <numeral>" headings, and each scene
# is flushed through WriteData.
#
# $file must stay a package global (no "my"): WriteData reads it by name, as
# it does $act, $scene and $words.
foreach $file (@ARGV) {
	# Remember the name as given, for messages after $file has been trimmed.
	my $source = $file;

	# List-form pipe open runs gzip directly, without a shell, so unusual
	# characters in the file name cannot be misread as shell syntax.  The "--"
	# keeps a name starting with "-" from being taken as an option.
	open(my $in, "-|", "gzip", "-dc", "--", $file)
		|| die "Error: Could not open '$file' -- $!\n";
	$file =~ s/\.txt\.gz$//;
	print "Adding \"$file\" ... ";

	# Reset the per-play state shared with WriteData.
	$words = "";
	$act   = "";
	$scene = "";
	$line  = 0;
	$title = "";

	# The title is the first line holding something other than whitespace.
	# Stop at end of input so that an empty or unreadable file cannot loop
	# forever.
	while (defined(my $first = <$in>)) {
		next unless $first =~ m/\S/;
		$title = $first;
		last;
	}
	if ($title eq "") {
		print "\n";
		warn "Warning: No title found in '$source' -- skipped\n";
		close($in);
		next;
	}
	$title =~ s/^\s*(.*?)\s*$/$1/;
	$title =~ s/\s+/ /g;
	print "($title)\n";
	$search->DBPutUser($file,$title);

	# Lines that merely repeat the title are not part of any scene.  The title
	# is quoted so that punctuation in it is matched literally.
	my $title_re = qr/^\s*\Q$title\E\s*$/i;

	while (<$in>) {
		next if m/$title_re/;
		if (m/^ACT ([IVX]+)[:\s]*$/) {
			# Save the capture before calling WriteData, which runs its own
			# regular expressions.
			my $new_act = $1;
			WriteData();
			$act   = $new_act;
			$scene = "";
			$words = "";
			next;
		}
		if (m/^SCENE\s*([IVX]+)[:\s]/) {
			my $new_scene = $1;
			WriteData();
			$scene = $new_scene;
			$words = "";
			$line  = $.;	# input line of the heading; not read in this file
			next;
		}
		$words .= $_;
	}

	# Flush the final scene of the play.
	WriteData();

	# Closing a pipe reports how gzip finished; a failure here means the text
	# read above may be incomplete.
	unless (close($in)) {
		my $reason = $! ? "$!" : "gzip exited with status " . ($? >> 8);
		warn "Warning: Problem reading '$source' -- $reason\n";
	}
	print "\n";
}

print "Writing database...\n";
$search->Close();
