#! /usr/bin/perl
###############################################################################
#
# $Id: striphtml,v 1.1.1.1 2003/04/21 15:30:50 bcwhite Exp $
#
# Example on how to place an HTML page into the index and get back the title
# and a summary of the page.
#
# In addition, form tags are translated into arbitrary strings.  This can be
# used (with empty strings) to strip the form altogether or (with more
# complicated things) to bring up something resembling the original, though
# non-functional.
#
# Written by Brian C. White <bcwhite@pobox.com>.
# This example code has been placed in the public domain.
#
###############################################################################


# Ahhh...  The way she moves!
use Ferret;


# Summary size handed to Ferret::StripHTML as its fourth argument below.
# NOTE(review): the previous note here read "default summary size is 256
# bytes", which does not match the 500 assigned -- presumably 256 is the
# library's own default and this example overrides it; confirm.
$summarysize = 500;


# Plain-text conversion table for form input fields.  Each key names a kind
# of form element and each value is the string that stands in for it; here
# every value is simply the key wrapped in asterisks so that substitutions
# are easy to spot in the output.  Edit the values to taste (the file header
# notes that empty strings strip the form altogether).
%formconv = (
	'form'				=>	'***form***',
	'/form'				=>	'***/form***',
	'select'			=>	'***select***',
	'radio'				=>	'***radio***',
	'radio-checked'		=>	'***radio-checked***',
	'checkbox'			=>	'***checkbox***',
	'checkbox-checked'	=>	'***checkbox-checked***',
	'submit'			=>	'***submit***',
	'reset'				=>	'***reset***',
	'text'				=>	'***text***',
	'hidden'			=>	'***hidden***',
	'image'				=>	'***image***',
	'password'			=>	'***password***',
	'textarea'			=>	'***textarea***',
);


# A different conversion table that uses graphics: each supported form
# element is replaced by an <img> tag showing a picture of the widget, with
# a text rendition in the alt attribute.
#
# NOTE(review): $imagesdir is not assigned anywhere in this file.  Unless it
# is supplied from elsewhere (presumably by "use Ferret" -- confirm), it
# interpolates as the empty string and every image URL begins with "/".
# It is defaulted explicitly here so that this is visible and easy to
# change; an already-defined value is left untouched.
$imagesdir = "" unless defined $imagesdir;

# Build one placeholder tag.
#   $stem            - image file name without the "-w95.gif" suffix
#   $width, $height  - values for the width= and height= attributes
#   $alt             - text for the alt attribute
# Returns the complete <img ...> string.
my $widget = sub {
	my($stem,$width,$height,$alt) = @_;
	return qq{<img src="$imagesdir/${stem}-w95.gif" width=$width height=$height }.
		   qq{border=0 alt="$alt">};
};

# Rows that are commented out are simply absent from the table; how
# Ferret::ChangeHTMLForm treats a missing key is not visible from this file.
%formGraphics = (
#	'form'				=>	"",
#	'/form'				=>	"",
	'checkbox'			=>	$widget->('chku',  23, 22, '[ ] '),
	'checkbox-checked'	=>	$widget->('chkc',  23, 22, '[x] '),
#	'hidden'			=>	"",
#	'image'				=>	$widget->('subm',  82, 30, '[Submit]'),
	'password'			=>	$widget->('pswd', 127, 24, '[**********]'),
	'radio'				=>	$widget->('rbtu',  20, 18, '( ) '),
	'radio-checked'		=>	$widget->('rbtc',  20, 18, '(o) '),
	'reset'				=>	$widget->('rset',  44, 18, '[Reset]'),
	'select'			=>	$widget->('slct', 120, 21, '[Select]'),
	'submit'			=>	$widget->('subm',  48, 18, '[Submit]'),
	'text'				=>	$widget->('text', 127, 24, '[__________]'),
	'textarea'			=>	$widget->('area', 161, 63, '[__________]'),
);



# Main program: for every file named on the command line, load it, run it
# through Ferret::StripHTML to obtain a title and summary, replace form
# elements in the summary using the graphics table, and print the results.
Ferret::Message();	# NOTE(review): presumably emits Ferret's banner/notice -- confirm
foreach my $path (@ARGV) {
	# A named lexical is used rather than the implicit $_: foreach aliases
	# $_ to the @ARGV element, so a callee that assigns to the global $_
	# without localizing it would corrupt both the name printed below and
	# @ARGV itself.
	my $filter = Ferret::StdExtFilters($path);
	my $file   = Ferret::LoadFile($path,$filter);

	# $title and $summary are passed by reference and are expected to be
	# filled in by StripHTML; $file is passed by reference as well and is
	# printed afterwards (presumably as the stripped text -- confirm).
	my($title,$summary);
	Ferret::StripHTML(\$file,\$title,\$summary,$summarysize);

	# Swap the commented line in to use the plain-text table instead.
#	Ferret::ChangeHTMLForm(\$summary,\%formconv);
	Ferret::ChangeHTMLForm(\$summary,\%formGraphics);

	print "====================\n$path ($title):\n";
	print "--------------------\n$summary\n";
	print "--------------------\n$file\n\n";
}
