#!/usr/bin/perl -w

eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}'
    if 0; # not running under some shell

=pod

=head1 NAME

tv_grab_uk - Grab TV listings for the United Kingdom.

=head1 SYNOPSIS

tv_grab_uk --help

tv_grab_uk [--config-file FILE] --configure

tv_grab_uk [--config-file FILE] [--output FILE] [--days N]
           [--offset N] [--quiet]

=head1 DESCRIPTION

Output TV and radio listings in XMLTV format for many stations
available in Britain.  The data comes from the Ananova website (and is
subject to their terms and conditions).

First you must run B<tv_grab_uk --configure> to choose which stations
you want to receive.  Choose a TV region, and optionally some digital
or satellite channels plus radio.  To help you choose digital channels
some 'packages' of channels are defined.

Then running B<tv_grab_uk> with no arguments will get a listings for
the stations you chose, for as many days as possible.

B<--configure> Prompt for which stations to download and write the
configuration file.

B<--config-file FILE> Set the name of the configuration file, the
default is B<~/.xmltv/tv_grab_uk.conf>.  This is the file written by
B<--configure> and read when grabbing.

B<--output FILE> When grabbing, write output to FILE rather than
standard output.

B<--days N> When grabbing, grab N days rather than as many as
possible.

B<--offset N> Start grabbing at today + N days.  N may be negative.

B<--quiet> suppress the progress messages normally written to standard
error.

=head1 SEE ALSO

L<xmltv(5)>, L<http://www.ananova.com/tv/>,
L<http://www.ananova.com/about/terms.html>

=head1 AUTHOR

Ed Avis, ed@membled.com

=head1 BUGS

Currently there is no means to edit a previous configuration, you have
to reconfigure from scratch.  There is also no system to prompt the
user about new channels that have appeared on the Ananova site,
although they do appear as warning messages.

=cut

use strict;
use XMLTV::Version '$Id: tv_grab_uk.in,v 1.96 2004/01/03 14:37:43 epaepa Exp $ ';
use Getopt::Long;
use Date::Manip;
use IO::File;
use Tie::RefHash;
use Data::Dumper;
use File::Find;

# Use Log::TraceMessages if installed.
BEGIN {
    eval { require Log::TraceMessages };
    if ($@) {
	*t = sub {};
	*d = sub { '' };
    }
    else {
	*t = \&Log::TraceMessages::t;
	*d = \&Log::TraceMessages::d;
	Log::TraceMessages::check_argv();
    }
}

# Use Term::ProgressBar if installed.
use constant Have_bar => eval { require Term::ProgressBar; 1 };

use XMLTV;
# We modify the XMLTV module's tables to add a <distribution> element
# to <channel>s.  We assume each channel has exactly one distribution
# method.  This list is to replace @XMLTV::Channel_Handlers where
# support for the extra element is needed.
#
my @new_channel_handlers
  = (@XMLTV::Channel_Handlers,
     [ 'distribution', 'scalar', '1' ],
    );

use XMLTV::TZ qw(gettz tz_to_num);
use XMLTV::Memoize;
use XMLTV::Ask;
use XMLTV::Config_file;
use XMLTV::Get_nice;
use XMLTV::Date;
use XMLTV::Usage <<END
To configure: $0 --configure [--config-file FILE]
To grab listings: $0 [--config-file FILE] [--output FILE] [--days N] [--offset N]
END
  ;

# Prototype declarations
sub get_copyright( $$ );
sub get_regions( $$ );
sub read_channels_page( $$ );
sub fix_times( $ );
sub fix_zero_length( $ );
sub extract_variant( $$ );
sub get_channel_pkgs();
sub init_channels();
sub get_pages( $$ );
sub configure();
sub grab( @ );
sub fix_ananova_xml( $ );

# Global channel data.
our @ch_all;
our %ch_by_a; # index by Ananova id, to 'set' of objects
tie %ch_by_a, 'Tie::RefHash::Nestable';
our %ch_by_x; # index by XMLTV id
our %ch_gone_by_x; # channels that vanished from the site
our %ch_transient_by_x; # channels that may vanish without warning

# Check options.  First do the undocumented --cache option (to cache
# get_nice_aux(), which retrieves web pages), then the normal ones.
#
my $using_cache
  = XMLTV::Memoize::check_argv('XMLTV::Get_nice::get_nice_aux');
my ($opt_days,
    $opt_help,
    $opt_output,
    $opt_share,
    $opt_configure,
    $opt_config_file,
    $opt_offset,
    $opt_quiet,
   );
$opt_days   = 4; # default
$opt_quiet  = 0; # default
$opt_offset = 0; # default today
GetOptions('days=i'        => \$opt_days,
	   'help'          => \$opt_help,
	   'configure'     => \$opt_configure,
	   'config-file=s' => \$opt_config_file,
	   'output=s'      => \$opt_output,
	   'share=s'       => \$opt_share, # undocumented
           'offset=i'      => \$opt_offset,
	   'quiet'         => \$opt_quiet,
	  )
  or usage(0);
die 'number of days must not be negative'
  if (defined $opt_days && $opt_days < 0);
if ($opt_help) {
    usage(1);
}

# share/ directory for storing channel mapping files.  This next line
# is altered by processing through tv_grab_uk.PL.  But we can use the
# current directory instead of share/tv_grab_uk for development.
#
# The 'source' file tv_grab_uk.in has $SHARE_DIR undef, which means
# use the current directory.  In any case the directory can be
# overridden with the --share option (useful for testing).
#
my $SHARE_DIR='/usr/share/xmltv'; # by grab/uk/tv_grab_uk.PL
t 'share directory hardcoded in script: ' . d $SHARE_DIR;
$SHARE_DIR = $opt_share if defined $opt_share;
t 'after setting from --share option: ' . d $SHARE_DIR;
my $OUR_SHARE_DIR = (defined $SHARE_DIR) ? "$SHARE_DIR/tv_grab_uk" : '.';
t 'directory containing tv_grab_uk stuff: ' . d $OUR_SHARE_DIR;
(my $CHANNEL_NAMES_FILE = "$OUR_SHARE_DIR/channel_ids") =~ tr!/!/!s;
t 'file of channel ids: ' . d $CHANNEL_NAMES_FILE;

# PHP page provided by Ananova to access their internal listings data.
# This is used as an inital substring of all the URLs gotten by this
# program.
#
my $BASE_URL = 'http://www.ananova.com/tv_listings/_xmltv.php';

# File that stores which channels to download.
my $config_file
  = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_uk', $opt_quiet);

# This list contains lines from the config file and is used only when
# grabbing.  We set it up now to catch errors about reading the config
# file early on.
#
my @config_lines;
if ($opt_configure) {
    XMLTV::Config_file::check_no_overwrite($config_file);
    say("Going to configure channels to download, please wait\n");
}
else {
    @config_lines = XMLTV::Config_file::read_lines($config_file);
}

# On Windows Date::Manip can have trouble finding the local timezone.
# Since the output listings shouldn't depend on the local timezone
# anyway, we just set it here.
#
Date_Init('TZ=+0000');

# Things will go horribly wrong if Ananova change pages from one day
# to the next while the script is running.  Assume they do it at
# midnight.
#
# This code shouldn't be affected by the Date::Manip bug with parsing
# 'now' after setting a different timezone: I don't know exactly when
# the changeover happens, and especially not on a day when clocks
# shift anyway.  So a one hour error matters little.
#
our $now;
$now = parse_date('now');
END {
    if (defined $now
	and UnixDate(parse_date('now'), '%Q') ne UnixDate($now, '%Q')) {
	warn "current day has changed, results may be messed up\n";
    }
}

# Memoize some date parsing routines, if possible.  FIXME move to
# XMLTV::Memoize.
#
eval { require Memoize };
unless ($@) {
    foreach (qw(parse_date UnixDate DateCalc Date_Cmp
		tz_to_num)) {
	Memoize::memoize($_) or warn "cannot memoize $_";
    }
}

# Download the two index pages now.
my $index_pages = get_pages('getting list of channels', [ qw(regions allchannels) ]);

# Set up the global channels and regions data.  This is needed for
# both configuration and grabbing.
#
init_channels();
read_channels_page($index_pages->{allchannels}->{content},
		   $index_pages->{allchannels}->{url});

# Then regions uses information already gathered about channels.
my $regions = get_regions($index_pages->{regions}->{content},
			  $index_pages->{regions}->{url});
my %region_display = reverse %$regions;

# Predefined channel packages for ease of configuration.
my %channel_pkgs = get_channel_pkgs();

if ($opt_configure) {
    configure();
}
else {
    my %g_args = ();
    if (defined $opt_output) {
	my $fh = new IO::File ">$opt_output";
	die "cannot write to $opt_output\n" if not $fh;
	%g_args = (OUTPUT => $fh);
    }
    grab(%g_args);
}


# Ask the user which channels to download, and write $config_file.
sub configure() {
#    local $Log::TraceMessages::On = 1;
    # FIXME need to make directory
    open(CONF, ">$config_file") or die "cannot write to $config_file: $!";
    my %chose_ch;

    # Need to ask:
    #
    # - Do you want the five main channels and if so what region?
    # - Do you have satellite?
    # - Do you have radio?  (incl. with Sky Digital)
    # - If satellite, prompt for channel packages.
    #
    my $terr = askBooleanQuestion
      ('Do you want to download the five main channels (for your region)?', 1);
    if ($terr) {
	my @region_dns = sort keys %region_display;
	die if not @region_dns;
	t '\%region_display: ' . d \%region_display;
	t 'region display names: ' . d \@region_dns;
	my $default_region = 'Carlton';
	if (not defined $region_display{$default_region}) {
	    warn "default region '$default_region' seems to have disappeared\n";
	    $default_region = $region_dns[0];
	}

	# FIXME commonize showing one name to the user and another
	# internally.
	#
	my $region_dn = askQuestion
	  ('Which region?', $default_region, @region_dns);
	my $aid;
	foreach (@region_dns) {
	    if ($_ eq $region_dn) {
		$aid = $region_display{$_};
		last;
	    }
	}
	die if not defined $aid;
	print CONF "region $aid\t# $region_dn\n";
    }

    my $dig = askBooleanQuestion
      ('Do you get digital or satellite television?', 0);
    if ($dig) {
	say( <<END
You now need to select which digital channels to download.  First
choose some predefined packages of channels.
END
  );
	my $finished_option = 'Finished choosing packages';
	die "can't have package named '$finished_option'!"
	  if defined $channel_pkgs{$finished_option};
	my %to_choose = %channel_pkgs;
	while (%to_choose) {
	    my @k = sort keys %to_choose;
	    my $default = $k[0];
	    my $chosen = askQuestion('Package to add: ',
				     $default, @k, $finished_option);
	    last if $chosen eq $finished_option;

	    # Now prompt about individual channels in this package.
 	    our @ch_in_pkg;
	    local *ch_in_pkg = delete $to_choose{$chosen};
	    my @r = askManyBooleanQuestions
	      (1, map { "Add channel $_->{main_display_name}?" } @ch_in_pkg);
	    die if @r != @ch_in_pkg;
	    foreach (@ch_in_pkg) {
		if (shift @r) {
		    unless ($chose_ch{$_->{xmltv_id}}++) {
			print CONF "channel $_->{xmltv_id}\t# from package $chosen\n";
		    }
		}
	    }
	}
    }

    my @radio_chs = grep { my $t = $_->{type}; defined $t and $t eq 'radio' }
      @ch_all;
    my $num_radio_chs = scalar @radio_chs;
    if ($num_radio_chs == 0) {
	say("Hmm, there don't seem to be any radio channels available.\n");
    }
    else {
	if (askBooleanQuestion("Download all $num_radio_chs national radio stations?", 1)) {
	    print CONF "type radio\n";
	}
    }
    say("Local radio channels must be selected individually,\n");

    for (;;) {
	my $in = ask(<<END
Do you want any other channels?

Enter the name of a channel, or '.' to finish:
END
  );
	$in =~ s/^\s+//; $in =~ s/\s+$//;
	last if $in eq '.';
	my @poss;
	foreach my $ch (@ch_all) {
	    for ($ch->{main_display_name}) {
		if (defined) {
		    push @poss, $ch if /\Q$in\E/i;
		}
		else {
		    warn "no main display name for channel $ch->{first_ananova_id}, not selecting\n";
		}
	    }
	}
	if (@poss == 0) {
	    say("No channel matches '$in'.");
	}
	elsif (@poss == 1) {
	    my $ch = $poss[0];
	    my $dn = $ch->{main_display_name};
	    if (askBooleanQuestion("Add channel $dn?", 1)) {
		my $xmltv_id = $ch->{xmltv_id};
		unless ($chose_ch{$xmltv_id}++) {
		    print CONF "channel $xmltv_id\t# $dn\n";
		}
	    }
	}
	elsif (1 <= @poss and @poss < 25) {
	    my %dn_to_ch;
	    foreach (@poss) {
		my $dn = $_->{main_display_name};
		warn "more than one channel called $dn"
		  if exists $dn_to_ch{$dn};
		$dn_to_ch{$dn} = $_;
	    }
	    my $none_option = 'None of the above are what I wanted';
	    die 'silly channel name' if exists $dn_to_ch{$none_option};
	    my $r = askQuestion('Which channel to add?',
				$poss[0]->{main_display_name},
				(sort keys %dn_to_ch), $none_option);
	    next if $r eq $none_option;
	    my $ch = $dn_to_ch{$r}; die if not defined $ch;
	    my $xmltv_id = $ch->{xmltv_id};
	    unless ($chose_ch{$xmltv_id}++) {
		print CONF "channel $xmltv_id\n";
	    }
	}
	elsif (25 <= @poss) {
	    say("'$in' matches lots of channels, be more specific.");
	}
	else { die }
    }

    close CONF or warn "cannot close $config_file: $!";
    say("Finished configuration.");
    exit();
}

# Grab listings and write them in XML.  Arguments are to be passed to
# XMLTV::Writer.
#
sub grab( @ ) {
    my $copyright = get_copyright($index_pages->{regions}->{content},
				  $index_pages->{regions}->{url});

    # The copyright message is in whatever encoding the XML web pages
    # use.  Assume that this is a superset of ASCII, and that our
    # output encoding is too; then we just strip non-ASCII characters.
    #
    for ($copyright) {
	# Try to preserve at least the magic word or magic symbol.
	s/\xa9/copyright/g if not /copyright/i;
	s/\xa9/(C)/g;
	tr/\200-\377//d;
    }

    # Hopefully nobody will confuse this copyright message for the
    # listings with the copyright in this program.
    #
    print STDERR "\n$copyright\n" unless $opt_quiet;

    # Now read the config file to find which files to download.  The
    # user can choose channels by type (radio or satellite), region,
    # XMLTV id, or 'ALL'.  But only some of these can be chosen with
    # --configure.  The choices 'satellite' and 'ALL' are not normally
    # user-visible, they'd be a huge set of channels.
    #
    my %wanted; tie %wanted, 'Tie::RefHash';
    my $line_num = 0;
    foreach (@config_lines) {
	++ $line_num;
	next if not defined;
	my $where = "$config_file:$line_num";
	if ($_ eq 'ALL') {
	    foreach (@ch_all) {
		$wanted{$_} = 1;
	    }
	}
	elsif (/^type\s+(radio|satellite)$/) {
	    my $type = $1;
	    foreach (@ch_all) {
		$wanted{$_} = 1 if defined $_->{type} and $_->{type} eq $type;
	    }
	}
	elsif (/^region\s+(.+)/) {
	    my $region = $1;

	    # For backward compatibility turn old-style Ananova
	    # regions like 'ne_11' into just numbers like '11'.
	    #
	    if ($region =~ /^[a-z]+_(\d+)$/) {
		$region = $1;
	    }
	    die "$where: bad region specification $region"
	      if $region =~ tr/0-9//c;

	    t "want region $region, marking channels";
	    foreach (@ch_all) {
		if ($_->{regions}->{$region}) {
		    t "marking channel $_->{xmltv_id}";
		    $wanted{$_} = 1;
		}
	    }
	}
	# No way to ask for channel packages directly, they are used
	# only by --configure.  So far.
	elsif (/^channel\s+(.+)/) {
	    my $xmltv_id = $1;
	    my $ch = $ch_by_x{$xmltv_id};
	    if (not defined $ch) {
		warn "$where: no channel with XMLTV id $xmltv_id, skipping\n";
		next;
	    }
	    $wanted{$ch} = 1;
	}
	else { die "$where: bad line\n" }
    }

    # Check that every channel is associated with some package (so it
    # could be chosen) or has actually been chosen.
    #
#    local $Log::TraceMessages::On = 1;
    my %in_some_pkg; # keys are channel objects
    foreach my $pkg (keys %channel_pkgs) {
	t 'setting %in_some_pkg entries for package: ' . d $pkg;
	$in_some_pkg{$_} = 1 foreach @{$channel_pkgs{$pkg}};
    }
    t '\%in_some_pkg=' . d \%in_some_pkg;
    CH: foreach (@ch_all) {
	next if $in_some_pkg{$_};

	# It's not in any channel package, but is it in a region?
	foreach my $rid (keys %$regions) {
	    next CH if $_->{regions}->{$rid};
	}

	# For now, we won't warn about radio channels.  FIXME we
	# should eventually handle these like TV ones, but maybe
	# shoving localradio off into their own bit.
	#
	my $type = $_->{type};
	next if defined $type
	  and $type eq 'radio' or $type eq 'localradio';

	my $xid = $_->{xmltv_id};
	my $aid = $_->{first_ananova_id};
	my $dn = $_->{main_display_name};
	warn "no channel package or region includes $dn ($xid, $aid)\n";
    }

    # We want to get them in day order, so that we can get the first day's
    # listings first.  This is becuase we need to get a complete day to
    # get the complete channel information, which must be written out
    # before any programmes.
    #
    # We download all the files at once, then process them one by one.
    #
    my @to_parse;
    if (keys %wanted) {
	my @chs = sort { $a <=> $b }
	  map { $_->{first_ananova_id} }
	    keys %wanted;
	die if not @chs;
	unless ($opt_quiet) {
	    print STDERR 'grabbing ' . (scalar @chs) . ' channels from '
	      . (scalar @ch_all) . " available\n";
	}

	my @urls_for_day;
	foreach (@urls_for_day) { die if not ref }
	my ($first_day, $last_day) = ($opt_offset + 1, $opt_offset + $opt_days);
	foreach my $day ($first_day..$last_day) {
	    my $CHS_PER_REQ = 10;
	    for (my $i = 0; $i < @chs; $i += $CHS_PER_REQ) {
		my $end = $i + $CHS_PER_REQ;
		$end = @chs if $end > @chs;
		my $url = "$BASE_URL?day=day$day&c=" . join('.', @chs[$i .. $end-1]);	
		push @{$urls_for_day[$day]}, $url;
	    }
	}

	my $num_to_get = 0;
        foreach (@urls_for_day) {
	    $num_to_get += scalar @$_ if defined;
	}
        my $bar = new Term::ProgressBar('downloading listings', $num_to_get)
	  if Have_bar && not $opt_quiet;

      DAY: foreach my $day ($first_day..$last_day) {
	    my (@success, @get_failed, @no_content);
	    foreach (@{$urls_for_day[$day]}) {
		t "downloading page: $_";
		my $got = get_nice($_);
		if (not defined $got) {
		    push @get_failed, $_;
		}
		elsif (not fix_ananova_xml(\$got)) {
		    push @no_content, $_;
		}
		else {
		    push @success, [ $got, $_ ];
		}
		update $bar if Have_bar && not $opt_quiet;
	    }
	    if (not @success) {
		if (not @get_failed and not @no_content) {
		    # Huh?  Every page should be in one of the lists.
		    die;
		}
		elsif (not @get_failed and @no_content) {
		    if ($day == 1) {
			die "no content for first day, giving up\n";
		    }
		    else {
			my $msg = "no content for day $day, ";
			my $last_day_with_content = $day - 1;
			my $new_days = $last_day_with_content - $opt_offset;
			$msg .= "assuming --days $new_days";
			$msg .= " --offset $opt_offset" if $opt_offset;
			warn "$msg\n";
			last DAY;
		    }
		}
		elsif (@get_failed and not @no_content) {
		    die "every single page for day $day failed to download, giving up\n";
		}
		elsif (@get_failed and @no_content) {
		    if ($day == 1) {
			die "first day had download failures and no content, giving up\n";
		    }
		    else {
			my $prev = $day - 1;
			warn "download and content failures for day $day, assuming --days $prev\n";
			last DAY;
		    }
		}
		else { die }
	    }
	    push @to_parse, @success;
	}
    }
    else {
	warn "apparently nothing to download\n";
    }

    my $bar = new Term::ProgressBar('parsing', scalar @to_parse)
      if Have_bar && not $opt_quiet;
    my ($encoding, $credits, %ch, @prog_lists);
    my $w;
    my %written_xid;
    foreach (@to_parse) {
	my ($xml, $url) = @$_;
	t "parsing content of $url";
	my $r;
	{
#	    local $Log::TraceMessages::On = 0;
	    local $SIG{__WARN__} = sub {
		# For some reason calling warn() here seems to break.
		my $msg = shift;
 		$msg = "warning: something's wrong" if not defined $msg;
		print STDERR "$url: $msg\n";
	    };
	    local $SIG{__DIE__} = sub {
		# And calling die() from here might not work
		# correctly, but it is better than the alternative of
		# doing an exit(1) because at least if you rethrow the
		# exception it can be caught by eval {}.
		#
		die "$url: $_[0]";
	    };
	    # But these carefully set up handlers don't catch all the
	    # exceptions thrown by XMLTV::parse().  Strange.  This
	    # eval {} block is an attempt to make sure the url gets
	    # prepended to every exception message that might occur.
	    #
	    eval {
		local @XMLTV::Channel_Handlers = @new_channel_handlers;
		$r = XMLTV::parse($xml);
	    };
	    die $@ if $@;
	}

	if (not defined $w) {
	    ($encoding, $credits) = ($r->[0], $r->[1]);
	    $w = new XMLTV::Writer(@_, encoding => $encoding);
	    $w->start($credits);
	    $w->comment($copyright);
	}
	else {
	    if ($encoding ne $r->[0]) {
		warn "different pages have different encodings";
	    }
	    # Don't check credits since we have no reliable and simple
	    # way to do it.
	    #
	}
	die if not defined $w;

	# Convert Ananova channel id to XMLTV id and write <channel>.
	my $ch = $r->[2]; # input channel hash
	foreach (keys %$ch) {
	    die if not defined;

	    my @xids = map { $_->{xmltv_id} }
	      keys %{$ch_by_a{$_}};
	    die if not @xids;
	    my $xid = $xids[0];
	    foreach my $other_xid (@xids[1 .. $#xids]) {
		die "different XMLTV ids ($other_xid, $xid) for Ananova id $_"
		  if $other_xid ne $xid;
	    }
	    next if $written_xid{$xid}++;

	    my $towrite = $ch->{$_};
	    $towrite->{id} = $xid;

	    # Add extra display names.
	    my $ch_obj = $ch_by_x{$xid};
	    if (not defined $ch_obj) {
		# Shouldn't happen.
		warn "no object for XMLTV id $_";
	    }
	    else {
		# Discard the existing display name data and replace it
		# with the list from the object.  Urgh, this is horrible.
		#
		my @new = ();
		my @dns = $ch_obj->{main_display_name};
		if (defined $ch_obj->{extra_display_names}) {
		    push @dns, @{$ch_obj->{extra_display_names}};
		}
		@dns = grep { defined } @dns;
		# Assume English, unless all digits.
		$towrite->{'display-name'}
		  = [ map { tr/0-9//c ? [ $_, 'en' ] : [ $_ ] } @dns ];
	    }

	    die if exists $ch{$xid};
	    $ch{$xid} = $towrite;
	}

	# Convert channel ids in programmes.
	my $prog_list = $r->[3];
	foreach (@$prog_list) {
	    my @xids = map { $_->{xmltv_id} }
	      keys %{$ch_by_a{$_->{channel}}};
	    die if not @xids;
	    my $xid = $xids[0];

	    # We checked this earlier, but doesn't hurt to check it again.
	    foreach (@xids[1 .. $#xids]) {
		die "different XMLTV ids for Ananova id $_->{channel}"
		  if $_ ne $xid;
	    }
	    $_->{channel} = $xid;
	}

	# Get rid of Ananova's bizarre system for follow-on
	# programmes.  The XMLTV way of dealing with them is not ideal
	# either, but it's the standard right now.
	#
	# We need to group the programmes by channel and call
	# fix_times() on those groups.
	#
	my %ch_todo;
	push @{$ch_todo{$_->{channel}}}, $_ foreach @$prog_list;
	foreach (keys %ch_todo) {
	    fix_times($ch_todo{$_});
	}

	# Change to numeric timezones.
	foreach (@$prog_list) {
	    for (grep {defined} $_->{start}, $_->{stop}) {
		s{^ (\d+) \s+ ([A-Z]+) $ }{"$1 " . tz_to_num($2)}egx;
	    }
	}

	push @prog_lists, $prog_list;
	update $bar if Have_bar && not $opt_quiet;
    }

    $bar = new Term::ProgressBar('writing', scalar @prog_lists)
      if Have_bar && not $opt_quiet;
    $w->write_channels(\%ch);
    foreach (@prog_lists) {
	# Remove the occasional zero-length programmes which pop up.  NB
	# we rely here on the ordering which Ananova generates within each
	# channel.
	#
#	fix_zero_length($_);

	$w->write_programme($_) foreach @$_;
	update $bar if Have_bar && not $opt_quiet;
    }

    $w->end() if defined $w;
}


# Extract the copyright message from the comment at the start of every
# page served.  Takes page content, returns the copyright message.
#
# Parameters:
#   content
#   URL (for error messages)
#
sub get_copyright( $$ ) {
    my ($content, $url) = @_;
    my $r;
    my @lines = split /\n/, $content;
    my $msg;
    while (@lines) {
	local $_ = shift @lines;
	if (s/^(\W*)Copyright\b/Copyright/) {
	    my $prefix = $1;
	    $msg .= "$_\n";
	    # Find following lines with the same prefix.
	    while (@lines) {
		local $_ = shift @lines;
		s/^\Q$prefix\E// or last;
		$msg .= "$_\n";
	    }
	    return $msg;
	}
    }
    warn "no copyright message found in $url\n";
}


# Reads the Ananova 'regions' page and sets up channel objects.  Also
# returns a hashref mapping region ids to region names.
#
# Parameters:
#   contents of Ananova 'regions' page
#   URL of that page (for error reporting)
#
sub get_regions( $$ ) {
    my ($xml, $url) = @_;
    my %r;

    # Call some internal routines of the XMLTV module to parse this
    # XML, which is similar to XMLTV's channel listings but not quite
    # the same.
    #
    # (Ignore attributes of the <tv> element.)
    #
    local @XMLTV::Channel_Handlers = @new_channel_handlers;
    t 'set \@XMLTV::Channel_Handlers to' . d \@XMLTV::Channel_Handlers;
    my $t = new XML::Twig
      (TwigHandlers =>
       { '/tv/region' => sub { do_region($url, \%r, @_) } });
    $t->parse($xml);

    # We just told some existing channel objects about their regions.
    # That is the main purpose of this subroutine.  But we also return
    # a hash mapping region ids to region names.
    #
    return \%r;
}
# Helper for get_regions().  First two args are URL and hash to modify,
# next two are those passed by XML::Twig.
#
sub do_region( $$$$ ) {
    my ($url, $r, $t, $node) = @_;
    # Get the Ananova region id.
    our %reg_attrs; local *reg_attrs = XMLTV::get_attrs($node);
    delete $reg_attrs{tag};	# old-style id, not used.
    my $reg_id = delete $reg_attrs{id};
    if (not defined $reg_id) {
	warn "$url: region with no id, skipping";
	next;
    }
    foreach (keys %reg_attrs) {
	warn "$url: unknown attribute $_ in region";
    }

    # Store the region id and name.
    my $fc = $node->first_child();
    my $reg_name = $fc->pcdata();
    if (not defined $reg_name) {
	warn 'first child of region should be region name';
	next;
    }
    $fc->delete();
    for ($reg_name) { s/^\s+//; s/\s+$// }
    $r->{$reg_id} = $reg_name;

    # Inside a <region> element are some <channel> elements.
    t 'parsing <channel>s inside <region>';
    foreach (XMLTV::get_subelements($node)) {
	if (XMLTV::get_name($_) ne 'channel') {
	    warn "$url: something other than channel inside region";
	    next;
	}
	t 'calling XMLTV::node_to_channel()';
	my $ch_data = XMLTV::node_to_channel($_);
	my @chs = keys %{$ch_by_a{$ch_data->{id}}};
	if (not @chs) {
	    warn "$url: channel $ch_data->{id} mentioned in regions but not channels";
	    next;
	}
		
	# Tell each channel with this Ananova id that it belongs to
	# this region.
	#
	++ $_->{regions}->{$reg_id} foreach @chs;
    }
}


# Read Ananova's 'allchannels' page.  This tells us about radio and
# satellite channels.
#
# Parameters:
#   contents of allchannels page
#   URL (for error reporting)
#
sub read_channels_page( $$ ) {
    my ($xml, $url) = @_;
#    local $Log::TraceMessages::On = 1;
    my $data;

    if (not defined $xml) {
	die "failed to get channels page $url\n";
    }
    fix_ananova_xml(\$xml) or die "$url: bad channels page content";

    t 'parsing XML for channels page: ' . d $xml;
    {
	eval {
	    local @XMLTV::Channel_Handlers = @new_channel_handlers;
	    t 'set \@XMLTV::Channel_Handlers to: ' . d \@XMLTV::Channel_Handlers;
	    $data = XMLTV::parse($xml);
	};
	die "$url: $@" if $@;
    }
    my ($encoding, $credits, $channels, $progs) = @$data;
    t '$encoding=' . d $encoding;
    t '$credits=' . d $credits;
    t '$channels=' . d $channels;
    t '$progs=' . d $progs;
    # Assume that the encoding is a superset of ASCII (we know that
    # only ASCII characters are interesting to us for parsing).
    #

    # FIXME ignoring the credits
    die if @$progs;

    # FIXME perhaps a cleaner way of doing this than parsing then
    # writing out again?
    #
    my (%type, %display);
    foreach my $id (keys %$channels) {
	my $c = $channels->{$id};
	die "undef \$channels->{$id}" if not defined $c;
	die "\$channels->{$id} not hash: $c" if ref $c ne 'HASH';
	die "\$channels->{$id} has no 'id' key" if not defined $c->{id};

	# FIXME handle multiple display names; keep lang attribute
	for ($c->{'display-name'}) {
	    if (not defined or @$_ == 0) {
		warn "no display names for channel $c->{id}, skipping";
		next;
	    }
	    elsif (@$_ == 1) {
		# Okay.
	    }
	    elsif (@$_ >= 2) {
		warn "skipping additional display names for $_->[0]->[0]";
	    }
	    else {
		die;
	    }
	}
	
	my ($type, $aid, $display) = ($c->{distribution}, $c->{id}, $c->{'display-name'}->[0]->[0]);
	die if ref $type;
	die if $id ne $aid;
	t "got line with type $type, aid $aid, dn $display";
	my @chs = keys %{$ch_by_a{$aid}};
	t 'maybe looked up by aid: ' . d \@chs;
	if (not @chs) {
	    t 'not found, create new channel object';
	    my $ch = {};
	    push @ch_all, $ch;
	    ++ $ch->{ananova_ids}->{$aid};
	    $ch->{first_ananova_id} = $aid;
	    ++ $ch_by_a{$aid}->{$ch};
	    if ($aid =~ /_(\d+)$/) {
		++ $ch->{regions}->{$1};
	    }
	    @chs = ($ch);
	}
	
	foreach my $ch (@chs) {
	    # Only set the display name based on the first (ie,
	    # best) Ananova id for this channel.  Otherwise we'd
	    # have a conflict between say 'Border' and 'Border
	    # (Scottish viewers)'.  Only the first of the two should
	    # have a chance to set the display name.
	    #
	    my $first_aid = $ch->{first_ananova_id};
	    t "first Ananova id: $first_aid";
	    if ($first_aid eq $aid) {
		t 'this Ananova channel is the first for this channel';
		for ($ch->{variant}) {
		    $display .= " ($_)" if defined;
		}
		$ch->{main_display_name} = $display;
		t "set main display name to $display";
	    }
	    # Additional display names are added from the channel_ids file
	    # but *not* from Ananova's extra names.
	    #
	    # No warning for duplicate display names, that often happens.
	    #

	    # Guess an XMLTV id if it wasn't set before.  This would go
	    # wrong if two channels had the same Ananova id, but that can
	    # only happen if it's specified in the channel_ids file -
	    # which also defines the XMLTV ids.
	    #
	    if (not defined $ch->{xmltv_id}) {
#		warn "no XMLTV id in channel_ids file for Ananova id $aid\n";
		my $id = "$aid.tv-listings.ananova.com";
		die if defined $ch->{xmltv_id};
		$ch->{xmltv_id} = $id;
		die "channel $id already exists"
		  if defined $ch_by_x{$id};
		$ch_by_x{$id} = $ch;
	    }

	    my %map = (terrestrial => 'terrestrial',
		       # FIXME losing distinction between ITV Digital and Sky
		       digital => 'satellite',
		       satellite => 'satellite',
		       localradio => 'localradio',
		       radio => 'radio',
		       unknown => 'duff',
		      );

	    my $t = $map{$type};
	    if (not defined $t) {
		warn "$url: unknown <distribution>: $type";
		$t = 'duff';
	    }
	    $ch->{type} = $t;
	}
    }

    # Clean up by removing channels in config but not on site.
    my @new_ch_all;
    foreach (@ch_all) {
	my $type = $_->{type};
	my $xmltv_id = $_->{xmltv_id};
	my $keep;
	if ($type eq 'duff') {
	    $keep = 0;
	}
	elsif ($type eq 'tentative') {
	    warn
"channel $xmltv_id ($_->{first_ananova_id}) not seen on site, remove from channel_ids file\n"
  unless $ch_transient_by_x{$xmltv_id};
	    $keep = 0;
	}
	else { $keep = 1 }

	if ($keep) {
	    push @new_ch_all, $_;
	}
	else {
	    delete $ch_by_x{$xmltv_id};
	    foreach my $aid (keys %{$_->{ananova_ids}}) {
		delete $ch_by_a{$aid}->{$_};
	    }
	    $ch_gone_by_x{$xmltv_id}++
	      && warn "strange, seen vanished $xmltv_id twice";
	}
    }
    @ch_all = @new_ch_all;
}


# extract_variant()
#
# Some channels are really two channels in one, for example Radio 4 FM
# and Radio 4 LW.  Programmes on the channel have titles prefixed with
# '(FM)', etc.  This routine filters out all programmes for a
# particular 'variant'.
#
# Parameters:
#   Reference to list of programmes (all on same channel)
#   Transmission method (variant) to look for, eg 'FM'.
#
# Side effects: programmes in the list which are not from this variant
# will be deleted.
#
# BTW: this supersedes the old special_radio4() handler in
# tv_extractinfo_en.  It seems more appropriate to handle it here
# since it is a pecularity of British listings and not English
# TV/radio listings in general.
#
sub extract_variant( $$ ) {
    our @progs; local *progs = shift;
    my $tr = shift;

    my @progs_new;
    foreach (@progs) {
	my $titles = $_->{title}; die if not defined $titles;

	# Find whether one (and only one) transmission method is mentioned.
	my $tr_this;

	foreach (@$titles) {
	    for my $text ($_->[0]) {
		t "trying to match text $text for (Something) at start...";
		if ($text =~ s/^\s*\(([A-Z]+)\)\s*//) {
		    t 'matches';
		    if (not defined $tr_this) {
			$tr_this = $1;
			t '$tr_this set to: ' . d $tr_this;
		    }
		    elsif (defined $tr_this and $tr_this eq $1) {
			# Okay, agrees with other titles.
		    }
		    elsif (defined $tr_this and $tr_this ne $1) {
			warn "titles disagree, giving both ($tr_this) and ($1)";
			# Just discard this value.
		    }
		    else { die }
		}
	    }
	}

	if (defined $tr_this) {
	    t "found a transmission method: $tr_this";
	    if ($tr_this eq $tr) {
		t 'matches, keep';
		push @progs_new, $_;
	    }
	    else {
		t "does not match $tr, discard this programme";
	    }
	}
	else {
	    t 'no transmission method given, assume programme on all variants';
	    push @progs_new, $_;
	}
    }

    @progs = @progs_new;
}
	

####
# Channels stuff
#

# In principle it should be possible to find all channel data by
# looking at the Ananova site.  However they don't use the RFC2838
# style names, and I also want to add some extra display names for
# channel numbers.  So there's a hardcoded table listing XMLTV channel
# ids, their equivalent on the Ananova site, and optionally an extra
# (short) display name for that channel.
#
# It can be hard to decide whether two related channels should map to
# the same internal name - eg digital and analogue versions of the
# same channel.  I have made different internal names if I know that
# the content will be different (eg BBC1 digital is different to any
# of the analogue region versions), but otherwise mapped both channels
# to the same internal name.  Sometimes there are annoying small
# differences which require differing internal names, else
# tv_sort complains.  The same applies to regional variants for
# channels which don't differ between regions (eg Channel 5).
#
# Piping the output through tv_sort will check that the two
# versions of a channel are indeed identical.
#
sub init_channels() {
#    local $Log::TraceMessages::On = 1;
    # See the distributed version of this file for info about the
    # format.
    #
    my $line_num = 0;
    foreach (XMLTV::Config_file::read_lines($CHANNEL_NAMES_FILE, 1)) {
	++ $line_num;
	next unless defined;
	my $where = "$CHANNEL_NAMES_FILE:$line_num";
	local $SIG{__DIE__} = sub( $ ) {
	    die "$where: $_[0]";
	};
	local $SIG{__WARN__} = sub( $ ) {
	    warn "$where: $_[0]";
	};

	my @fields = split /:/; # strips trailing empty fields
	die 'wrong number of fields' if @fields < 2 or @fields > 4;

	my ($xmltv_id, $ananova_ids, $transient, $extra_dn) = @fields;
	my @aids = split /,/, $ananova_ids;
	die "no Ananova ids\n" if not @aids;

	# Look for the notation X/V for a 'transmission variant' of an
	# Ananova channel, for example radio4/FM is Ananova channel
	# radio4, programmes listed as '(FM)' or without transmission
	# details.
	#
	# We do not support multiple Ananova channels here: just one
	# 'variant' per channel object.  It ought to be done properly,
	# with each variant associated with an Ananova id, but it
	# isn't.
	#
	my $variant;
	foreach (@aids) {
	    if (s!/(.+)$!!) {
		$variant = $1;
		warn "multiple Ananova ids not supported when one has variant\n"
		  if @aids > 1;
	    }
	}

	t "initializing channel with id $xmltv_id";
	my $ch = {};
	push @ch_all, $ch;
	$ch->{xmltv_id} = $xmltv_id;
	die "channel $xmltv_id already exists\n"
	  if defined $ch_by_x{$xmltv_id};
	$ch_by_x{$xmltv_id} = $ch;

	t 'setting Ananova ids: ' . d \@aids;
	++ $ch->{ananova_ids}->{$_} foreach @aids;
	$ch->{first_ananova_id} = $aids[0];
	++ $ch_by_a{$_}->{$ch} foreach @aids;
	foreach (@aids) {
	    /_(\d+)$/ && ++ $ch->{regions}->{$1};
	}

	if (defined $variant) {
	    t "setting variant $variant";
	    die if defined $ch->{ananova_ids} and keys %{$ch->{ananova_ids}} > 1;
	    $ch->{variant} = $variant;
	}
	t 'maybe adding extra display name: ' . d $extra_dn;
	if (defined $extra_dn) {
	    my $found = 0;
	    foreach (@{$ch->{extra_display_names}}) {
		if ($_ eq $extra_dn) {
		    $found = 1;
		    last;
		}
	    }
	    # FIXME also check doesn't match main display name
	    push @{$ch->{extra_display_names}}, $extra_dn unless $found;
	}

	for ($transient) {
	    if (not defined or not length) {
		# Not marked as transient; make warnings if not there.
	    }
	    elsif ($_ eq 'transient') {
		$ch_transient_by_x{$xmltv_id}++
		  && warn "strange, marked $xmltv_id as transient twice";
	    }
	    else {
		die "bad value for 'transient' field: $_";
	    }
	}

	$ch->{type} = 'tentative'; # not yet seen on site
    }

    # Check that the same Ananova id is not mapped to two xmltv ids.
    my $bad = 0;
    foreach (keys %ch_by_a) {
	my @xids = map { $_->{xmltv_id} }
	  keys %{$ch_by_a{$_}};
	die if not @xids;
	my $xid = $xids[0];
	foreach my $other_xid (@xids[1 .. $#xids]) {
	    next unless $other_xid ne $xid;
	    warn "different XMLTV ids ($other_xid, $xid) for Ananova id $_\n";
	    $bad = 1;
	}
    }
    die "channel id errors, exiting\n" if $bad;
}

# Return a hash mapping package names to lists of channel objects.
sub get_channel_pkgs() {
    # Each package is a file under channel_pkgs/ somewhere in share/.
    # The filename is the package name, and then channel ids are
    # listed one per line.
    #
    my $channel_pkgs_dir = "$OUR_SHARE_DIR/channel_pkgs";
    die "no directory $channel_pkgs_dir" if not -d $channel_pkgs_dir;
    my %pkgs;

    # ISTR some weirdness where certain versions of File::Find would
    # prepend the directory name and others would not.  But let's not
    # worry about that until we see it.
    #
    my $wanted = sub {
	return if -d;
	return if $File::Find::name =~ m!/CVS/!;
	local $_ = $_; # restore old value on exit

	(my $pkg_name = $File::Find::name)
	  =~ s!^\Q$channel_pkgs_dir/\E!! or die;
	die "package $pkg_name seen twice" if defined $pkgs{$pkg_name};
	my $line_num = 0;
	my $filename = $_; # relative to current directory
	foreach (XMLTV::Config_file::read_lines($filename, 1)) {
	    ++ $line_num;
	    next if not defined;

	    # Check the XMLTV id is actually known.
	    my $ch = $ch_by_x{$_};
	    if (not defined $ch) {
		warn "$pkg_name:$line_num: unknown channel id $_\n"
		  unless $ch_gone_by_x{$_}; # one warning is enough
	    }
	    else {
		push @{$pkgs{$pkg_name}}, $ch;
	    }
	}
    };
    find($wanted, $channel_pkgs_dir);
    return %pkgs;
}


# get_pages()
#
# Fetch the given 'pages' from Ananova.  Attempt to combine multiple
# page fetches.
#
# Parameters:
#   text to use for progress bar
#   listref of page names to download, eg for show=regions name is 'regions'
#
# Returns: reference to hash mapping page name to a hash with 'url'
# and 'content'.  Although 'url' may be shared with other pages.
#
# Dies if a page cannot be downloaded.
#
sub get_pages( $$ ) {
#    local $Log::TraceMessages::On = 1;
    my ($text, $pages) = @_;
    my @pages = @$pages;
    foreach (@pages) { die if not defined }
    my %r;
    my $num_pages = scalar @pages;
    t "initializing progress bar with $num_pages items";
    my $bar = new Term::ProgressBar($text, $num_pages)
      if Have_bar && not $opt_quiet;

    # FIXME currently multiple pages are not supported.
    my $max_at_once = 1;

    my @got;
    while (@pages) {
	my @this_fetch = ();
	die if $max_at_once != 1;
	while (@pages and (@this_fetch < $max_at_once)) {
	    push @this_fetch, shift @pages;
	}
	t 'this fetch: ' . d \@this_fetch;
	t 'remaining to fetch: ' . d \@pages;
	my $url = "$BASE_URL?" . join('&', map { "show[]=$_" } @this_fetch);
	t "fetching url: $url";
	foreach (@this_fetch) { $r{$_}->{url} = $url }

	my $retries = 3;
      GET:
	my $got = get_nice $url;
	die "cannot fetch $url" if not defined $got;

	# FIXME need to split up pages, when multiple ones are
	# returned.
	#
	push @got, $got;

	if (@got != @this_fetch) {
	    warn 'expected ' . (scalar @this_fetch) .
	      " pages from URL $url, got " . (scalar @got);
	    if ($retries--) {
		warn "retrying $url\n";
		goto GET;
	    }
	    else {
		die "really cannot fetch $url, giving up\n";
	    }
	}
	update $bar if Have_bar && not $opt_quiet;
	foreach (@this_fetch) {
	    die "page $_ requested twice" if defined $r{$_}->{content};
	    $r{$_}->{content} = shift @got;
	}
    }
    die if (keys %r) != $num_pages;
    return \%r;
}


# fix_times()
#
# Often there are follow-on programmes like Weather in 'News;
# Weather'.  Ananova represents these by having the followon overlap
# the last 15 seconds of the main programme.  This isn't the XMLTV
# way, which is to use clumpidx.  Neither answer is ideal but we want
# to follow the existing standard.
#
# Parameters: a reference to a list of programmes for one channel.
#   The programmes will be modified, but not the list itself.
#
sub fix_times($) {
#    local $Log::TraceMessages::On = 1;
    die 'usage: fix_times(ref to list of programmes)' if @_ != 1;
    t 'fix_times() ENTRY';
    my @progs = @{shift()}; # take a copy

    for (my $prev = shift @progs; my $p = $progs[0]; $prev = shift @progs) {
	die if $prev->{channel} ne $p->{channel};
	my $start = $p->{start};
	my $stop = $p->{stop};
	my $prev_stop = $prev->{stop};
	if (defined $stop and defined $prev_stop) {
	    my $start_p = parse_date($start);
	    my $stop_p = parse_date($stop);
	    my $prev_stop_p = parse_date($prev_stop);
	    my $start_plus_15 = DateCalc($start_p, '+ 15 seconds');
	    die if not defined $start_plus_15;
	    if (not Date_Cmp($start_plus_15, $prev_stop_p)
		and not Date_Cmp($stop_p, $prev_stop_p)) {
		# Found the magic 15 second difference.  Fix it.
		t 'found 15 second difference';
		t '$p=' . d $p;
		t '$prev=' . d $prev;
		if (defined $prev->{clumpidx} or defined $p->{clumpidx}) {
		    # I really don't expect this to happen.
		    warn "cannot add clumpidx to programmes at $start_p-|$stop_p on $p->{channel}, already there";
		    next;
		}
		$prev->{clumpidx} = '0/2';
		$p->{clumpidx} = '1/2';
		$p->{start} = $prev->{start};
		t '$p now: ' . d $p;
		t '$prev now: ' . d $prev;
	    }
	}
    }
    t 'fix_times() EXIT';
}


# fix_zero_length()
#
# I once saw 'News' with start time 0700 and stop time 0700, and then
# on the same channel immediately afterwards, 'News' from 0700 to
# 0705.  This subroutine looks for similar glitches and removes the
# zero-length programme.
#
# Parameter: ref to list of programmes: the programmes will not be
# modified, but the list may be.
#
sub fix_zero_length( $ ) {
#    local $Log::TraceMessages::On = 1;
    t 'fix_zero_length() ENTRY';
#    t 'progs: ' . d $_[0];
    our @progs; local *progs = shift;
    foreach (@progs) { die if not defined }

    # Consider all consecutive pairs of programmes.
    my $last = $#progs - 1;
    for (my $i = 0; $i <= $last; $i++) {
	my $first = $progs[$i];
	if (not defined $first) {
	    # But we already checked that each elem of @progs was
	    # defined!
	    #
	    warn "strange, found undef element \$progs[$i], skipping\n";
	    next;
	}

#	t 'first: ' . d $first;
	die if not defined $first->{start};
	die if not defined $first->{channel};
	die if not defined $progs[$i+1];
	die if not defined $progs[$i+1]->{start};
	die if not defined $progs[$i+1]->{channel};

	my $first_stop = $first->{stop};
	next if not defined $first_stop;
	my $first_start = $first->{start};
	next if $first_stop ne $first_start;

	# OK, the first programme is zero length.
	my $second = $progs[$i+1];
	my $second_start = $second->{start};
	next if $first_start ne $second_start;

	# A likely candidate.  Check the non-time details are identical.
	t 'suspect: ' . d $first;
	t '...and: ' . d $second;
	my $first_notimes = { %$first };
	my $second_notimes = { %$second };
	foreach ($first_notimes, $second_notimes) {
	    delete $_->{start};
	    delete $_->{stop};
	}
	t 'without times: ' . d $first_notimes;
	t '...and: ' . d $second_notimes;

	# FIXME Data::Dumper doesn't always work, as shown by tv_sort.
	if (Dumper($first_notimes) eq Dumper($second_notimes)) {
	    # The first programme is a duplicate of the second,
	    # except that it has zero length.  Remove it.
	    #
	    t 'identical, removing the first';
	    splice(@progs, $i, 1);
#	    t 'progs now: ' . d \@progs;
	    -- $i;
	    -- $last;
	}
    }
    t 'fix_zero_length() EXIT';
}


# fix_ananova_xml()
#
# The XML returned by _xmltv.php is not quite well-formed or valid or
# semantically sensible; fix what we can and check for some common errors.
# Modifies the string passed in and returns true iff it should now be
# a parsable XMLTV document.
#
# If this routine returns false, for the time being you can assume
# that means 'moderately sane - but no content'.
#
sub fix_ananova_xml( $ ) {
    for (${ $_[0] }) {
	# Work around bad characters.
	s/\s&\s/ &amp; /g;

	# Empty <category> (and possibly other) elements.
	# (Spurious backslash in regexp to keep Emacs happy.)
	if (s!<(\w+) lang="\w+"(?:\s+x-\w+="[^\"]*")*></\1>!!) {
#           warn "got page $_ containing empty $1\n";
	}
	
	if (/^\s*Can\'t connect to db$/) {
	    # This is so serious we abort the whole program.
	    die "\nWeb site giving 'Can't connect to db' error.  Please try again later.\n";
	}
	
	if (not /<tv/) {
	    # No top-level <tv> element.  Don't warn, this is actually
	    # a common occurrence.
	    #
	    return 0;
	}

	# Seems okay...
	return 1;
    }
}
