#! /usr/bin/perl

# Report a validation failure for the line currently being examined.
#
# Takes no arguments.  Reads three globals: $ARGV (name of the file being
# read through <>), $. (current input line number) and $_ (the line under
# test).  $_ is printed exactly as it stands, so a line that still carries
# its terminator is followed by an empty line in the output.
#
# Only writes to STDERR; it does not stop the script.
sub error {
    my $message = sprintf("error on %s:%s - %s\n", $ARGV, $., $_);
    print STDERR $message;
}

# Report a validation failure for a stored line.
#
# Parameters:
#   $linenum - index into the global @InputLine of the offending line;
#              it is also printed as the line number in the message.
#
# Reads the globals $ARGV (input file name) and @InputLine.  The stored
# line is printed exactly as it stands, so a line that still carries its
# terminator is followed by an empty line in the output.
#
# Only writes to STDERR; it does not stop the script.
sub tail_error {
    # A lexical keeps the parameter private to this sub; the package
    # global $linenum is no longer temporarily overwritten.
    my ($linenum) = @_;

    print STDERR "error on $ARGV:$linenum - $InputLine[$linenum]\n";
}


# Validate the fixed page header, one input line per check.
#
# Every step reads the next line into $_ and calls error() when the line
# does not have the expected form.  error() only prints a message, so the
# remaining checks still run after a failure.

$_ = <>;
error() unless m:^<HTML>$:;

$_ = <>;
error() unless m:^<HEAD>$:;

# The page title is captured so it can be compared with later copies.
$_ = <>;
error() unless (($title) = m:^<TITLE> *(.*) *</TITLE>$:);

$_ = <>;
error() unless m:^</HEAD>$:;

$_ = <>;
error() unless m:^<BODY>$:;

$_ = <>;
error() unless m!^<B>Connected: An Internet Encyclopedia</B>$!;

$_ = <>;
error() unless m:^<BR>$:;

# The <EM> line must repeat the <TITLE> text exactly.
$_ = <>;
error() unless (($title2) = m:^<EM> *(.*) *</EM>$:);
error() unless $title2 eq $title;

$_ = <>;
error() unless m:^<HR>$:;

$_ = <>;
error() unless m:^<CENTER>$:;

$_ = <>;
error() unless m!^<B>Top:</B> <A HREF="/Connected/index.html">Connected: An Internet Encyclopedia</A>$!;

# Collect zero or more "Up" links.  A line is always read ahead, so when
# the loop ends $_ holds the first line that is not an "Up" link.
$_ = <>;
$up_count = 0;
while (($up_url, $up_name) = m!^<BR><B>Up:</B> *<A HREF="([^"]*)">(.*)</A>$!) {
    ($up_url[$up_count], $up_name[$up_count]) = ($up_url, $up_name);
    $up_count++;
    $_ = <>;
}

error() unless m:^</CENTER>$:;

# Read ahead for the section that follows the header.
$_ = <>;

# Optional search form.  If the current line opens the form, every line of
# the form is checked against its expected text and one more line is read
# ahead afterwards; otherwise $_ is left untouched.
if (m:^<FORM METHOD="POST" ACTION="/Connected/cgi-bin/search.cgi">$:) {
    $searchForm = "yes";

    $_ = <>;
    error() unless m:^<TABLE>$:;

    # $searchURL is captured but not verified: doing so would require
    # knowing the URL of the page being checked, which is not yet worked out.
    $_ = <>;
    error() unless (($searchURL) = m!^<TR><TD><B>Search:</B> <TD><INPUT NAME="SEARCH_STRING"> <INPUT TYPE=HIDDEN NAME="BASE_URL" VALUE="([^"]+)">$!);

    $_ = <>;
    error() unless m!^<TD><B>Search Depth:</B> <TD><SELECT NAME="DEPTH"><OPTION>0<OPTION SELECTED>1<OPTION>2</SELECT>$!;

    # The next two lines are compared as whole strings, newline included.
    $_ = <>;
    error() if $_ ne "<TR><TD><B>Search Type:</B><TD><INPUT TYPE=\"RADIO\" NAME=\"TYPE\" VALUE=\"STRING\" CHECKED>String <INPUT TYPE=\"RADIO\" NAME=\"TYPE\" VALUE=\"REGEX\">RegEx\n";

    $_ = <>;
    error() if $_ ne "<TD><B>Search Options:</B> <TD><INPUT TYPE=\"CHECKBOX\" NAME=\"CASE_INDEPENDANT\" CHECKED>NoCase <INPUT TYPE=\"CHECKBOX\" NAME=\"WHOLE_WORD\" CHECKED>WholeWord\n";

    $_ = <>;
    error() unless m:^</TABLE>$:;

    $_ = <>;
    error() unless m:^</FORM>$:;

    # Read ahead for the section that follows the form.
    $_ = <>;
}


# After the "Up" links (and optional search form) the page may carry:
#   1. nothing
#   2. a Prev link
#   3. a Next link
#   4. a Prev link, a <BR> line, then a Next link
# On entry one line has already been read ahead into $_, and every path
# below leaves one line read ahead, so the closing <HR><P> check needs no
# read of its own.

# Cleared when a Prev link is not followed by <BR>; in that case the line
# in $_ is checked directly against <HR><P> without looking for a Next link.
my $next_allowed = 1;

if (($prev_url, $prev_name) = m!^<B>Prev:</B> *<A HREF="([^"]*)"> *(.*) *</A>$!) {
    $_ = <>;
    if (m:^<BR>$:) {
        $_ = <>;
    }
    else {
        $next_allowed = 0;
    }
}

if ($next_allowed
    && (($next_url, $next_name) = m!^<B>Next:</B> *<A HREF="([^"]*)"> *(.*) *</A>$!)) {
    $_ = <>;
}

error() unless m:^<HR><P>$:;



# Remember where the header stopped, then store every remaining line in
# @InputLine under its input line number.  Having the lines in memory lets
# the trailer be checked by walking backwards from the last line.

$LastHeaderLine = $.;

while (defined($_ = <>)) {
    $InputLine[$.] = $_;
}


# Validate the trailer, walking backwards from the last line read.
# $lineno always names the stored line currently under test, and
# tail_error() is given that same index when a check fails.

$lineno = $.;
tail_error($lineno) unless $InputLine[$lineno] =~ m:^</HTML>$:;

$lineno--;
tail_error($lineno) unless $InputLine[$lineno] =~ m:^</BODY>$:;

# An optional cgi-bin link line may sit just above </BODY>; step over it.
$lineno--;
if ($InputLine[$lineno] =~ m:<P><A HREF="/Connected/cgi-bin/:) {
    $lineno--;
}

# The trailer repeats the page title, which must equal the header's copy.
tail_error($lineno)
    unless (($title3) = ($InputLine[$lineno] =~ m:^<EM> *(.*) *</EM>$:));
tail_error($lineno) unless $title3 eq $title;

$lineno--;
tail_error($lineno) unless $InputLine[$lineno] =~ m:^<BR>$:;

$lineno--;
tail_error($lineno)
    unless $InputLine[$lineno] =~ m!^<B>Connected: An Internet Encyclopedia</B>$!;

# Above that the trailer is either a bare <P><HR> line, or a centred
# Next link enclosed between <P><HR> and <HR>.

$lineno--;
if ($InputLine[$lineno] =~ m:^<HR>$:) {
    $lineno--;
    tail_error($lineno) unless $InputLine[$lineno] =~ m:^</CENTER>:;

    $lineno--;
    tail_error($lineno)
        unless (($next2_url, $next2_name) = ($InputLine[$lineno] =~ m!^<B>Next:</B> *<A HREF="([^"]*)"> *(.*)</A>$!));

    $lineno--;
    tail_error($lineno) unless $InputLine[$lineno] =~ m:^<CENTER>$:;

    $lineno--;
    tail_error($lineno) unless $InputLine[$lineno] =~ m:^<P><HR>$:;
}
else {
    tail_error($lineno) unless $InputLine[$lineno] =~ m:^<P><HR>$:;
}

# $lineno now indexes the line tested against <P><HR>.
$FirstTrailerLine = $lineno;

# The trailer's Next link has to agree with the one found in the header.
tail_error($lineno) unless $next2_url eq $next_url;
tail_error($lineno) unless $next2_name eq $next_name;



# Emit what was learned from the header as tab-separated records on
# STDOUT.  Every record starts with the input file name and a record type.

print "$ARGV	Title	$title\n";

print "$ARGV	SearchForm\n" if $searchForm eq "yes";

foreach my $up_index (0 .. $up_count - 1) {
    print "$ARGV	Up	$up_name[$up_index]	$up_url[$up_index]\n";
}

# Prev and Next records are written only when the URL is non-empty.
if ($prev_url ne "") {
    print "$ARGV	Prev	$prev_name	$prev_url\n";
}
if ($next_url ne "") {
    print "$ARGV	Next	$next_name	$next_url\n";
}



# Scan the stored lines between header and trailer and print one
# tab-separated record for every link, <EM> item and <H4> subheading.
#
# Each line is copied and the copy is consumed: a single substitution both
# finds the leftmost remaining item and deletes it, so the loop repeats
# until the line holds no more items of that kind.  Links are stripped
# first, then <EM> items, then subheadings.
#
# Captures go into lexicals so the global $title (the page title parsed
# from the header) is no longer overwritten with link text.
#
# NOTE(review): the loop stops at $FirstTrailerLine-2, so the stored line
# immediately before the trailer's <P><HR> line is never scanned --
# confirm whether that is intended.

for ($lineno = $LastHeaderLine+1; $lineno < $FirstTrailerLine-1; $lineno ++) {
    my $remaining = $InputLine[$lineno];

    while ($remaining =~ s:<A HREF="([^"]+)">([^<]*)</A>::) {
        my ($link_url, $link_text) = ($1, $2);
        print "$ARGV	Link	$link_text	$link_url\n";
    }

    while ($remaining =~ s:<EM>([^<]+)</EM>::) {
        my $em_item = $1;
        print "$ARGV	EM	$em_item\n";
    }

    while ($remaining =~ s:<H4>([^<]+)</H4>::) {
        my $subhead = $1;
        print "$ARGV	SubHead	$subhead\n";
    }
}
