#!/usr/local/bin/perl -- -*-perl-*-

# ------------------------------------------------------------
# search.cgi - Search an RFC for a regex
# Based on...
#
# submit_job.pl, by Kyle Hourihan
#   most Perl Code thanks to Reuven M. Lerner (reuven@the-tech.mit.edu).
#   modified by Eli Rosenblatt
#
# Last updated: 8/30/95
#
# job_input.pl allows a recruiter to post new jobs listings in 
# a WWW html form and outputs the response to a the server.
# This script requires Perl and should run on any CGI-compatible 
# HTTP server.
# 
# ------------------------------------------------------------

# ------------------------------------------------------------

# job_post.pl is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any
# later version.


# You should have received a copy of the GNU General Public License
# along with Form-mail; see the file COPYING.  If not, write to the Free
# Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
# ------------------------------------------------------------

# NOTE:  Check the variables below the splits() for server specific paths 


# open(CONSOLE,">/dev/console");
# select(CONSOLE); $| = 1; select(STDOUT);

sub searchRAWDATA {
    while (<RAWDATA>) {

	if (($FORM{CASE_INDEPENDANT} eq "on" && /$RegEx/io) ||
	    ($FORM{CASE_INDEPENDANT} ne "on" && /$RegEx/o)) {

	    $Hits ++;

	    # Convert the line into a displayable format by escaping all
	    # the HTML specials.  If we don't do this, then we can get into
	    # a situation where a matching line has an <EM> tag (for example),
	    # but not an </EM> tag.  So we have to watch all the different
	    # HTML sequences and make sure they end correctly for the next
	    # line.  No thanks.  Easier to just do this...

	    $HitLine = $_;

	    if (! $searchingRFCindex) {
		$HitLine =~ s/&/&amp;/g;
		$HitLine =~ s/</&lt;/g;
		$HitLine =~ s/>/&gt;/g;
	    }

	    $HitLines .= $HitLine;
	}

	while (($link) = m:<A HREF="([^"]+)">:) {
	    s:<A HREF="[^"]+">:: ;
	    @Links = ($link, @Links) if $link =~ m:^/Connected/:;
	}

	if (($text) = m:<TITLE>(.*)</TITLE>:) {
	    $title = $text;
	}
    }
}

sub searchurl {
    local($Target) = @_;
    local($Input);
    local($searchingRFCindex) = 0;
    local($Hits) = 0;
    local($HitLine);
    local($HitLines) = "";
    local($link);
    local($text);
    local($title) = "[Title unavailable]";

    # Special case here for the encylopedia - don't do the overhead
    # of an HTTP transfer, just read the file.  (Of course, only
    # $Targets should make it this far, anyway...

    $Input = "/usr/local/bin/url_get $Target|";
    $Input = "<../../$Target"  if ($Target =~ m:^/Connected/:);
    if ($Target eq "/Connected/RFC/index.html") {
	$Input = "<../RFC/indextable";
	$searchingRFCindex = 1;
	$HitLines .= "<TABLE>\n";
    }

    open(RAWDATA, "$Input");
    &searchRAWDATA;

    if ($Target =~ m:/index.shtml$:) {
	$ChildList = $Target;
	$ChildList =~ s:/index.shtml$:/list_of_children:;
	if ( -r "../../$ChildList") {
	    open(RAWDATA, "<../../$ChildList");
	    &searchRAWDATA;
	}
    }

    $HitLines .= "</TABLE>\n"  if ($searchingRFCindex);

    return "$Hits\t$Target\t$title\t$HitLines";
}



# Get the input
read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'});

# Split the name-value pairs
@pairs = split(/&/, $buffer);

foreach $pair (@pairs)
{
    ($name, $value) = split(/=/, $pair);

    # Un-Webify plus signs and %-encoding
    $value =~ tr/+/ /;
    $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;

    # Stop people from using subshells to execute commands
    # Not a big deal when using sendmail, but very important
    # when using UCB mail (aka mailx).
    # $value =~ s/~!/ ~!/g; 
    # Uncomment for debugging purposes
    # print "Setting $name to $value<P>";

    $FORM{$name} = $value;
}

$BaseURL = $FORM{BASE_URL};
$RegEx = $FORM{SEARCH_STRING};

# Limit the search depth (really should return error on bad values)
$Depth = $FORM{DEPTH};
$Depth = 0 if ($Depth < 0);
$Depth = 2 if ($Depth > 2);

# If user requested a string search, escape regular expression specials
$RegEx =~ s/(\W)/\\$1/g    if ($FORM{TYPE} eq "STRING");

# If user requested a whole word search, tack on RegEx marks
$RegEx = "\\b$RegEx\\b"    if ($FORM{WHOLE_WORD} eq "on");




print "Content-type: text/html\n\n";

print "<HTML>\n";
print "<HEAD>\n";
print "<TITLE>Search Results: $RegEx</TITLE>\n";
print "</HEAD>\n";
print "<BODY>\n";
print "<CENTER><H3>Search Results: $RegEx</H3></CENTER><P>\n";
print "<PRE>\n";


$searches = 0;
$SearchedLinks = " ";
@Links = ($BaseURL);

sub onepass {
    local(@OldLinks) = @Links;

    @Links = ();

    foreach $link (@OldLinks) {
	if (! ($SearchedLinks =~ / $link /)) {
	    $searches ++;
	    $result = &searchurl($link);
	    $FileScores[$#FileScores+1] = $result if $result > 0;
	    $SearchedLinks .= " $link";
	}
    }
}

for ($i = 0; $i <= $Depth; $i ++) {
    &onepass;
}

$linksHit = $#FileScores + 1;

print "Base URL:         $BaseURL\n";
print "Search string:    $RegEx\n";
print "Search depth:     $Depth\n";
print "Links searched:   $searches\n";
print "Links hit:        $linksHit\n";
print "\n";

@FileScores = sort {$b <=> $a;} @FileScores;

foreach $filescore (@FileScores) {
    local($file, $score);
    ($score, $file, $title, $hits) = split(/\t/,$filescore,4);

    print "\n\n\n";
    printf "%3d  ", $score;
    print "<A HREF=\"$file\">$title</A>\n";
    print $hits;
}

print "</PRE>\n";
print "</BODY>\n";
print "</HTML>\n";
