#!/usr/bin/perl

# 4/13/96 Michael Smith 
# for WebGlimpse
# see http://glimpse.cs.arizona.edu/webglimpse for more information

# returns full search box (with preference to the referring page)
#########################################################################

# The following variables are changed by wginstall
# (Do not reformat this section: the installer rewrites these assignments.)
# $WEBGLIMPSE_HOME - install root; the lib directory and the per-vhost
#                    ".wgsiteconf" file are located relative to it.
# $CGIBIN          - URL path component of the CGI directory, used when
#                    building the form ACTION and the neighborhood link.
# $GLIMPSE_LOC     - path to the glimpse binary (not referenced in this script).
# $CONVERT_LOC     - path to wgconvert, run to read a neighborhood file.
$WEBGLIMPSE_HOME = "/usr/src/redhat/BUILD/webglimpse-1.5";
$CGIBIN = "cgi-bin";
$GLIMPSE_LOC = "/usr/bin/glimpse";
$CONVERT_LOC = "/usr/bin/wgconvert";
# End list of changed variables

# lib directory (prepended to @INC before config.pl is loaded)
$WEBGLIMPSE_LIB = "$WEBGLIMPSE_HOME/lib";

# we don't need to know the settings; all done by webglimpse
# $nh_pre is prefixed to a page's base file name to form the name of its
# neighborhood file.  $REMOTEDIR and $MAPFILE are not referenced in this
# script -- presumably kept for the shared library code; verify before removing.
$nh_pre=".nh.";
$REMOTEDIR = ".remote";
$MAPFILE = ".wgmapfile";

# name of config file (looked up inside the archive directory)
$CONFIGFILE = "archive.cfg";

# Start the HTTP header.  Everything printed after this point is HTML,
# including the pages produced by err_file / err_conf.
print "Content-type: text/html\n\n";

# ENV variables
# PATH_INFO contains the archive directory.  Treat a missing value as the
# empty string so later string operations are well defined.
$archivepwd = $ENV{'PATH_INFO'};
$archivepwd = "" unless defined $archivepwd;

if ($archivepwd =~ m/^\s*\&/) {
    # Don't let anybody open specific descriptors.
    &err_file ($archivepwd);
    die ("UNREACHABLE REACHED");
}

# Get virtual host if any: the first "vhost <name>" line of the archive's
# config file that carries a non-empty name wins.
$vhost = "";
# Three-argument open: the path is derived from the request and must never
# be interpreted as an open mode.
open (my $vhost_cfg, '<', "$archivepwd/$CONFIGFILE") || &err_file ("$archivepwd/$CONFIGFILE");
# String comparison (eq): the former numeric "==" treated every
# non-numeric host name as equal to "", so the loop never stopped early.
while (($vhost eq "") && defined(my $cfg_line = <$vhost_cfg>)) {
	if ($cfg_line =~ /^vhost/) {
		# split ' ' collapses runs of blanks/tabs, so "vhost   name" works.
		(undef, $vhost) = split(' ', $cfg_line);
		$vhost = "" unless defined $vhost;
	}
}
close($vhost_cfg);
# "default" is the marker for "no virtual host".
if ($vhost eq "default") {
	$vhost = "";
}

#########################################################################
#########################################################################
### SITE CONF STUFF
#########################################################################
#########################################################################
# Per-virtual-host site configuration file, kept in the install root.
$wgConfPath = $WEBGLIMPSE_HOME . "/" . $vhost . ".wgsiteconf";
$prefix = "^DirectoryIndex|^UserDir|^Alias|^ScriptAlias|^DocumentRoot";

# Site settings start out empty; siteconf_ReadConf fills them in below.
($DirectoryIndex, $UserDir, $DocumentRoot) = ("", "", "");
# @AliasList=();
# @ScriptAliasList=();
# @ServerCache=();
($Port, $Server, $ServerAddress) = ("", "", "");
#########################################################################
### END SITE CONF STUFF
#########################################################################

#---------------------------------
# Search the WebGlimpse lib directory before any other @INC entry, then
# load the shared configuration helpers.
unshift @INC, $WEBGLIMPSE_LIB;
require "config.pl";

# Read the site configuration for this virtual host.
siteconf_ReadConf();
print "<HEAD>\n";

#  Fetch the query string (may be absent when the page is requested bare).
$query = $ENV{'QUERY_STRING'};
$query = "" unless defined $query;

#	Strip the variables out from the query string,
#	and assign them into variables, prefixed by 'QS_'
foreach $pspec (split (/\&/, $query)) {
	# Limit of 2: only the first '=' separates name from value, so a value
	# that itself contains '=' is kept whole.
	($pname, $pvalue) = split (/=/, $pspec, 2);
	$pname  = '' unless defined $pname;
	$pvalue = '' unless defined $pvalue;

# Decode form results (hex characters, spaces etc)
	$pvalue = &www_form_urldecode($pvalue);
	$pname = &www_form_urldecode($pname);

	# Only plain, non-empty identifiers may become variable names.
	# \A...\z (rather than ^...$) also rejects a trailing newline.
	if ($pname =~ /\A[a-zA-Z0-9_]+\z/ ) {
		$varname = "QS_$pname";
		$$varname = $pvalue;
	}
}

# The "file" parameter names the page the search box was reached from.
$file = defined $QS_file ? $QS_file : "";
$file =~ s/\'//g;
# print "file: $file<br>\n";

# "shownh" asks for the neighborhood listing to be shown.
$show_neighborhood = $QS_shownh ? $QS_shownh : 0;

# Refuse to continue unless the archive directory passes the config test.
err_conf() if TestConfig($archivepwd) == 0;

# Pull the archive settings; everything after the nine scalars is the URL list.
(
	$title,        $archiveurl,   $traverse_type,
	$explicit_only, $numhops,     $nhhops,
	$local_limit,  $remote_limit, $addboxes,
	@urllist
) = ReadConfig($archivepwd);

# Title of the referring page, taken from the page itself.
$thistitle = lookup_title($file);

### TO DO -- error if we can't find the file's title; all indexed files should
###          have at least 'No title'

# Page title: generic when no referring file was given.
if ($file eq "") {
	print "<title>Full search</title>\n";
}
else {
	print "<title>Full search from $thistitle</title>\n";
}
print "</head>\n\n";

# Escape a request-derived value for use inside a double-quoted HTML
# attribute.  PATH_INFO and the "file" parameter are attacker-controlled;
# written raw into unquoted attributes they allowed markup injection and
# were cut off at the first space.
my $form_attr = sub {
	my ($text) = @_;
	$text = "" unless defined $text;
	$text =~ s/&/&amp;/g;
	$text =~ s/</&lt;/g;
	$text =~ s/>/&gt;/g;
	$text =~ s/"/&quot;/g;
	return $text;
};

# Banner row: logo and product name, both linking to the WebGlimpse site.
print "<body><center>\n";
print "<table border=5>\n";
print "<tr><td align=center valign=middle>\n";
print "<a href=http://glimpse.cs.arizona.edu/webglimpse>\n";
# The logo anchor is now closed before the cell ends.
print "<img src=$archiveurl/.glimpse-eye.jpg align=middle></a></td>\n";
print "<td align=center valign=middle>\n";
print "<a href=http://glimpse.cs.arizona.edu/webglimpse>\n";
print "<font size=+3>WebGlimpse </a> Search<br></font></td>\n";
print "</tr>\n\n";

# Scope row: the form posts back to the webglimpse CGI for this archive.
print "<tr><td colspan=2>\n";
print "<FORM method=get ACTION=\"/$CGIBIN/webglimpse" . $form_attr->($archivepwd) . "\">\n";
if($file ne ""){
	# A referring page is known: offer neighborhood vs. full-archive search.
	$link = &siteconf_LocalFile2Url($file);
	print "<input name=file type=hidden value=\"" . $form_attr->($file) . "\">\n";
	print "Search:\n";
	print "<INPUT TYPE=radio NAME=scope VALUE=neighbor CHECKED>\n";
	print "The neighborhood of <a href=\"" . $form_attr->($link) . "\">$thistitle</a>\n";
	print "<INPUT TYPE=radio NAME=scope VALUE=full>The full archive: $title\n";
}else{
	print "<center>Search on the entire archive.</center>\n";
}
print "</td></tr>\n\n";

# Query row: search string and the search options.
print "<tr><td colspan=2>\n";
print "String to search for: <INPUT NAME=query size=30>\n";
print "<INPUT TYPE=submit VALUE=Submit>\n";
print "<br>\n";
print "<center>\n";
print "<INPUT NAME=case TYPE=checkbox>Case&#160;sensitive\n";
print "<!-- SPACES -->&#160;&#160;&#160;\n";
# print "<!-- SPACES -->&#160;&#160;&#160;&#160;&#160;&#160;&#160;\n";
print "<INPUT NAME=whole TYPE=checkbox>Partial&#160;match\n";
print "<!-- SPACES -->&#160;&#160;&#160;\n";
print "<INPUT NAME=lines TYPE=checkbox>Jump&#160;to&#160;line\n";
print "<!-- SPACES -->&#160;&#160;&#160;\n";
print "<SELECT NAME=errors align=right>\n";
print "<OPTION>0\n";
print "<OPTION>1\n";
print "<OPTION>2\n";
print "</SELECT>\n";
print "misspellings&#160;allowed\n";
print "<br>\n";
print "</center>\n";
print "Return only files modified within the last <INPUT NAME=age size=5>\n";
print "days.\n";
print "<br>\n";
print "Maximum number of files returned:\n";
print "<SELECT NAME=maxfiles>\n";
print "<OPTION>10\n";
print "<OPTION selected>50\n";
print "<OPTION>100\n";
print "<OPTION>1000\n";
print "</SELECT>\n";
print "<br>Maximum number of matches per file returned:\n";
print "<SELECT NAME=maxlines>\n";
print "<OPTION>10\n";
print "<OPTION selected>30\n";
print "<OPTION>50\n";
print "<OPTION>500\n";
print "</SELECT>\n";
print "<br>\n";
print "</FORM>\n";
print "</td></tr>\n";

# Footer row: copyright notice.
print "<tr><td colspan=2>\n";
print "<center>\n";
print "<font size=-2><a href=http://glimpse.cs.arizona.edu>\n";
print "Glimpse</a> and <a href=http://glimpse.cs.arizona.edu/webglimpse>\n";
print "WebGlimpse</a>, Copyright &copy; 1996, \n";
print "University of Arizona\n";
print "</center>\n";
print "</font></td></tr>\n";
print "</table></center>\n";
print "</center>\n\n<p>";

$path_info = $ENV{'PATH_INFO'};
$path_info = "" unless defined $path_info;

# Escape request-derived text for HTML body text and double-quoted attributes.
my $nh_html = sub {
	my ($text) = @_;
	$text = "" unless defined $text;
	$text =~ s/&/&amp;/g;
	$text =~ s/</&lt;/g;
	$text =~ s/>/&gt;/g;
	$text =~ s/"/&quot;/g;
	return $text;
};

if($file ne ""){
	if($show_neighborhood!=0){
		# now add the files in that neighborhood
		print "<hr><br>Pages in the neighborhood of " . $nh_html->($file) . ":\n";
		$neighborhood = $file;
		# add the prefix to the base name: dir/page.html -> dir/.nh.page.html
		$neighborhood =~ s/([^\/]+)$/$nh_pre$1/;

		# Run wgconvert over the neighborhood file WITHOUT a shell.  Both
		# PATH_INFO and the file name come from the request, so building a
		# shell command line from them allowed command injection.  A forking
		# open gives us a child whose STDOUT is the pipe; the child wires up
		# STDIN/STDERR itself and execs the converter with a plain arg list.
		my $nh_pid;
		if (-f $neighborhood && -r _) {
			$nh_pid = open(NH, "-|");
		}

		if (!defined $nh_pid) {
			# Missing/unreadable neighborhood file, or fork failure.
			print "No neighborhood file found.\n";
		}
		elsif ($nh_pid == 0) {
			# Child: neighborhood file on STDIN, STDERR merged into the pipe
			# (equivalent of the former "< file 2>&1").
			open(STDIN, '<', $neighborhood) || exit 1;
			open(STDERR, '>&', \*STDOUT);
			#DON'T USE -U SINCE I WANT ALL INFORMATION...
			exec($CONVERT_LOC, '-in', '-H', $path_info) or exit 1;
		}
		else {
			print "<ul>\n";
			while (defined(my $nh_line = <NH>)) {
				chomp $nh_line;
				# Each line: <local name> <url> <title words...>
				my ($nh_path, $nh_url, @nh_words) = split(" ", $nh_line);
				# Ignore blank or malformed lines that carry no URL.
				next unless defined $nh_url;
				# skip if it's the file itself (literal comparison; the
				# names are not regular expressions)
				next if ($nh_path eq "$archivepwd/$file");

				# if there is no title, use the URL as the link text
				my $nh_title = join(" ", @nh_words);
				if ($nh_title eq "No Title" || $nh_title eq "") {
					$nh_title = $nh_html->($nh_url);
				}

				# NOTE(review): titles are emitted as-is because they appear to
				# be HTML fragments taken from indexed pages -- confirm what
				# wgconvert outputs before escaping them.
				print "<li><a href=\"" . $nh_html->($nh_url) . "\">$nh_title</a>\n";
			}
			close(NH);
		}
	}else{
		# Offer a link that re-requests this page with the listing enabled.
		# The file name is URL-encoded for the query string.
		my $nh_file_param = $file;
		$nh_file_param =~ s/([^A-Za-z0-9_.~\/-])/sprintf("%%%02X", ord($1))/ge;
		print "<center><h3>\n";
		print "<a href=\"/$CGIBIN/webglimpse-fullsearch" . $nh_html->($archivepwd) . "?file=$nh_file_param&amp;shownh=1\">List the neighborhood</a> of \"$thistitle\"<p>\n";
		print "</h3></center>\n";
	}
}
		
print "</body>\n";
print "</html>\n";




#########################################################################
# Emit the "not an archive directory" error page and terminate the script.
# Reads the global $archivepwd (taken from PATH_INFO) for the message.
# Never returns.
sub err_conf{
	# PATH_INFO is request-controlled: escape it before echoing into HTML.
	my $shown_dir = defined $archivepwd ? $archivepwd : "";
	$shown_dir =~ s/&/&amp;/g;
	$shown_dir =~ s/</&lt;/g;
	$shown_dir =~ s/>/&gt;/g;
	$shown_dir =~ s/"/&quot;/g;

	print "<title>Cannot find archive</title>\n";
	print "</head><body>\n";
	print "<h1>Specified directory $shown_dir is not an archive directory.</h1>\n";
	print "Configuration file was not found.\n";
	print "</body></html>\n";
	exit -1;
}

# Emit the "file is not readable" error page and terminate the script.
#   $file - name of the file that could not be read (shown in the message)
# Never returns.
sub err_file{
	my ($file) = @_;

	# The name usually originates from the request (PATH_INFO or the "file"
	# parameter): escape it before echoing into HTML.
	my $shown_file = defined $file ? $file : "";
	$shown_file =~ s/&/&amp;/g;
	$shown_file =~ s/</&lt;/g;
	$shown_file =~ s/>/&gt;/g;
	$shown_file =~ s/"/&quot;/g;

	print "<title>Can't read file</title>\n";
	print "</head><body>\n";
	print "<h1>Specified file $shown_file is not readable.</h1>\n";
	print "</body></html>\n";
	exit -1;
}

# Decode one application/x-www-form-urlencoded token.
#   $encoded - the raw name or value from the query string
# Returns the decoded string; an undefined argument is returned unchanged.
sub www_form_urldecode {  # Added 10/18/97 as per Peter Bigot --GB

	# Work on a lexical copy instead of localizing $_.
	my ($encoded) = @_;
	return $encoded unless defined $encoded;

	# Reverse the encoding: plus goes to space, then unhex encoded chars.
	# The order matters: an encoded "%2B" must come out as a literal '+'.
	$encoded =~ s/\+/ /g;
	# chr() maps 0x00-0xFF directly; the former pack("c") is a *signed*
	# char and wrapped (with a warning) for every byte >= 0x80.
	$encoded =~ s/%([A-Fa-f0-9]{2})/chr(hex($1))/ge;
	return $encoded;
}

# Extract the text of the first <title> element from an HTML file.
#   $file - path of the file to scan
# Returns the title text (a title spanning several lines is joined with
# single spaces), or undef when the file cannot be opened or has no title.
# Names that look like "-" or "&..." are rejected through err_file, which
# terminates the script.
sub lookup_title{
	my ($file) = @_;
	my $intitle = 0;
	my $title;

	# An undefined name must never reach open().
	return $title unless defined $file;

        if (($file =~ m/^\s*-\s*$/) ||
            ($file =~ m/^\s*\&/)) {
            # Don't let anybody open stdin, or specific descriptors.
            &err_file ($file);
            die ("UNREACHABLE REACHED");
        }

	# Three-argument open with a lexical handle: the name is used exactly
	# as given (no whitespace stripping, no mode characters) and the
	# handle cannot collide with any other open file.
	if (open(my $in, '<', $file)) {
		LINE: while (my $line = <$in>) {
			chomp $line;
			if ($line =~ /\<title\>(.*)$/i) {
				# Opening tag found: keep the rest of the line.
				$intitle = 1;
				$title = $1;
			} elsif ($intitle) {
				# Continuation line of a multi-line title.
				$title .= " $line";
			}
			# Cut at the closing tag; done once it has been seen.
			if ($intitle && $title =~ s#</title>.*##i) {
				last LINE;
			}
		}
		close($in);
	}
	# if there's no title, just return "", let webglimpse write 'No title'.
	# if($title eq ""){
		# $title="No title";
	# }

	return $title;
}

########################################################################
####  SITE CONFIGURATION FUNCTIONS ####
########################################################################

########################################################################
# Load the site configuration file named by the global $wgConfPath.
# Sets the globals $DirectoryIndex (default "index.html"), $UserDir,
# $DocumentRoot, $Port, $Server and $ServerAddress.  When a directive
# occurs more than once the last occurrence wins.  Alias and ScriptAlias
# lines are ignored; they are not needed by this script.
# Calls err_file (which terminates the script) if the file cannot be opened.
sub siteconf_ReadConf   {
   # Three-argument open with a lexical handle; closed below.
   open (my $conf_fh, '<', $wgConfPath) || &err_file ($wgConfPath);

   while (my $conf_line = <$conf_fh>)  {
      if ($conf_line =~ /^DirectoryIndex[\s]*([\S]*)/i) {
         $DirectoryIndex = $1;
      } elsif ($conf_line =~ /^UserDir[\s]*([\S]*)$/i)  {
         $UserDir = $1;
      } elsif ($conf_line =~ /^DocumentRoot[\s]*([\S]*)$/i)   {
         $DocumentRoot = $1;
      } elsif ($conf_line =~ /^Port[\s]*([\S]*)$/i)  {
         $Port = $1;
      } elsif ($conf_line =~ /^Server[\s]*([\S]*)$/i)   {
         $Server = $1;
      }
   }
   close($conf_fh);

   if ($DirectoryIndex eq "") {
      $DirectoryIndex = "index.html";
   }

   # Resolve the server name only when one was configured; an empty name
   # cannot resolve and would only cost a pointless lookup.
   my $addrs;
   if ($Server ne "") {
      (undef, undef, undef, undef, $addrs) = gethostbyname($Server);
   }

   ### SERVER CACHE STUFF NOT NEEDED
   $ServerAddress = $addrs;
}

# Translate a local file name into the URL it is served under.
#   $file - local path of the file
# Uses the globals $Server, $Port, $DocumentRoot and %Alias (URL prefix =>
# local directory).  Returns the URL, or "" when the file lies under
# neither the document root nor any alias directory.
# Side effect: sets the global $portPart, as the original code did.
sub siteconf_LocalFile2Url {
   my ($file) = @_;
   $file = "" unless defined $file;

   # No explicit port for the default port -- or when none is configured;
   # an empty $Port used to produce "http://host:/...".
   if ($Port eq "" || $Port eq "80")   {
      $portPart = "";
   } else   {
      $portPart = ":$Port";
   }

   # Return what follows $dir in $file (leading slashes removed), or undef
   # when $file is not inside $dir.  The directory is matched literally
   # (\Q..\E) and only at a path boundary, so "/var/www" no longer claims
   # "/var/www2/x".
   my $path_below = sub {
      my ($dir) = @_;
      $dir = "" unless defined $dir;
      return undef unless $file =~ /^\Q$dir\E(.*)/;
      my $rest = $1;
      return undef unless $rest eq "" || $rest =~ m{^/} || $dir eq "" || $dir =~ m{/$};
      $rest =~ s{^/+}{};
      return $rest;
   };

   my $rest = $path_below->($DocumentRoot);
   if (defined $rest)  {
      return "http://$Server$portPart/$rest";
   }

   #  We are NOT going for longest match.
   foreach my $alias (keys %Alias)  {
      $rest = $path_below->($Alias{$alias});
      next unless defined $rest;
      # Join with exactly one slash, whatever the alias ends with.
      (my $url_prefix = $alias) =~ s{/+$}{};
      return "http://$Server$portPart$url_prefix/$rest";
   }

   return "";
}

