#!/usr/bin/perl -Tw
#
# Acknowledgements
#
# Thanks to 
# Paul Clark's aglimpse program
# paul@cs.arizona.edu
# which was the starting point for this program.
#
# Written by:
# Michael Smith
# msmith@cs.arizona.edu
#
# Modifications
#
# 4/13/96	Version 1.0, original
#
# Modifications
#	We translate the local file name to a URL and grab the title in
#	this script, instead of starting two "glimpse -k" sessions.
#					06/28/96	ZDC
#
# 9/96
#  In-lined a lot of "require"d code for speed
#  Tried to optimize output of information
#
# 9/97
#  Security fixes and added "no local copy link" option --GB
# 
#######################################################################

# **** **** **** ****    CONFIGURABLE VARIABLES     **** **** **** ****
# Installation root of WebGlimpse and full path of the glimpse binary.
# $GLIMPSE_LOC is checked with -x and placed first on the search command line.
$WEBGLIMPSE_HOME = "/usr/src/redhat/BUILD/webglimpse-1.5";
$GLIMPSE_LOC = "/usr/bin/glimpse";
# $GREP_LOC = $GLIMPSE_LOC;

# lib directory (added to the front of @INC below)
$WEBGLIMPSE_LIB = "$WEBGLIMPSE_HOME/lib";

# Path to your scripts (used to build $FSSERV below)
$CGIBIN = "cgi-bin";

# Maximum characters to print from META NAME="DESCRIPTION" tag
# (lookup_titledesc truncates the description to this length)
$MAX_METADESC_LEN = 200;

# **** **** **** **** NO CONFIGURATION NEEDED BELOW **** **** **** ****

# glimpse occasionally needs to invoke some system programs, like cat, sort,
# and mv.  Set up a path so it can find them.  If you don't like this,
# edit the file index/glimpse.h in the glimpse source hierarchy to hard-code
# paths for SYSTEM_CAT and its companions, then set the PATH here to a
# benign location.  (Beware: using an empty path '' with GNU libc, as on
# Linux, is equivalent to using '.').
$ENV{'PATH'} = '/bin:/usr/bin';

# Name of the lock file.  If a file with this name exists in the index
# directory, the search is refused with an "indexing in progress" page.
$LOCKFILE = "indexing-in-progress";

# If you want per-line access: URL path of the script that the
# "line N" links in the result list point to.
$FSSERV = "/$CGIBIN/mfs" ;

# Set file name pattern where to suppress HTML tags
# Comment out to cancel suppression
# (The value is a regular expression matched against each result file name.)
$SUPPRESS_HTML_TAGS = "\\.s?html?\$";

# $MAPFILE = ".wgmapfile";
# Prefix inserted before the base name of a file to form the name of its
# neighborhood file.
$nh_pre = ".nh.";

# $CONFIGFILE, $REMOTEDIR not used.  commented out 11/5/97 --GB
# name of config file
#$CONFIGFILE = "archive.cfg";
#$REMOTEDIR = ".remote";

# Default values for user inputs.  Each of these may be overwritten by the
# query-string parameter of the same name (without the QS_ prefix).
$QS_age = '';        # Restrict matches to updates in the last $QS_age days
$QS_case = '';       # Case-sensitive if set to 'on'
$QS_debug = '';      # Debug on/off
$QS_errors = '';     # Number of errors allowed in a match, or 'Best match'
$QS_file = '';	     # File to search neighborhood of
$QS_lines = '';	     # Print line numbers & enable jump to line option
$QS_localcopy = '';  # Print "local copy" links in output	(Added 9/97 --GB)
$QS_maxfiles = '';   # Maximum number of files to print matches from
$QS_maxlines = '';   # Maximum number of lines per file to print matches from
$QS_pathinfo = '';   # Path to index dir; not in wgindex.html by default
$QS_query = '';	     # WHAT YOU ARE SEARCHING FOR
$QS_scope = '';	     # Full archive search or neighborhood only
$QS_whole = '';	     # Whole or partial word search
$QS_filter = '';     # Restrict the search to files matching QS_filter  (Added 11/5/97 --GB)


# **** **** **** **** Done settings **** **** **** ****

# Turn on autoflush so output is sent as soon as it is printed
$| = 1;

#---------------------------------
# make my libraries more important: search $WEBGLIMPSE_LIB before the
# standard library directories, then load config.pl.
# NOTE(review): TestConfig and ReadConfig are called later but not defined
# in this file; presumably config.pl provides them -- confirm.
unshift(@INC, "$WEBGLIMPSE_LIB");
require "config.pl";


### DEBUG
# $other, $starthour are unused
#($startsec, $startmin, $starthour, $other) = localtime(time);

# Get inputs now so we can use a user-set path

# $prefix appears to be unused.  Commented out 11/5/97 --GB
#	To support an ISINDEX type search, set query string if given
#	an argument on the command line
#$prefix="whole=on&case=off&query=" if ( $#ARGV >= 0 );

#	Check that a query has been made.
#	No HTTP header has been sent at this point, so send it (and open the
#	<HEAD> element that err_noquery's markup closes) before reporting.
$query = $ENV{'QUERY_STRING'};
if (!$query) {
	print "Content-type: text/html\n\n";
	print "<HEAD>\n";
	&err_noquery;
}

#	Strip the variables out from the query string,
#	and assign them into variables, prefixed by 'QS_'
foreach $pspec (split (/\&/, $query)) {
	# Split on the first '=' only, so a value that itself contains '='
	# is kept whole.  A parameter without '=' or with an empty value
	# yields '' rather than undef.
	($pname, $pvalue) = split (/=/, $pspec, 2);
	$pname = ''  unless defined $pname;
	$pvalue = '' unless defined $pvalue;

	# Decode form results (hex characters, spaces etc)
	$pvalue = www_form_urldecode($pvalue);
	$pname = www_form_urldecode($pname);

	# Only plain identifier characters are accepted as a parameter name;
	# the name must be non-empty and may not end in a newline.
	if ($pname =~ /^[a-zA-Z0-9_]+\z/ ) {

# We should do this quote removal only for variables that will be placed on a command line
#		$pvalue =~ s/\'//g;

		# Symbolic reference: parameter "foo" sets the global $QS_foo.
		$varname = "QS_$pname";
		$$varname = $pvalue;
	}
}

# The pathinfo parameter may be absent or empty; normalise it to '' so the
# substitution and comparison below never operate on undef.
$QS_pathinfo = '' unless defined $QS_pathinfo;

# Turn a literal "%2f" that is still present in the value into '/'.
$QS_pathinfo =~ s/%2f/\//ig;

# The pathinfo form parameter takes precedence over the CGI PATH_INFO.
# Neither may be present, in which case the index directory is ''.
$path_info = ($QS_pathinfo ne "") ? $QS_pathinfo : $ENV{'PATH_INFO'};
$path_info = '' unless defined $path_info;
$_ = $path_info;

# might as well start the message now
# (every err_* page below assumes this header and the open <HEAD>)
print "Content-type: text/html\n\n";
print "<HEAD>\n";

$indexdir = $path_info;

# Check that indexdir has no single quote characters; it will be used on a command line
$indexdir =~ s/[\']//g;


# Added check for ".." as per CERT 11/7/97 --GB
if ($indexdir =~ /\.\./) {
	&err_insecurepath;
}

# Refuse to search while the lock file exists in the index directory.
if(-e "$indexdir/$LOCKFILE"){
	&err_locked;
}

# TestConfig is not defined in this file; any result other than 2 is
# reported as a missing archive configuration file.
if(&TestConfig($indexdir)!=2){
	&err_conf;
}


# The following are filled in from the archive configuration but are not
# used afterwards (noted 11/5/97 --GB).  They are given a value first only
# to keep perl -Tw quiet.
$explicit_only = $remote_limit = $local_limit = $addboxes = 0;
$numhops = $nhhops = $traverse_type = 0;
@urllist = ();

# Load the archive settings for this index directory.  Only $title and
# $urlpath are used later in this script.
($title, $urlpath, $traverse_type, $explicit_only, $numhops,
 $nhhops, $local_limit, $remote_limit, $addboxes, @urllist) = ReadConfig($indexdir);

# The glimpse binary must be executable on this machine ...
unless (-x $GLIMPSE_LOC) {
	&err_noglimpse($GLIMPSE_LOC);
}

# ... and the index must be readable.
unless (-r "$indexdir/.glimpse_index") {
	&err_noindex($indexdir);
}

# Any of the user inputs may be undef when a parameter was sent without a
# value; treat that the same as "not given".
foreach ($QS_file, $QS_query, $QS_errors, $QS_case, $QS_whole, $QS_age,
	 $QS_filter, $QS_maxlines, $QS_maxfiles) {
	$_ = '' unless defined $_;
}

# resubstitute / for %2F in the file paths
$QS_file =~ s/%2f/\//ig;

# The query was already URL-decoded when the query string was parsed.
# It must not be decoded a second time: that would turn a literal '+'
# into a space and a literal "%41" into "A".
# $pquery keeps the query as typed; $QS_query is made safe for use inside
# single quotes on the shell command line.
$pquery = $QS_query;
$QS_query =~ s|\'|\'\"\'\"\'|g;


# Translate the user inputs into glimpse switches.  The patterns are
# anchored with \z so that a value with a trailing newline is rejected
# instead of being copied onto the command line.
$OPT_errors='';
$OPT_errors="-$QS_errors"	if $QS_errors =~ /^[0-8]\z/;
# After form decoding the value reads "Best match"; "Best+match" is still
# accepted for a literal plus sign.
$OPT_errors="-B"		if $QS_errors =~ /^Best[+ ]match\z/;
# remove the '-i' from case if the switch is on
$OPT_case="-i"; 
$OPT_case=""			if $QS_case =~ /^on$/;
$OPT_whole = '';
$OPT_whole="-w"			unless $QS_whole =~ /^on$/;
$OPT_age = '';
$OPT_age = "-Y $QS_age" if $QS_age =~ /^[0-9]+\z/;
# print "OPT_age = $OPT_age<br>\n";
# Dots in the filter are taken literally; single quotes are dropped because
# the filter is placed inside single quotes on the command line.
$QS_filter =~ s/\./\\./g;
$QS_filter =~ s/\'//g;
$OPT_filter = '';
$OPT_filter="-F '$QS_filter'"	if $QS_filter;

# Use the first run of digits in each limit, or the default.
if ($QS_maxlines =~ /\d+/) {
	$maxlines = $&;
} else {
	$maxlines = 20;
}
if ($QS_maxfiles =~ /\d+/) {
	$maxfiles = $&;
} else {
	$maxfiles = 25;
}

# Build the alternation of query words used to bold matches in the output.
$highlight = $QS_query;
$highlight =~ s/^\W+//;
$highlight = join("|",split(/\W+/,$highlight));
# check if the query contains any words ("0" is a word, so compare with '')
&err_badquery if $highlight eq '';
$highlight = '\b('.$highlight.')\b' if $OPT_whole;

# HTML-escaped copy of the query as typed.  It is echoed into the page, so
# markup characters supplied by the user must not reach the browser raw.
my $shown_query = defined($pquery) ? $pquery : '';
for ($shown_query) { s/&/&amp;/g; s/</&lt;/g; s/>/&gt;/g; }

# The page heading is collected in $initial_output and only printed after
# glimpse has run, so that an error page can be shown instead.
$initial_output = '';
$initial_output .= "<TITLE>Result for query \"$shown_query\"\n";
$initial_output .= "</TITLE></HEAD><BODY>\n";
$initial_output .= "<center>";
$initial_output .= "<H1>Results for query \"$shown_query\"</H1>\n";
$initial_output .= "<h3>on: $title</h3>\n";

# if the scope is full, delete any file options
if($QS_scope =~ /^full$/i){
	$QS_file="";
}

if($QS_file){
	($title, $metadesc) = &lookup_titledesc($QS_file);

	# What is shown for the neighborhood: the document title, or the file
	# name when there is no usable title.  The file name comes straight
	# from the request, so it is HTML-escaped before display.
	my $shown_title = $title;
	if ($title eq "No Title" || $title eq "") {
		$title = $QS_file;
		$shown_title = $QS_file;
		for ($shown_title) { s/&/&amp;/g; s/</&lt;/g; s/>/&gt;/g; }
	}

	$initial_output .= "<i>Search on neighborhood of <tt>$shown_title</tt></i>\n";
	$initial_output .= "</center><p>\n";

	# $fullfile = "$indexdir/$QS_file";
	$fullfile = $QS_file;		# it might not be in a subdir of the archivepwd
	# modify the file name to include the .nh.
	# prepend the file name with nh_pre
	$fullfile =~ s/([^\/]+)$/$nh_pre$1/;

	if(!(-e $fullfile)){
		&err_noneighborhood($fullfile);
	}

	# The name goes onto a shell command line: wrap it in single quotes
	# and escape any single quote inside it, as is done for the query.
	my $quoted_file = $fullfile;
	$quoted_file =~ s/'/'"'"'/g;

	#$OPT_file = "-f $fullfile"; Changed to -p --> bgopal oct/6/96
	$OPT_file = "-p '$quoted_file:0:0:2'";
}else{
	$initial_output .= "<i>Search on entire archive</i>\n";
	$initial_output .= "</center><p>\n";

	$OPT_file = "";
}

# Try using -H switch instead of chdir, as per Peter Bigot's suggestion.  GB 10/17/97
#chdir $indexdir;

# the default is *no* jump to lines.  If line=on, tell glimpse to get lines
$OPT_linenums = '';
if($QS_lines){
	$OPT_linenums="-n";
	$initial_output .= "File name (modification date), and list of matched lines (preceded by line numbers)<br>\n";
}else{
	$initial_output .= "File name (modification date), and list of matched lines<br>\n";
}

# Security note: the index directory is placed on the command line, so it is
# single-quoted (with embedded single quotes escaped) to keep shell control
# characters in a directory name from being interpreted.
my $quoted_dir = $indexdir;
$quoted_dir =~ s/'/'"'"'/g;

# Added -U -W --> bgopal oct/6/96
# The trailing "|" makes open() run this as a command and read its output.
$cmd = "$GLIMPSE_LOC -U -W -j -z -y $OPT_file $OPT_linenums $OPT_age $OPT_case $OPT_whole $OPT_errors -H '$quoted_dir' " .
	 "$OPT_filter '$QS_query' 2>&1 |";

# A line break or NUL inside the command would split or truncate it;
# replace them with spaces before the command is accepted.
$cmd =~ tr/\r\n\0/ /;

# Fool perl -T into accepting $cmd for execution.  (as per Peter Bigot) --GB 10/17/97
# We assume that we have sufficiently checked the parameters to be safe at this point.
# The capture is taken from the match result itself, never from a $1 left
# over from an earlier match.
($cmd) = $cmd =~ /\A(.*)\z/s;

if($QS_debug){
	my $debug_cmd = $cmd;
	for ($debug_cmd) { s/&/&amp;/g; s/</&lt;/g; s/>/&gt;/g; }
	$initial_output .= "<br>cmd: $debug_cmd<br>\n";

}


### DEBUG
# print "<br>start time: $starthour:$startmin:$startsec<br>\n";
# $utime = (times)[0];
# $stime = (times)[1];
# print "<br>time after init: $utime, $stime<br>\n";
# ($sec, $min, $hour, $other) = localtime(time);
# print "<br>now (after init): $hour:$min:$sec<br>\n";


# Record the command in the page source.  The command contains the user's
# query, so '<', '>' and '&' are escaped; otherwise a query containing
# "-->" could close the comment and inject markup.
my $shown_cmd = $cmd;
for ($shown_cmd) { s/&/&amp;/g; s/</&lt;/g; s/>/&gt;/g; }
print "<!-- Glimpse command: $shown_cmd -->\n";

# Save pid of the pipe command so we can do cleanup later.
if (!($gpid = open(GOUT, $cmd ))) {
   &err_noglimpse($cmd);
}
# Read all of glimpse's output (stdout and stderr) before printing anything,
# so that a failure can still be turned into an error page.
@glines = <GOUT>;
close(GOUT);

# check the return code
$rc = $? >> 8;
if($rc!=0){
   # it's an error!
   &err_badglimpse(@glines);
}

# now print out the already-computed output!
print $initial_output;

### DEBUG
# $utime = (times)[0];
# $stime = (times)[1];
# print "<br>time after glimpse: $utime, $stime<br>\n";
# ($sec, $min, $hour, $other) = localtime(time);
# print "<br>now (after glimpse): $hour:$min:$sec<br>\n";


# Turn the glimpse output lines into an HTML list: one heading per file,
# followed by its matching lines.
#   $prevfile  - file of the previous matching line ("" before the first)
#   $lcount    - total number of matching lines seen
#   $fcount    - number of files printed so far
#   $linecount - matching lines seen for the current file
$prevfile = "";
$lcount = 0;
$fcount = 0;

# Added "line:" label; should fix ignore maxlines bug --GB 7/24/97
line:
foreach $line (@glines) {
	$_ = $line;

	if($QS_debug){
		print "<br><tt>glimpse: $_</tt><br>\n";
	}

        # Date output is from glimpse source file agrep/agrep.c:aprint_file_time.
        # Without matching it exactly, we too often screw up recognizing
        # whether or not there's a title column, since glimpse drops that
        # output when it can't find a document title.
        # Current format is: Mon day FullYear (mmm [d]d yyyy) 
	# Lines that do not have the expected shape are skipped.
	if($QS_lines){
		# look for line number, too
		(/^(\S+)\s+(\S+)\s*(([^\\:]|\\:|\\\\)*):\s*(\w\w\w\s+\d+\s+\d\d\d\d):\s*(\d+)\s*:(.*)/) || next;
		$file = $1;
		$link=$2;
		$title=$3;
		$date = $5;
		$line = $6;
		$string = $7;
	}else{
		(/^(\S+)\s+(\S+)\s*(([^\\:]|\\:|\\\\)*):\s*(\w\w\w\s+\d+\s+\d\d\d\d):(.*)/) || next;
		$file = $1;
		$link = $2;
		$title = $3;
		$date =	$5;
		$string = $6;
	}

	##### CHANGE FOR LOCAL COPY POINTERS -- mdsmith
	# modify the local file to get the localurl
	# \Q...\E: the directory name is literal text, not a pattern.
	$localurl = $file;
	$localurl =~ s/\Q$indexdir\E/$urlpath/;
	##### END CHANGE FOR LOCAL COPY POINTERS -- mdsmith

	if($QS_debug){
		print "<br><tt>Webglimpse: file=$file link=$link title=$title date=$date line=$line string=$string localurl=$localurl </tt><br>\n";
	}

	# replace the \:'s and \\'s in the title with just :'s
	$title =~ s/\\\\/\\/g;
	$title =~ s/\\:/:/g;

	# skip the file if it isn't in this index directory directory
	### commented out!
	# next unless $file =~ s|^$indexdir||o;

	# skip if the file is a .gh or .glimpse file
	next if ($file =~ /\.gh/) || ($file =~ /\.glimpse_/);

	if ($file ne $prevfile) {
		$linecount = 0;
		# Stop before starting a file beyond the limit, i.e. once
		# $maxfiles files have been printed.
		if ($fcount>=$maxfiles) {
			print "</ul>\n" if ( $prevfile ne "" );
			print "<H3>Limit of $maxfiles files exceeded.  Check the search options.</H3>\n";
			# $file is cleared so the list is not closed a second
			# time after the loop; the counts become lower bounds.
			$file = "";
			$fcount = "at least $fcount";
			$lcount = "at least $lcount";
			last line;
		}
		print "</UL>" if ( $prevfile ne "" );
		$prevfile = $file ;

		# Fall back to the link when there is no usable title.
		if($title eq "No Title") {
			$title = $link;
		}
		else {
		    if($title eq ""){
			$title = $link;
		    }
		}
		##### CHANGE FOR LOCAL COPY POINTERS -- mdsmith
		##### Change to include OPTION for local copy pointers --GB 9/17/97
		if ($QS_localcopy eq 'n') {
			print 
				"<hr><b><A HREF=\"",$link,
				"\">",$title,"</A></b>, $date<br>\n";
		} else {
			print 
				"<hr><b><A HREF=\"",$link,
				"\">",$title,"</A></b>",
				", <font size=-1><a href=$localurl>(local copy)</a>, $date</font><br>" ;
		}
		##### END CHANGE FOR LOCAL COPY POINTERS -- mdsmith

		
		# Added META description if exists, as per Darryl Fuller's suggestion. --GB 7/24/97
		# NOTE(review): $metadesc is only ever set from the neighborhood
		# file given in the request, not from the file being listed
		# here -- confirm that this is intended.
		if ((! defined($metadesc)) || ($metadesc eq '')) {
			print "\n<UL>\n";
		} else {

			print "<p>&nbsp;&nbsp;&nbsp;&nbsp;$metadesc <br>\n<UL>\n";
		}
		


		$fcount++ ;
	}
	$lcount++ ;
	$linecount++;
	# Print up to $maxlines lines per file; announce the limit once, on the
	# first line beyond it, and silently skip the rest.  ($maxlines of 0
	# prints no lines and no notice.)
	if ($linecount>$maxlines) {
		print "<LI>Limit of $maxlines matched " .
			"lines per file exceeded...\n" if
				$linecount==$maxlines+1 && $maxlines > 0;
		next line;
	}

	if ($SUPPRESS_HTML_TAGS && $file =~ /$SUPPRESS_HTML_TAGS/o) {
		# Drop the tags from matching lines of HTML files.
		$string =~ s#\</?[a-zA-Z][^>]*\>?##g;
	} else {
		# we shouldn't suppress tags, but we need to do basic
		#  substitutions
		$string =~ s/\&/\&amp;/g;
		$string =~ s/\</\&lt;/g;
		$string =~ s/\>/\&gt;/g;
	}
	if($string !~ /^\s*$/){
		if($QS_lines){
			# BOLDING (case-insensitive when glimpse was run with -i)
			if ($OPT_case) {
				$string =~ s#$highlight#<B>$&</B>#gio;
			} else {
				$string =~ s#$highlight#<B>$&</B>#go;
			}
#	unused		$length = length($indexdir);

			# Added $link as argument for use in BASE HREF tag
			# Trim spaces from $line as per Jan Holler.  10/17/97 --GB
			$line =~ s/\ //g;
			print "<LI><A HREF=\"$FSSERV$indexdir\?link=$link&file=$file&line=$line#mfs\">\n" ;
			print "line ",$line,"</A>:",$string,"\n" ;
		}else{
			# BOLDING (case-insensitive when glimpse was run with -i)
			if ($OPT_case) {
				$string =~ s#$highlight#<B>$&</B>#gio;
			} else {
				$string =~ s#$highlight#<B>$&</B>#go;
			}
			print "<LI>$string\n";
		}
	}
}

# Close the last file's list (unless the file limit already closed it).
print "</UL>\n" if $file ;
print "<HR>" ;

# Show the query as the user typed it: $pquery is the copy taken before the
# shell quoting was applied to $QS_query.  It is HTML-escaped here because
# it is user input.
my $summary_query = defined($pquery) ? $pquery : '';
for ($summary_query) { s/&/&amp;/g; s/</&lt;/g; s/>/&gt;/g; }

print "<H2>Summary for query <code>\"",$summary_query,"\":</code></H2>\n" ;
print "<i><a href=http://glimpse.cs.arizona.edu/webglimpse>WebGlimpse</a></i>\n";
print "search found ",$lcount," matches in ",$fcount," files<br>\n" ;
print "(Some matches may be to HTML tags which may not be shown.)\n";

### DEBUG
# $utime = (times)[0];
# $stime = (times)[1];
# $ctime = (times)[1];
# $cstime = (times)[1];
# print "<p>time after formatting: $utime, $stime, $ctime, $cstime<br>\n";
# ($sec, $min, $hour, $other) = localtime(time);
# print "<br>now: $hour:$min:$sec<br>\n";

print "</BODY>\n" ;
print "</HTML>\n" ;

# Remove the temporary file named after the pid of the glimpse pipe.
unlink "/tmp/.glimpse_tmp.$gpid";

exit(0);

##########################################################################
sub www_form_urldecode {  # Added 10/18/97 as per Peter Bigot --GB
	# Decode one application/x-www-form-urlencoded name or value.
	#   argument: the encoded string (may be undef)
	#   returns:  the decoded string; '' when the argument is undef

	local($_) = @_;
	return '' unless defined $_;

	# Reverse the encoding: plus goes to space, then unhex encoded chars.
	# "C" is an unsigned byte, so %80..%FF decode without wrapping.
	s/\+/ /g;
	s/%([A-Fa-f0-9]{2})/pack("C",hex($1))/ge; 
	return $_;
}

##########################################################################
sub diag_exit {
	# Common exit point of the err_* pages: end the script with status -1.
	exit(-1);
}
##########################################################################
sub err_noneighborhood {
	# Report that a neighborhood file does not exist, then exit.
	#   argument: the file name to show in the message

	my($shown_file) = @_;
	$shown_file = '' unless defined $shown_file;

	# The name comes from the request; escape it before echoing it.
	for ($shown_file) { s/&/&amp;/g; s/</&lt;/g; s/>/&gt;/g; }

	# neighborhood does not exist
	print <<EOM;
<hr>
<h1>File not found</h1>
There is no neighborhood for file $shown_file.  Either the file does not
exist or the neighborhood file does not exist.
</body>
</html>
EOM

	&diag_exit;
}

##########################################################################
sub err_noquery {
   #	The script was called without a query. 
   #	Provide an ISINDEX type response for browsers
   #	without form support, then exit.
   #	The text is printed verbatim (non-interpolating here-document); it
   #	starts with an empty line and closes a <HEAD> it does not open.
   print <<'EOM';

<TITLE>Glimpse Gateway</TITLE></HEAD>
<BODY><H1>Glimpse Gateway</H1>
This is a gateway to Glimpse.
Type a pattern to search in your browser's search dialog.<P>

<ISINDEX>

<H2>What is Glimpse ?</H2>
<QUOTE>
<P>
Glimpse (which stands  for  GLobal  IMPicit  SEarch)  is  an
indexing  and query system that allows you to search through
all your files very quickly.   For  example,  a  search  for
Schwarzkopf  allowing  two  misspelling errors in 5600 files
occupying 77MB took 7 seconds on a SUN  IPC.   Glimpse  supports
most of agrep's options (agrep is our powerful version
of  grep)  including  approximate  matching  (e.g.,  finding
misspelled  words),  Boolean  queries, and even some limited
forms of regular expressions.<BR>
Glimpse's running time is typically slower than systems
using inverted indexes, but its index is  an  order  of
magnitude smaller (typically 2-5% of the size of the files).
<H2>Authors of Glimpse</H2>
Udi Manber, Sun Wu, and Burra Gopal<BR>
<ADDRESS>
Department of  Computer
Science, University   of   Arizona,   Tucson,   AZ  85721.<BR>
glimpse@cs.arizona.edu
</ADDRESS>
</QUOTE>

<HR>
<ADDRESS>
Glimpse<BR>
glimpse@cs.arizona.edu<BR>
</ADDRESS>

</BODY>
EOM
   &diag_exit;
}

##########################################################################
sub err_noglimpse {
   #
   # Glimpse was not found (or could not be started)
   # Report a useful message, then exit.
   #   argument: the path or command that was tried
   #
   my($tried) = @_;
   $tried = '' unless defined $tried;

   # The command can contain the user's query; escape it before echoing.
   for ($tried) { s/&/&amp;/g; s/</&lt;/g; s/>/&gt;/g; }

   print "
<TITLE>Glimpse not found</TITLE>
</HEAD>
<BODY>
<H1>Glimpse not found</H1>

Using $tried
<p>
This gateway relies on <CODE>Glimpse</CODE> search tool.
If it is installed, please set the correct path in the script file.
Otherwise obtain the latest version from
<A HREF=\"file://ftp.cs.arizona.edu/glimpse\">ftp.cs.arizona.edu</A>
</BODY>
";
   &diag_exit;
}

##########################################################################
sub err_badglimpse {
   #
   # Glimpse had an error
   # Report a useful message including glimpse's output, then exit.
   #   arguments: the lines glimpse printed
   #
   my(@glines) = @_;

   # Join the lines as they are (interpolating the array would put a space
   # in front of every line after the first) and escape them for HTML.
   my $report = join('', @glines);
   for ($report) { s/&/&amp;/g; s/</&lt;/g; s/>/&gt;/g; }

   print "
<TITLE>Glimpse error</TITLE>
</HEAD>
<BODY>
<H1>Glimpse error</H1>

The search parameters caused an error in the call to Glimpse.
<p>
Please try your search again with different parameters.
<p>
<hr>
Output from Glimpse:
<pre>
$report
</pre>
<br>
<hr>
</BODY>
";
   &diag_exit;
}

##########################################################################
sub err_noindex {
# Glimpse index was not found
# Give recommendations for indexing, then exit.
#   argument: the index directory that was checked
	my($shown_dir) = @_;
	$shown_dir = '' unless defined $shown_dir;

	# The directory comes from the request; escape it before echoing it.
	for ($shown_dir) { s/&/&amp;/g; s/</&lt;/g; s/>/&gt;/g; }

	print "<TITLE>Glimpse Index not found</TITLE>\n";
	print "</HEAD>\n";
	print "<BODY>\n";
	print "<H1>Glimpse Index in directory '$shown_dir' not found</H1>\n";
	print "Glimpse cannot proceed without index.\n";
	print "Please check if the directory being searched is indexed\n";
	print "by <code>glimpseindex</code>.\n";
	print "</BODY>\n";
	print "</html>\n";
	&diag_exit;
}
##########################################################################
sub err_insecurepath {
# Path user requested contains ".." characters.
# Print the rejection page, then exit.
	print "<TITLE>Path not accepted</TITLE>\n";
	print "</HEAD>\n";
	print "<BODY>\n";
	print "<H1>Insecure Path Not Accepted</H1>\n";
	# The inner quotes must be escaped: unescaped, the two dots sit between
	# two string literals and are parsed as the range operator, so neither
	# the dots nor the newline were printed.
	print "Please specify a path not containing \"..\" \n";
	print "</BODY>\n";
	print "</html>\n";
	&diag_exit;
}

##########################################################################
sub err_conf {
# Glimpse archive Configuration File was not found.
# Print the error page naming the global $indexdir, then exit.
	my $shown_dir = defined($indexdir) ? $indexdir : '';

	# The directory comes from the request; escape it before echoing it.
	for ($shown_dir) { s/&/&amp;/g; s/</&lt;/g; s/>/&gt;/g; }

	print "<TITLE>Glimpse Archive Configuration File not found</TITLE>\n";
	print "</HEAD>\n";
	print "<BODY>\n";
	print "<H1>Glimpse Archive Configuration File not found</H1>\n";
	print "Cannot open configuration file $shown_dir/archive.cfg\n";
	print "</BODY>\n";
	print "</html>\n";
	&diag_exit;
}

##########################################################################
sub err_badquery {
# The query contains no words.
# Print the error page quoting the global $pquery, then exit.
	my $shown_query = defined($pquery) ? $pquery : '';

	# The query is user input; escape it before echoing it.
	for ($shown_query) { s/&/&amp;/g; s/</&lt;/g; s/>/&gt;/g; }

	print "<TITLE>Query is too broad</TITLE>\n";
	print "</HEAD>\n";
	print "<BODY>\n";
	print "<H1>Query is too broad</H1>\n";
	print "The query \"$shown_query\" doesn't contain any words and ".
		"thus will take too much time. Please refine your query.\n";
	print "</BODY>\n";
	print "</html>\n";
	&diag_exit;
}

##########################################################################
sub err_locked {
	# The archive is being reindexed: print the notice page and exit.
	# The page is fixed text, so it is written as one verbatim
	# here-document.
	print <<'EOM';
<TITLE>Indexing in progress</TITLE>
</HEAD>
<BODY>
<H1>Indexing in progress</H1>
The archive is currently reindexing.  Please try your query later.
</BODY>
</html>
EOM
	&diag_exit;
}

# Read the <TITLE> of an HTML file.
# Also find <META NAME="DESCRIPTION" CONTENT="stuff..."> at the same time.
#   argument: name of the file to read
#   returns:  (title, description); each is '' when it was not found or the
#             file could not be opened.  The description is cut to
#             $MAX_METADESC_LEN characters.
# A name that is "-" or starts with "&" is rejected through
# err_noneighborhood.
sub lookup_titledesc{
	my($file) = @_;
	my($title, $metadesc) = ('', '');
	my($intitle, $donetitle, $inmetadesc, $donemetadesc) = (0, 0, 0, 0);
	my($text);

	$file = '' unless defined $file;
        if (($file =~ m/^\s*-\s*$/) ||
            ($file =~ m/^\s*\&/)) {
            # Don't let anybody open stdin, or specific descriptors.
            &err_noneighborhood ($file);
            die ("UNREACHABLE REACHED");
        }

	# Three-argument open: the name is only ever treated as a file name.
	local(*IN);
	if (open(IN, '<', $file)) {
		# Each line is read into $text explicitly.  (A readline that is
		# only part of a larger loop condition is not assigned to $_.)
		line: while (defined($text = <IN>)) {
			chomp($text);

			# The title may run over several lines.
			if (!$donetitle && !$intitle && $text =~ /\<title\>(.*)$/i) {
				$intitle = 1;
				$title = $1;
			} elsif ($intitle) {
				$title .= " $text";
			}
			if ($intitle && $title =~ s#</title>.*##i) {
				$donetitle = 1;
				$intitle = 0;
			}

			# So may the description.
			if (!$donemetadesc && !$inmetadesc &&
			    $text =~ /\<meta name=\"*description\"* content=\"*(.*)$/i) {
				$inmetadesc = 1;
				$metadesc = $1;
			} elsif ($inmetadesc) {
				$metadesc .= " $text";
			}
			if ($inmetadesc && $metadesc =~ s#\"*\>.*$##) {
				$donemetadesc = 1;
				$inmetadesc = 0;
			}

			# Stop once both have been found ...
			last line if $donetitle && $donemetadesc;
			# ... or at the end of the head.  Stop looking for
			# <TITLE> & <META...> if reach </HEAD> -- GB 7/24/97
			# (checked after the line has been examined, so a head
			# written on a single line is still read)
			last line if $text =~ /\<\/head/i;
		}
		close(IN);
	}
	# if there's no title, just return "", let webglimpse write 'No title'.
	# if($title eq ""){
		# $title="No title";
	# }

	# Maximum chars for meta description; should be settable by option.
	$metadesc = substr($metadesc, 0, $MAX_METADESC_LEN);

	return ($title,$metadesc);
}

