#!/usr/local/bin/perl
#Operating-mode codes.  $OS must end up holding exactly one of them.
($DOS, $UNIX, $UNIX_DEBUG) = (0, 1, 2);

#IMPORTANT: PICK OPERATING SYSTEM!!!!!  Leave exactly one line active:
#   $OS = $DOS;            plain-text output for a DOS console
#   $OS = $UNIX_DEBUG;     for debugging unix output while using DOS
$OS = $UNIX;



#***REQUIRED***: directories referenced here must already exist
#***REQUIRED***: $OS must be set to either $DOS or $UNIX or $UNIX_DEBUG
#***REQUIRED***: if $OS==$DOS, set $COLUMNS (below) to the # of cols on screen






#Web (CGI) mode: set the file locations and HTML markup strings, emit the
#HTTP header and page prologue, then read the submitted form and take the
#search criterion from its FIELD1 field.
if ($OS == $UNIX) {
    $filehead     = "/home/clint/WWW/voices/";
    $namelistfile = "/home/clint/WWW/pub/passname.dat";
    $pass1file    = "/home/clint/WWW/pub/pass1.dat";
    $pass2file    = "/home/clint/WWW/pub/pass2.dat";
    $logfile      = "/home/clint/WWW/pub/search.log";
    $errorfile    = "error.htm";         #resides in dir specified by $filehead
    $notesfile    = "notes.htm";         #resides in dir specified by $filehead
    $blink        = "<BLINK>";
    $unblink      = "</BLINK>";
    $P            = "<P>";
    $unP          = "</P>";
    $bold         = "<b>";
    $unbold       = "</b>";
    $big          = "<big>";
    $unbig        = "</big>";
    $hr           = "<hr>";
    $PRE          = "<PRE>";
    $unPRE        = "</PRE>";
    $PAlignCenter = "<P ALIGN=CENTER>";
    $br           = "<br>";
    $quote        = "&quot;";
    $RegExURL     = "http://clint.sheer.us/~clint/voices/grepdoc.htm\#RegEx";
    print "Content-type: text/html\n\n"; $title = "Search Results";
    print "<HTML><HEAD><TITLE>$title</TITLE></HEAD><BODY>\n";

    #fetch the raw form data: request body for POST, query string for GET
    $buffer = "";
    if ($ENV{REQUEST_METHOD} eq "POST") {
        #a missing CONTENT_LENGTH is treated as an empty body
        read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'} || 0);
    } elsif ($ENV{REQUEST_METHOD} eq "GET") {
        $buffer = $ENV{QUERY_STRING};
    }#endif

    %in    = &parse_form_data($buffer);
    $argv0 = $in{"FIELD1"};                      #set argv0 to search criterion
}

#Decode a URL-encoded form string ("name=value&name=value...").
#  parameter: the raw string (may be undefined or empty)
#  returns:   a list of name/value pairs suitable for assigning to a hash
#Both names and values have "+" turned into a space and %XX sequences
#decoded.  Values additionally have HTML comments removed and, unless the
#global $allow_html is 1, every <...> tag removed.  A field given without
#"=" gets the empty string as its value.
sub parse_form_data {
    my ($query) = @_;
    my %field;
    return %field unless defined $query;

    foreach my $pair (split(/&/, $query)) {
        #limit of 2: only the FIRST "=" separates name from value, so a
        #value that itself contains "=" is kept whole
        my ($name, $value) = split(/=/, $pair, 2);
        next unless defined $name;               #empty pair, e.g. "a=1&&b=2"
        $value = "" unless defined $value;

        # Un-Webify plus signs and %-encoding
        foreach my $part ($name, $value) {       #aliases: edits $name/$value
            $part =~ tr/+/ /;
            $part =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
        }

        $value =~ s/<!--(.|\n)*-->//g;
        if ($allow_html != 1) {
            $value =~ s/<([^>]|\n)*>//g;
        }
        $field{$name} = $value;
    }
    return %field;
}


#UNIX_DEBUG mode: HTML markup exactly as in web mode, but with DOS-style
#file locations and the search criterion taken from the command line.
if ($OS == $UNIX_DEBUG) {
    ######### make sure to remember to use \\ instead of \
    #--- where things live (dos-like) ---
    ($filehead, $logfile) = ("c:\\www\\voices\\", "c:\\bat\\search.log");
    ($namelistfile, $pass1file, $pass2file) = (
        "c:\\bat\\tmp\\passname.dat",
        "c:\\bat\\tmp\\pass1.dat",
        "c:\\bat\\tmp\\pass2.dat",
    );
    #both of these reside in the dir specified by $filehead
    ($errorfile, $notesfile) = ("error.htm", "notes.htm");

    #--- paired open/close markup ---
    ($blink, $unblink) = ("<BLINK>", "</BLINK>");
    ($P,     $unP)     = ("<P>",     "</P>");
    ($bold,  $unbold)  = ("<b>",     "</b>");
    ($big,   $unbig)   = ("<big>",   "</big>");
    ($PRE,   $unPRE)   = ("<PRE>",   "</PRE>");

    #--- stand-alone markup ---
    ($hr, $br, $quote) = ("<hr>", "<br>", "&quot;");
    $PAlignCenter      = "<P ALIGN=CENTER>";
    $RegExURL = "http://clint.sheer.us/~clint/voices/grepdoc.htm\#RegEx";

    #--- HTTP header and page prologue ---
    print "Content-type: text/html\n\n";
    $title = "Search Results";
    print "<HTML><HEAD><TITLE>$title</TITLE></HEAD><BODY>\n";

    #search criterion comes from the command line (dos-like)
    $argv0 = $ARGV[0];
}


#DOS mode: plain-text stand-ins for every markup string, DOS file locations,
#and the search criterion taken from the command line.  No HTTP header or
#HTML prologue is printed in this mode.
if ($OS == $DOS) {                                                   #DOS
    ######### make sure to remember to use \\ instead of \
    $filehead     = "c:\\www\\voices\\";
    $namelistfile = "c:\\bat\\tmp\\passname.dat";    #c:\bat\tmp dir must exist
    $pass1file    = "c:\\bat\\tmp\\pass1.dat";
    $pass2file    = "c:\\bat\\tmp\\pass2.dat";
    $logfile      = "c:\\bat\\search.log";           #put wherever you want
    $errorfile    = "error.htm";         #resides in dir specified by $filehead
    $notesfile    = "notes.htm";         #resides in dir specified by $filehead
    $blink        = "* ";
    $unblink      = " *";
    $P            = "\n\n";
    $unP          = "\n\n";
    $bold         = " ";
    $unbold       = " ";
    $big          = "* ";
    $unbig        = " *";
    $COLUMNS      = 80;
    #Horizontal rule, one column short of the screen width.  The rule
    #character is byte 0xC4, the horizontal line-drawing character of the DOS
    #code page.  It is written as an escape so that it stays a single byte no
    #matter which encoding this source file is saved in.
    $hr           = "\xC4" x ($COLUMNS - 1);
    $PRE          = "";                        #not necessary for fixed-column
    $unPRE        = "";                        #environments such as DOS......
    $PAlignCenter = "                ";    #close enough to centered for ME
    $br           = "\n";
    $quote        = "\"";
    $RegExURL     = "";  #shouldn't be DOS-referenced anyway, but this is safer
    $argv0        = $ARGV[0];                    #set argv0 to search criterion
}






#Refuse to run without a search criterion; otherwise open the search log
#(append mode) and start this search's log entry with the criterion.
if ($argv0 eq "") {                #display errmsg if no search criterion given
   print "$blink";
   print "ERROR!$unblink$P";
   print "You must $bold", "pick$unbold a word or ";
   if ($OS == $UNIX) {
       print "<a href=\"", $RegExURL, "\">";           #WWW/UNIX link
   }
   print "regular expression";
   if ($OS == $UNIX) { print "</a>"; }                          #WWW/UNIX link
   print " for searching!$P";
   if ($OS == $UNIX) { print "</BODY></HTML>\n\n"; }            #WWW/UNIX link
   die("Need Keyword!! "); #OHOHOH - MAYBE FIX LATER WITH REAL ERROR MESSAGE
   #$argv0 = "Gigantor";   #DEBUG
} else {
    #Logging is best-effort: a log file that cannot be opened is reported on
    #STDERR but does not stop the search.
    open (LOG, ">>$logfile")
        || warn("Can't append to log file $logfile: $!");
    print LOG "\n$argv0: ";
}#endif argv0 eq ""



###############################################################################
# Main search driver.
#
# Pass 1 scans every data file (a.htm .. z.htm and new.htm under $filehead)
# for lines matching the criterion, copies them to $pass1file and collects the
# text in front of the first ":" of each matching line as a name.  The unique
# names are written to $namelistfile and listed for the user, followed by the
# pass-1 lines.  Pass 2 re-scans the data files and reports every line whose
# name is on that list (copied to $pass2file).  Totals are appended to LOG.
# When no name was found, the contents of $filehead$errorfile are shown.
#
# NOTE(review): the pass and name-list files have fixed names, so two searches
# running at the same moment would overwrite each other's files.
###############################################################################
$ERROR_MESSAGE = 0;             #error flag; set to 1 below if nothing matched


if ($ERROR_MESSAGE == 0) {      #BIG LONG IFF

    #$argv0 comes straight from the user.  In the HTML modes it must be
    #escaped before being echoed, otherwise markup in the criterion would be
    #interpreted by the browser.  DOS output is plain text and is left alone.
    my $shown = $argv0;
    if ($OS != $DOS) {
        $shown =~ s/&/&amp;/g;
        $shown =~ s/</&lt;/g;
        $shown =~ s/>/&gt;/g;
        $shown =~ s/"/&quot;/g;
    }

    print "Searching for: $bold", "$big";
    if ($argv0 eq "img align") {
        #$quote (not a literal &quot;) so that DOS mode shows a real quote
        print "IMAGES $unbig$unbold(by searching for $quote$bold" . "img align$unbold$quote)$bold$big";
    } else {
        print "$shown";
    }#endif

    print "$unbig$unbold\n$hr";
    $pattern = "$argv0";

    #The criterion is used as a regular expression.  One that does not compile
    #would abort the script half-way through the page, so in that case search
    #for the text literally instead.
    if (!defined(eval { "" =~ /$pattern/i; 1 })) {
        $pattern = quotemeta($argv0);
    }

    $count = 0;

### before doing anything, tell user any notes from notes.htm that apply
    $notesfilename  = "$filehead$notesfile";
    &do_specific_error($notesfilename);


### first pass: collect matching lines (PASS1) and their names (@names)
    open (PASS1, '>', $pass1file)
        || die("Can't open output file $pass1file: $!");
#   &create_name_list('notes');
    foreach my $section ('a' .. 'z', 'new') {
        &create_name_list($section);
    }
    close (PASS1);


    #clean up dupes: keep the first occurrence of every name, in order, and
    #drop the entries that are markers rather than names.  This filter must
    #stay in step with the one in create_name_list (search for "HELP ME").
    my %seen;
    $NewNamesPlace = 0;  $NewNamesCount = 0;
    for ($i = 0;  $i < $count;  $i++) {
        next if $seen{$names[$i]};
        if ($names[$i] ne "NEW"     &&
            $names[$i] ne "TODO"    &&
            $names[$i] ne "HELP ME" &&
            $names[$i] !~ /^</      &&
            $names[$i] !~ /^[\-\ \*]/ ) {
                $seen{$names[$i]} = 1;
                $newnames[$NewNamesPlace++] = $names[$i];
                $NewNamesCount++;
        }#endif
    }#endfor


    #output to name list file to create results later
    open (NAMES, '>', $namelistfile)
        || die("Can't open output file $namelistfile: $!");
    #print("\n\nUnique names: @newnames\n\n");  #DEBUG only
    foreach $name (@newnames) {
        print NAMES "$name\n";
    }
    close (NAMES);


    #print ("\n\nAll unique: @newnames");       #DEBUG only
    $numtalents = @newnames;                    #count talents

    if ($numtalents > 0) {
        if ($OS == $DOS) {
            print "\n";             #this \n is a HTML NO-OP but needed for DOS
        }
        print "Number of voice talents associated with ";
        print "search criterion $bold", "$shown$unbold is: ";
        if ($OS == $DOS) {
            print "\n$PAlignCenter             ";       #insignificant cosmetic
        }
        print "$big$bold", "$numtalents$unbold$unbig", "$P";
    }
    print LOG "$numtalents";
    if ($numtalents > 0) {
        print LOG ",";
    }#endif


    if ($numtalents > 0) {
        if ($numtalents > 1) {
            print "$bold", "$unPRE", "Their names are:\n&nbsp;&nbsp;$unbold ";
        } else {
            print "$bold", "$unPRE", "Their name is:\n&nbsp;&nbsp;-$unbold ";
        }#endif
    }#endif


    #list the names: "A." for one, "A and B." for two, and
    #"A; B; and C." (one per line) for three or more
    open(NAMELIST, '<', $namelistfile)
        || die("Can't open input file $namelistfile: $!");
    $count = 1;                         #from here on: position in the list
    while (defined($line = <NAMELIST>)) {
        chomp($line);
        if ($count == $numtalents) {
            if ($numtalents==1) {
                print "$line.\n";
            } elsif ($numtalents > 0) {
                print "and $line.\n";
            }#endif
        } elsif ($numtalents > 0) {
            print "$line";
            if ($numtalents > 2) {
                print ";&nbsp;&nbsp;&nbsp;\n";
            }#endif
            print " ";
        }#endif
        $count++;
    }#endwhile
    close(NAMELIST);

    if ($numtalents > 0) {
        print "$hr", "\n";
    }#endif



### print pass 1 file here
    if ($numtalents > 0) {
        print "$PAlignCenter";
        if ($OS == $DOS) {
            print "     ";                          #DOS insignificant cosmetic
        }
        print "$bold", "$big";
        print "First-Pass ";
        print "Matches:$unbig$unbold$unP", "$PRE";
        open(PASS1FILE, '<', $pass1file) ||
             die("Can't open input file $pass1file");
        $nummatches=0; $numlines=0;
        while (defined($line = <PASS1FILE>)) {
            #to correct count for multiple entries on one line, we must scan
            #for a comma followed by spaces followed by a letter or digit:
            @extras = split(/,\ +[a-zA-Z0-9]/, $line);
            $nummatches += @extras; $numlines++;
            print "$line";
        }
        close(PASS1FILE);
        print "$unPRE\n", "$PAlignCenter";
        print "$bold", "Number of first-pass matches: $big";
        print "$nummatches$unbig ($numlines lines)$unbold";

        print LOG "$nummatches($numlines),";
        print "\n$hr", "\n";                    #again, \n is okay for all OSes
    }


### print out 2nd pass
    if ($numtalents > 0) {
        print "$PAlignCenter";
        if ($OS == $DOS) {
            print "     ";                          #DOS insignificant cosmetic
        }
        print "$bold", "$big";
        print "Second-Pass ";
        print "Matches:$unbig$unbold$unP", "$PRE";

        open(PASS2, '>', $pass2file) ||
             die("Can't open output file $pass2file");
        $num2matches = 0;   $num2lines = 0;

#       &do_second_pass('notes');
        foreach my $section ('a' .. 'z', 'new') {
            &do_second_pass($section);
        }
        close(PASS2);           #closed where it is opened, and only if opened

        print "$unPRE\n", "$PAlignCenter";
        print "$bold", "Number of second-pass matches: $big";
        print "$num2matches$unbig ($num2lines lines)$unbold";

        print LOG "$num2matches($num2lines)";
        print "\n$hr", "\n";
    }#endif

    close(LOG);

    if ($numtalents == 0) {
        $ERROR_MESSAGE = 1;
    }#endif


}#endif LONG iff


#nothing found: show the hints from the error file
if ($ERROR_MESSAGE) {
    $errorfilename  = "$filehead$errorfile";
    &do_specific_error($errorfilename);
}

if ($OS == $UNIX) { print("\n</BODY></HTML>\n"); }          #UNIX ONLY

################################# SUBROUTINES #################################

sub create_name_list {
    #First-pass scan of one data file.
    #  parameter: the part of the file name between $filehead and ".htm"
    #             (a letter, or a word such as "new")
    #Every line of that file matching the global $pattern (case-insensitive)
    #has the text before its first ":" stored in $names[$count], after which
    #$count is advanced.  The line itself is copied to the PASS1 filehandle,
    #which the caller must have opened for writing, unless it is a marker
    #line (see below).  Dies if the file cannot be opened.
    my ($letter) = @_;
    my $filename = "$filehead$letter" . ".htm";
    open(INFILE, '<', $filename)
        || die("Can't open input file $filename");
    while (defined(my $line = <INFILE>)) {
        #chomp, not chop: a last line without a trailing newline must not
        #lose its final character
        chomp $line;
        next unless $line =~ /$pattern/i;

        my @words = split(/:/, $line);
        $names[$count] = $words[0];
        #the next if lets you determine how to put lines in the file that
        #are ignored by this script.  Basically, any line starting with
        #NEW, TODO, HELP ME, <BIG>, <B>, <PRE>, </PRE>, a hyphen, a space,
        #or a star is ignored.  This can be changed, but make sure to change
        #the matching filter in the main script as well (search for
        #"HELP ME" to find it)
        if ($names[$count] ne "NEW"      &&
            $names[$count] ne "TODO"     &&
            $names[$count] ne "HELP ME"  &&
            $names[$count] !~ /^<[\/bBPp]/ &&   #ignore <big><b><pre></pre>
            $names[$count] !~ /^[\-\ \*]/ ) {   #ignore "-", " ", "*"
                print PASS1 "$line\n";
        }
        #the slot is used up even for marker lines; the caller filters
        #them out of @names again
        $count++;
    }
    close(INFILE);
}                                                  #end of create_name_list





sub do_second_pass {
    #Second-pass scan of one data file.
    #  parameter: the part of the file name between $filehead and ".htm"
    #Every line whose text before the first ":" equals one of the first
    #$NewNamesCount entries of @newnames is printed and copied to the PASS2
    #filehandle, which the caller must have opened for writing.
    #$num2lines is advanced by one per reported line and $num2matches by the
    #number of entries counted on that line.  Dies if the file cannot be
    #opened.
    my ($letter) = @_;
    my $filename = "$filehead$letter" . ".htm";
    open(INFILE, '<', $filename)
        || die("Can't open input file $filename");
    while (defined(my $line = <INFILE>)) {
        #chomp, not chop: a last line without a trailing newline must not
        #lose its final character
        chomp $line;
        my @words = split(/:/, $line);
        for (my $i = 0;  $i < $NewNamesCount;  $i++) {
            next unless $newnames[$i] eq $words[0];
            print "$line\n";
            #entries on one line are separated by a comma, spaces and then
            #a letter or digit; count the pieces
            my @extras = split(/,\ +[a-zA-Z0-9]/, $line);
            $num2matches += @extras; $num2lines++;
            print PASS2 "$line\n";
            #report each line once, even if its name is listed twice
            last;
        }
    }
    close(INFILE);
}



sub do_specific_error {
    #Print the messages from a "message file" that apply to the current
    #search.  The file holds two kinds of lines:
    #  - lines that do NOT start with a space are printed for everybody;
    #  - lines that DO start with a space are printed only when they match
    #    the search criterion in $pattern (case-insensitive).
    #Example: Teenage Mutant Ninja Turtles is abbreviated TMNT in the data,
    #so a space-led line mentioning the long title can tell whoever searched
    #for it to search for TMNT instead.
    #
    #  parameter: name of the message file
    #Dies if the file cannot be opened.

    my ($path) = @_;
    print "\n";                                             #cosmetic
    open(ERROR,"$path")
        || die("\ncan't open error file $path");

    while (defined(my $entry = <ERROR>)) {
        #the criterion is tested first and on every line, whether or not
        #the result ends up mattering
        my $matches_search = ($entry =~ /$pattern/i);
        my $is_conditional = ($entry =~ /^\ /);

        #only a conditional line that misses the criterion is held back
        next if $is_conditional && !$matches_search;
        print "$entry";
    }
    close(ERROR);
}
