#!/usr/local/bin/perl
#
# SVMTest_wrapper.perl
#
# Rui Pereira
# 28/09/2000
#
# Perl script used as a wrapper for the SVMTest program
# It executes the program with the specified files and then converts
# the output file to the Metal format
#
# Changes:
#
#     20/10/2000
#               - included option for regression
#
#     27/10/2000
#               - now all attributes are converted to continuous by default
#                 The -s flag can be used to maintain the old functionality

# Revision-control bookkeeping strings; nothing else in this script reads them.
$rcsid = '$Id: svmtest_wrapper,v 3.0 2002/06/30 19:05:19 johann Exp $';
$version = '$Revision: 3.0 $';

# Banner, always shown.
print "\nSVMTest Wrapper\n",
      "-------------------\n",
      "Rui Pereira, LIACC, October of 2000\n\n";

# The model file, the input stem and the output file are mandatory.
# Without all three, show the usage text and stop.
if (scalar(@ARGV) < 3) {
    print "Perl script used as a wrapper for the SVMTest program.\n",
          "It runs the SVMTest program and then converts the result to the Metal format\n\n",
          "Syntax: $0 InputModelFile InputFileStem OutputFile [-r] [-s] [-k]\n",
          "\n\n",
          "where:\n",
          "-r    Use regression mode (default is classification mode)\n",
          "-s    Use continuous attributes only (default is convert and use all)\n",
          "-k    Keep temporary file created by SVMTest\n";
    exit;
}

# Scan the optional switches that follow the three positional arguments.
#   -r  sets $r_flag (regression mode)
#   -s  sets $s_flag (skip the symbolic-to-continuous conversion)
#   -k  sets $d_flag (the cleanup step at the end of the script is skipped)
# Any other argument is silently ignored.
foreach my $switch (@ARGV[3 .. $#ARGV]) {
    if ($switch eq "-r") {
        $r_flag = 1;
    }
    elsif ($switch eq "-s") {
        $s_flag = 1;
    }
    elsif ($switch eq "-k") {
        $d_flag = 1;
    }
}


# Input and Output file names
#
# All names are derived from the command line:
#   $ARGV[0]  model file handed to svm_test
#   $ARGV[1]  stem of the test data (<stem>.test) and its description (<stem>.names)
#   $ARGV[2]  final output file
$model_input_file=$ARGV[0];

$test_input_file=$ARGV[1].".test";
$names_input_file=$ARGV[1].".names";
$temp_test_input_file=$test_input_file.".tmp";

if(!$s_flag) {
    print "Using full symbolic-continuous conversion\n";
    $intempstem=$ARGV[1].".tempcont";
    $tempstem=$ARGV[1].".cont";

    # make_continuous is called with two stems; presumably it reads
    # <in>.data / <in>.names and writes <out>.data / <out>.names (TODO confirm).
    # The test files are therefore lent to it under a temporary stem and
    # moved back afterwards.
    my @loaned = (
        [ $test_input_file,  $intempstem.".data"  ],
        [ $names_input_file, $intempstem.".names" ],
    );

    # External commands are run in list form so that file names containing
    # blanks or shell metacharacters reach the program unchanged.
    foreach my $pair (@loaned) {
        if (system("mv", $pair->[0], $pair->[1]) != 0) {
            print "Warning: could not move $pair->[0] to $pair->[1]\n";
        }
    }

    my @convert = ("make_continuous", $intempstem, $tempstem);
    push(@convert, "-r") if ($r_flag);
    if (system(@convert) != 0) {
        # Only reported: the script carries on exactly as before and will
        # stop later if the converted files cannot be opened.
        print "Warning: make_continuous reported a failure (status $?)\n";
    }

    # Always give the original files back, whatever happened above.
    foreach my $pair (@loaned) {
        if (system("mv", $pair->[1], $pair->[0]) != 0) {
            print "Warning: could not move $pair->[1] back to $pair->[0]\n";
        }
    }

    # From here on the converted copies are the input.
    $test_input_file=$tempstem.".data";
    $names_input_file=$tempstem.".names";
}
else {
    print "Only continuous attributes used\n";
}


# Input and Output temp file names
$output_file=$ARGV[2];
$temp_output_file=$ARGV[2].'.temp';


#####################
# Input names file
#
# Layout this parser relies on:
#   first line  : names separated by "," or "."; in regression mode the first
#                 name is the attribute to be predicted
#   other lines : one attribute per line; a line is taken as a continuous
#                 attribute when it contains ":" followed by "continuous"
#
# Globals filled in for the rest of the script:
#   @names, $NR_CLASSES  entries of the first line and how many there are
#   $reg_name            first entry of @names
#   $NR_ATTRIB           number of attribute lines plus one (the extra column
#                        of the data file holds the class label)
#   @num_attrib          data-file column of every continuous attribute
#   $NR_NUM_ATTRIB       number of continuous attributes
#   $reg_number          regression only: position of the target attribute
#                        inside @num_attrib

print "Processing the NAMES file...\n";
open(my $names_fh, '<', $names_input_file)
    or (print "Can't open $names_input_file: $!\n" and exit 1);

my $class_line = <$names_fh>;
$class_line = '' unless defined($class_line);
# Trim both ends instead of chop(): works with or without a final newline
# and with DOS line endings.
$class_line =~ s/^\s+|\s+$//g;
@names=split (/\s*[,\.]\s*/ , $class_line);
$NR_CLASSES=@names;
$reg_name=$names[0];

$NR_ATTRIB=0;
$NR_NUM_ATTRIB=0;
@num_attrib=();

while (defined(my $attr_line = <$names_fh>)) {
    # A blank line describes no column of the data file.
    next if ($attr_line !~ /\S/);

    if ($attr_line =~ /(.*)\s*:.*continuous.*/) {
        # The greedy capture keeps any blanks in front of the colon, so the
        # name is trimmed before it is compared with the target name.
        my $attr_name = $1;
        $attr_name =~ s/^\s+|\s+$//g;

        push(@num_attrib,$NR_ATTRIB);
        $NR_NUM_ATTRIB++;

        if ($r_flag && defined($reg_name) && $attr_name eq $reg_name) {
            # The data pass uses $reg_number as an index into @num_attrib,
            # so record the position in that list, not the file column.
            $reg_number=$#num_attrib;
        }
    }
    $NR_ATTRIB++;
}
# One more column: the class label at the end of every data line.
$NR_ATTRIB++;

close($names_fh);
print "Finished processing names file\n";

#############################
# The data file must be converted!
#
# Rewrites $test_input_file (comma separated, one example per line) into
# $temp_test_input_file, the file later handed to svm_test:
#   - a header line "<number of examples> <number of columns>"
#   - one line per example with the continuous attributes separated by
#     blanks, followed (classification only) by the numeric class label
# Missing values ("?") are replaced by the mean of their column, so the
# input is read twice: once to compute the means, once to write the result.

open(my $data_in, '<', $test_input_file)
    or (print "Can't open $test_input_file: $!\n" and exit 1);
open(my $data_out, '>', $temp_test_input_file)
    or (print "Can't open $temp_test_input_file: $!\n" and exit 1);

print "Processing the data file...\n";

my $numeric_count = $NR_NUM_ATTRIB || 0;
$NR_EXAMPLES = 0;
@num_attrib_mean = (0) x $numeric_count;
@num_attrib_mean_count = (0) x $numeric_count;

# First pass
# Counts the examples and accumulates, per continuous attribute, the sum
# and the number of known values.
while (defined(my $row = <$data_in>)) {
    # Strip the line ending (and trailing blanks) without eating a real
    # character when the last line has no newline.
    $row =~ s/\s+$//;
    next if ($row eq '');

    my @fields = split(/\s*,\s*/, $row);
    for my $k (0 .. $numeric_count - 1) {
        my $cell = $fields[$num_attrib[$k]];
        if ($cell ne "?") {
            $num_attrib_mean[$k] += $cell;
            $num_attrib_mean_count[$k]++;
        }
    }
    $NR_EXAMPLES++;
}
close($data_in);

for my $k (0 .. $numeric_count - 1) {
    if ($num_attrib_mean_count[$k]) {
        $num_attrib_mean[$k] = $num_attrib_mean[$k] / $num_attrib_mean_count[$k];
    }
    else {
        # No known value at all: there is no mean to compute, so fall back
        # to 0 instead of dividing by zero.
        print "Warning: continuous attribute in column ", ($num_attrib[$k] || 0),
              " has no known values; using 0 for its missing values\n";
        $num_attrib_mean[$k] = 0;
    }
}

# Second pass
open($data_in, '<', $test_input_file)
    or (print "Can't open $test_input_file: $!\n" and exit 1);

# In classification mode the class label is an extra column.
my $column_count = $r_flag ? $numeric_count : $numeric_count + 1;
print {$data_out} "$NR_EXAMPLES $column_count\n";

while (defined(my $row = <$data_in>)) {
    $row =~ s/\s+$//;
    next if ($row eq '');

    my @fields = split(/\s*,\s*/, $row);
    for my $k (0 .. $numeric_count - 1) {

        # In regression mode the target attribute and the last continuous
        # attribute swap places, so the target is written last.
        my $source = $k;
        if ($r_flag) {
            if ($k == $reg_number) {
                $source = $numeric_count - 1;
            }
            if ($k == $numeric_count - 1) {
                $source = $reg_number;
            }
        }

        my $cell = $fields[$num_attrib[$source]];
        if ($cell eq "?") {
            $cell = $num_attrib_mean[$source];
        }
        print {$data_out} "$cell ";
    }

    if (!$r_flag) {
        # getindex() gives the position of the label in @names, or -1 for
        # an unknown label. With exactly two classes the first class is
        # written as -1 and the second as 1.
        my $label = getindex($fields[$NR_ATTRIB-1]);
        if ($NR_CLASSES == 2 && $label == 0) {
            $label = -1;
        }
        print {$data_out} $label;
    }
    print {$data_out} "\n";
}

# First part completed, so close all files

close($data_in);
if (!close($data_out)) {
    print "Can't write $temp_test_input_file: $!\n";
    exit 1;
}

print "Finished processing DATA file\n";
print "Converted test data file written as $temp_test_input_file\n";
print "Process complete.\n\n";

#####################################
# Calls the SVMTest program
#
# svm_test receives the same options in regression mode and in two-class
# classification; -multi is added for classification when the names file
# does not list exactly two classes.

my @svm_options = ("-ae", "-am");
if($r_flag) {
    print "Testing in regression mode\n";
}
elsif(@names==2) {
    print "Two classes found - standard mode used\n";
}
else {
    push(@svm_options, "-multi");
    print "More than two classes found - multiclass mode used\n";
}
push(@svm_options, "-oa");

# Text shown to the user only; the program itself is started in list form
# below so that file names are passed on without shell interpretation.
$commandline="svm_test @svm_options $temp_output_file  $model_input_file  $temp_test_input_file";

print "Executing SMVTest....\n\n";
print "Using command line:\n";
print "$commandline";
print "\n\n";

# Any non-zero wait status is a failure: a non-zero exit code, death by a
# signal (low bits of the status) or a program that could not be started (-1).
my $svm_status = system("svm_test", @svm_options,
                        $temp_output_file, $model_input_file, $temp_test_input_file);
if ($svm_status != 0) {
    print "\n\nSVMTest program execution failed! Exiting...\n";
    exit 1;
}


print "\nSVMTest program execution complete\n\n";


# Produce the final output file.
#   classification: every line written by svm_test is mapped back to a class
#                   name with translate()
#   regression    : the svm_test output is used as it is, only renamed
if (!$r_flag) {
    print "Begin conversion of the output file...\n";

    open(my $svm_out, '<', $temp_output_file)
        or (print "Can't open $temp_output_file: $!\n" and exit 1);
    open(my $final_out, '>', $output_file)
        or (print "Can't open $output_file: $!\n" and exit 1);

    while (defined(my $prediction = <$svm_out>)) {
        # chomp, not chop: a last line without newline must keep its last digit.
        chomp $prediction;
        print {$final_out} translate($prediction)."\n";
    }

    close($svm_out);
    if (!close($final_out)) {
        print "Can't write $output_file: $!\n";
        exit 1;
    }
}
else {
    if (system("mv", $temp_output_file, $output_file) != 0) {
        print "Can't rename $temp_output_file to $output_file\n";
        exit 1;
    }
    # Nothing is cleaned up in regression mode.
    # NOTE(review): this also leaves the files written by make_continuous
    # behind - confirm whether that is intended.
    $d_flag=1;
    print ("Regression mode - no conversion needed. Renaming $temp_output_file to $output_file\n\n");
}

print "Done\n";
print "\nProcess complete.\n";



if (!$d_flag) {
    unlink($temp_output_file);
    # $tempstem only exists when the symbolic-continuous conversion ran (no
    # -s flag). Without this test the names would collapse to ".data" and
    # ".names" and unrelated files could be removed.
    if (defined($tempstem)) {
        unlink($tempstem.".data", $tempstem.".names");
    }
    print "Temporary files deleted\n";
}


# translate(VALUE)
#
# Maps one value read from the svm_test output back to an entry of the
# global @names list.
#   - @names has exactly two entries: a negative VALUE selects the first
#     entry, anything else the second one
#   - otherwise VALUE itself is used as the index into @names
# Returns the selected entry of @names.
# Works on lexical variables only, so the globals $value and $index used by
# the main script are left alone.
sub translate {
    my ($score) = @_;

    my $class_index;
    if (@names == 2) {
        $class_index = ($score < 0) ? 0 : 1;
    }
    else {
        $class_index = $score;
    }
    return $names[$class_index];
}


# getindex(LABEL)
#
# Looks LABEL up among the first $NR_CLASSES entries of the global @names
# list (exact string comparison).
# Returns the position of the first match, or -1 when there is none.
# The loop variable is lexical, so the global $i that the main script uses
# as a loop counter is not modified.
sub getindex {
    my ($label) = @_;

    for my $position (0 .. $NR_CLASSES - 1) {
        return $position if ($label eq $names[$position]);
    }
    return -1;
}













