#!/usr/bin/perl -w 
#This will take as input user parameter file and convert it to Simon style parameter file
#use strict;

$paramfile = $ARGV[0];
$outparamfile = "$ARGV[0].Hapmix.out";

if(($#ARGV+1) != 1)
  {
    print ("WRONG USAGE: Usage runHapmix.pl paramfile\n");
    exit;
  }

open(PARAMFILE,$paramfile) ||die("File $paramfile not found");

@rowp = <PARAMFILE>;
for($i = 0; $i < scalar @rowp; $i++)
  {
    chomp($rowp[$i]);
    if($rowp[$i] =~ /#/)
      {
	next;
      }
    @data = split(':',$rowp[$i]);
    if(scalar @data ==0)
      {
	next;
      }
    if(scalar @data == 2)
      {
	@val = split(' ',$data[1]);
	if(scalar @val == 1)
	  {
	    $data[1] =~ s/\s//g;
	  }
	$paramdata{$data[0]} = $data[1];
      }
  }

my(%param_key) = ( 'GENOTYPE'=>2, 
		   'OUTPUT_SITES'=>39, 
 		   'SITE_POSITIONS'=>6, 
		   'RECOMBINATION_VALS'=>106, 
		   'MUTATION_VALS'=>107, 
		   'MISCOPYING_VALS'=>108);


my(%param_file) = ('REFPOP1GENOFILE'=>'REFPOP1GENOFILE',  
                   'REFPOP2GENOFILE'=>'REFPOP2GENOFILE',  
                   'REFPOP1SNPFILE'=>'REFPOP1SNPFILE',  
                   'REFPOP2SNPFILE'=>'REFPOP2SNPFILE',  
                   'ADMIXSNPFILE'=>'ADMIXSNPFILE',  
                   'ADMIXGENOFILE'=>'ADMIXGENOFILE',  
                   'REF1LABEL'=>'REF1LABEL',  
                   'REF2LABEL'=>'REF2LABEL',
		   'RATESFILE'=>'RATESFILE',
		   'CHR'=>'CHR',
		   'OUTDIR'=>'OUTDIR',
		   'THETA'=>'THETA',
		   'LAMBDA'=>'LAMBDA',
		   'HAPMIX_MODE'=>'HAPMIX_MODE',
		   'OUTPUT_DETAILS'=>'OUTPUT_DETAILS',
		   'THRESHOLD'=>'THRESHOLD',
		  'ADMIXPOP'=>'ADMIXPOP',
		  'KEEPINTFILES'=>'KEEPINTFILES'); 
 

open(OUTPARAMFILE,">$outparamfile") || die("Unable to make file $outparamfile");

#Check the values and set defaults here:
if(!(exists $paramdata{'GENOTYPE'}))
  {
    print "WARNING Setting GENOTYPE parameter to 1, since not specified\n";
    $paramdata{'GENOTYPE'} = 1;
  }
if(!(exists $paramdata{'OUTPUT_SITES'}))
  {
    print "WARNING Setting OUTPUT_SITES parameter to 0, since not specified\n";
    $paramdata{'OUTPUT_SITES'} = 0;
  }
if(!(exists $paramdata{'SITE_POSITIONS'}))
  {
    print "WARNING Setting SITE_POSITIONS parameter, since not specified\n";
    $paramdata{'SITE_POSITIONS'} = "1 1000000000";
  }
if(!(exists $paramdata{'RECOMBINATION_VALS'}))
  {
    print "ERROR Please set the parameter RECOMBINATION_VALS\n";
    exit;
  }
if(!(exists $paramdata{'MUTATION_VALS'}))
  {
    print "ERROR Please set the parameter MUTATION_VALS\n";
    exit;
  }
if(!(exists $paramdata{'MISCOPYING_VALS'}))
  {
    print "ERROR Please set the parameter MISCOPYING_VALS\n";
    exit;
  }
if(!(exists $paramdata{'REFPOP1GENOFILE'}))
  {
    print "ERROR Please specify the input file REFPOP1GENOFILE\n";
    exit;
  }
if(!(exists $paramdata{'REFPOP2GENOFILE'}))
  {
    print "ERROR Please specify the input file REFPOP2GENOFILE\n";
    exit;
  }
if(!(exists $paramdata{'REFPOP1SNPFILE'}))
  {
    print "ERROR Please specify the input file REFPOP1SNPFILE\n";
    exit;
  }
if(!(exists $paramdata{'REFPOP2SNPFILE'}))
  {
    print "ERROR Please specify the input file REFPOP2SNPFILE\n";
    exit;
  }
if(!(exists $paramdata{'ADMIXSNPFILE'}))
  {
    print "ERROR Please specify the input file ADMIXSNPFILE\n";
    exit;
  }
if(!(exists $paramdata{'ADMIXGENOFILE'}))
  {
    print "ERROR Please specify the input file ADMIXGENOFILE\n";
    exit;
  }
if(!(exists $paramdata{'RATESFILE'}))
  {
    print "ERROR Please specify the input file RATESFILE\n";
    exit;
  }
if(!(exists $paramdata{'CHR'}))
  {
    print "ERROR Please specify the chromosome num\n";
    exit;
  }
if(!(exists $paramdata{'OUTDIR'}))
  {
    print "ERROR Please specify the directory for output files\n";
    exit;
  }
if(!(exists $paramdata{'THETA'}))
  {
    print "ERROR Please specify the parameter THETA\n";
    exit;
  }
if(!(exists $paramdata{'LAMBDA'}))
  {
    print "ERROR Please specify the parameter LAMBDA\n";
    exit;
  }
if(!(exists $paramdata{'HAPMIX_MODE'}))
  {
    $paramdata{'HAPMIX_MODE'} = 'LOCAL_ANC';
  }
if(!(exists $paramdata{'OUTPUT_DETAILS'}))
  {
    if(($paramdata{'HAPMIX_MODE'} eq 'LOCAL_ANC') || ($paramdata{'HAPMIX_MODE'} eq 'DIPLOID') || 
       ($paramdata{'HAPMIX_MODE'} eq 'HAPLOID'))
      {
	$paramdata{'OUTPUT_DETAILS'} = 'PROB';
      }
    elsif($paramdata{'HAPMIX_MODE'} eq 'SAMPLE_RANDOM_PATHS')
      {
	$paramdata{'OUTPUT_DETAILS'} = 'ANC_INT_SAMPLED';
      }
}
if(!(exists $paramdata{'THRESHOLD'}))
  {
    $paramdata{'THRESHOLD'} = 0.0;
  }
if(!(exists $paramdata{'ADMIXPOP'}))
  {
    print "ERROR Please specify the parameter ADMIXPOP\n";
    exit;
  }
if(!(exists $paramdata{'REF1LABEL'}))
  {
    $paramdata{'REF1LABEL'} = $paramdata{'ADMIXPOP'}.".REF1";
  }
if(!(exists $paramdata{'REF2LABEL'}))
  {
    $paramdata{'REF2LABEL'} = $paramdata{'ADMIXPOP'}.".REF2";
  }
if(!(exists $paramdata{'KEEPINTFILES'}))
  {
    print "WARNING Setting KEEPINTFILES parameter to 0, since not specified\n";
    $paramdata{'KEEPINTFILES'} = 0;
  }

foreach $key(keys %paramdata) 
  { 
    if($key eq 'THETA')
      {
          $theta = $paramdata{$key};
	  }

    if($key eq 'LAMBDA')
      {
	
	if($paramdata{$key} <= 1)
	  {
	    print "ERROR: Please choose a valid value of lambda greater than 1\n";
	    exit;
	  }

	$lambda = ($paramdata{$key})/100;
      }
#Extract the Output format:
    if($key eq 'HAPMIX_MODE')
      {
	if($paramdata{'GENOTYPE'} ==1)
	  {
	    if($paramdata{$key} eq 'LOCAL_ANC')
	      {
		$paramout{38} = 5;
	      }
	    elsif($paramdata{$key} eq 'SAMPLE_RANDOM_PATHS')
	      {
		$paramout{38} =2 ;
	      }
	    elsif($paramdata{$key} eq 'DIPLOID')
	      {
		$paramout{38} = 1;
	      }
	    elsif($paramdata{$key} eq 'HAPLOID')
	      {
		print "ERROR: Cannot have Haploid output mode for genotype run, please change one of them\n";
		exit;
	      }
	  }
	elsif($paramdata{'GENOTYPE'} ==0) 
          { 
            if(($paramdata{$key} eq 'LOCAL_ANC') || ($paramdata{$key} eq 'SAMPLE_RANDOM_PATHS') || ($paramdata{$key} eq 'DIPLOID')) 
              { 
		print "ERROR: Cannot have the mode specified for haplotype run, please change one of them\n"; 
                exit; 

              } 
	  }
      }
	
#Chromsome 
    if($key eq 'CHR')
      {
	if($paramdata{$key} eq 'X')
	  {
	    $paramdata{$key} = 23;
	  }
      }

    if(exists $param_key{$key})
      {
	$paramout{$param_key{$key}} = $paramdata{$key};
      }
    elsif(exists $param_file{$key})
      {
	next;
      }
    else
      {
	print "Unknown parameter please check ",$key,"\n";
      }
    
  }

foreach $key(sort keys %paramout)
  {
    print OUTPARAMFILE ":",$key,":",$paramout{$key},"\n";
  }

print OUTPARAMFILE ":104:",$lambda*(1.0-$theta)," ",$lambda*(1.0-$theta),"\n";
print OUTPARAMFILE ":105:",$lambda*$theta," ",$lambda*$theta,"\n"; 

#Hard coded values depending on genotype/haploid mode
if($paramdata{'GENOTYPE'} == 1)
  {
    print OUTPARAMFILE ":33:1\n";
    print OUTPARAMFILE ":37:1 0.01\n";
    print OUTPARAMFILE ":40:1\n";
    
  }
elsif($paramdata{'GENOTYPE'} == 0)
  {
    print OUTPARAMFILE ":33:0\n";
    print OUTPARAMFILE ":37:0\n";
    print OUTPARAMFILE ":40:0\n";

  }

#Setting the following parameters:
#SIMULATE 
print OUTPARAMFILE ":1:0\n";
#SEPARATE_FILES
print OUTPARAMFILE ":41:1\n";
#TRIM_LEND , TRIM_REND
print OUTPARAMFILE ":3: 1\n";          
print OUTPARAMFILE ":4: 1000000000\n"; 
#PHASED
print OUTPARAMFILE ":110:1\n";    
#NUM_SIMULATIONS
print OUTPARAMFILE ":101:1\n";         
#THETA1S, THETA2S
print OUTPARAMFILE ":12: 10 0.00005 0.0001 0.0005 0.001 0.005 0.001 0.05 0.1 0.5 1\n";
print OUTPARAMFILE ":13: 10 0.00005 0.0001 0.0005 0.001 0.005 0.01 0.05 0.1 0.5 1\n";
#ORIG_SEED
print OUTPARAMFILE ":100:5466565\n";    
#MISSING_PROB
print OUTPARAMFILE ":109:0\n";
   
close OUTPARAMFILE;


$inadmixgenofile = $paramdata{'ADMIXGENOFILE'};
$inadmixsnpfile = $paramdata{'ADMIXSNPFILE'};
$inparent1genofile = $paramdata{'REFPOP1GENOFILE'};
$inparent1snpfile = $paramdata{'REFPOP1SNPFILE'};
$inparent2genofile = $paramdata{'REFPOP2GENOFILE'};
$inparent2snpfile = $paramdata{'REFPOP2SNPFILE'};
$parent1label = $paramdata{'REF1LABEL'};
$parent2label = $paramdata{'REF2LABEL'};
$admixpop = $paramdata{'ADMIXPOP'};

print "perl convert_phasedeghapmix_chrsamp.pl $inadmixgenofile $inadmixsnpfile $paramdata{'CHR'} $admixpop $paramdata{'GENOTYPE'}\n";
`perl bin/convert_phasedeghapmix_chrsamp.pl $inadmixgenofile $inadmixsnpfile $paramdata{'CHR'} $admixpop $paramdata{'GENOTYPE'}`;
if(($?>>8) != 0)
  {
    print "Error in creating Hapmix format admixed files\n";
    exit;
  }

$admixsnpfile = "$admixpop.SNPOUT.$paramdata{'CHR'}";


open(GENOFILE,$inadmixgenofile) ||die("File $inadmixgenofile not found");
$row = <GENOFILE>;
chomp($row);
@data = split('',$row);
$nadmix = scalar @data;

print "perl convert_phasedeghapmix_chr.pl $inparent1genofile $inparent1snpfile $parent1label $paramdata{'CHR'} $admixpop\n";
`perl bin/convert_phasedeghapmix_chr.pl $inparent1genofile $inparent1snpfile $parent1label $paramdata{'CHR'} $admixpop`;
if(($?>>8) != 0)
  {
    print "Error in creating Hapmix format reference P1 files\n";
    exit;
  }

$ref1file = $admixpop.".".$parent1label."GENOOUT.$paramdata{'CHR'}";
#print "Parent1 File length $out\n";

print "perl convert_phasedeghapmix_chr.pl $inparent2genofile $inparent2snpfile $parent2label $paramdata{'CHR'} $admixpop\n";
`perl bin/convert_phasedeghapmix_chr.pl $inparent2genofile $inparent2snpfile $parent2label $paramdata{'CHR'} $admixpop`;
if(($?>>8) != 0)
  {
    print "Error in creating Hapmix format reference P2 files\n";
    exit;
  }
$ref2file = $admixpop.".".$parent2label."GENOOUT.$paramdata{'CHR'}"; 

print "Finished creating genotype files to run Hapmix\n";

print "Starting Hapmix Runs\n";

$runstyle = $paramdata{'HAPMIX_MODE'};
print $runstyle,"\n";
if($runstyle eq 'LOCAL_ANC')
  {
    $runstyle = 'LOCALANC';
  }
elsif($runstyle eq 'SAMPLE_RANDOM_PATHS')
  {
    $runstyle = 'SAMPRANPATH';
  }


$chr = $paramdata{'CHR'};
$outdir = $paramdata{'OUTDIR'};
$ratesfile =$paramdata{'RATESFILE'};
print $ref1file,"\t",$ref2file,"\t",$admixsnpfile,"\n";

for($n=0; $n<$nadmix; $n++) 
  { 
    
    $command = "bin/admix2 "; 
    $command .= "$outparamfile ";                        
    $command .= "$ref1file "; 
    $command .= "$ref2file ";   
    $command .= "$admixsnpfile "; 
    $command .= "$admixpop.$n.$chr ";                       
    $command .= " $ratesfile "; 
    $command .= "$outdir/$admixpop.$runstyle.$n.$chr ";
    $command .= "> $outdir/run.$admixpop.$runstyle.$n.$chr ";
    print("$command\n"); 
#      system("bsub -o temp.out -q all_2h $command"); 
    `$command`; 
    
#    if(($?>>8) == 0)
#      {
#	print "Finished running Hapmix successfully for sample $n\n";
#      }
#    else
#      {
#	print "Error in running Hapmix for sample $n\n";
#	exit;
  
#      }
} 


print "Finished running Hapmix for all sample, processing output files\n";
#Handle the output

$likefile = "$outdir/loglhood.$admixpop.$runstyle.$chr";

for($n=0; $n<$nadmix; $n++) 
  { 
    `grep Likelihood $outdir/run.$admixpop.$runstyle.$n.$chr >> $likefile`;
    if($paramdata{'KEEPINTFILES'} == 0)
      {
	`rm $outdir/run.$admixpop.$runstyle.$n.$chr`;
      }
} 

if($paramdata{'HAPMIX_MODE'} eq 'LOCAL_ANC')
  {
    $details = $paramdata{'OUTPUT_DETAILS'};
    if($details eq 'PROB')
      {
	print "Look at the output files in $paramdata{'OUTDIR'}\n";
      }
    elsif(($details eq 'ANC_INT_THRESH') || ($details eq 'ANC_INT_ROUND') || ($details eq 'ANC_EXPECTED'))
      {
	`perl bin/extract_output.pl $paramdata{'OUTDIR'} $nadmix $paramdata{'CHR'} $details $paramdata{'THRESHOLD'} $admixpop`;
	if(($?>>8) == 0)
	  {
	    print "Look at the output files in $paramdata{'OUTDIR'}\n";
	    if($paramdata{'KEEPINTFILES'} == 0)
	      {
		for($n=0;$n < $nadmix;$n++)
		  {
		    `rm $outdir/$admixpop.$runstyle.$n.$chr`;
		  }
	      }
	  }
	else
	  {
	    print "Error in creating output files\n";
	    exit;
	  }
      }
    else
      {
	print "Please check your OUTPUT_DETAILS parameter value\n";
	exit;
      }
  }
elsif($paramdata{'HAPMIX_MODE'} eq 'SAMPLE_RANDOM_PATHS')
  {
    $details = $paramdata{'OUTPUT_DETAILS'};
    print $details,"\n";
    if(($details eq 'ANC_INT_SAMPLED') || ($details eq 'HAPLOID_FILES'))
      {
	`perl bin/extract_output2.pl $paramdata{'OUTDIR'} $nadmix $paramdata{'CHR'} $details $admixpop`;
	if(($?>>8) == 0)
	  {
	    print "Look at the output files in $paramdata{'OUTDIR'}\n";
	    if($paramdata{'KEEPINTFILES'} == 0)
	      {
		for($n=0;$n<$nadmix;$n++)
		  {
		    `rm $outdir/$admixpop.$runstyle.$n.$chr`;
		  }
	      }
	  }
	else
	  {
	    print "Error in creating output files\n";
	    exit;
	  }
      }
    else
      {
	print "Please check your OUTPUT_DETAILS parameter value\n";
	exit;
      }
  }
elsif($paramdata{'HAPMIX_MODE'} eq 'DIPLOID')                   
  {                                                                                         
    $details = $paramdata{'OUTPUT_DETAILS'};                            
    if($details eq 'PROB')
      {
	print "Look at the output files in $paramdata{'OUTDIR'}\n";
      }
    elsif($details eq 'HAPLOID_FILES')
      {
	`perl bin/extract_output3.pl $paramdata{'OUTDIR'} $nadmix $paramdata{'CHR'} $paramdata{'THRESHOLD'} $admixpop`;
	if(($?>>8) == 0)
	  {
	    print "Look at the output files in $paramdata{'OUTDIR'}\n";
	    if($paramdata{'KEEPINTFILES'} == 0)
	      {
		for($n=0;$n<$nadmix;$n++)
		  {
		    `rm $outdir/$admixpop.$runstyle.$n.$chr`;
		  }
	      }
	  }
	else
	  {
	    print "Error in creating output files\n";
	    exit;
	  }
      }
    else
      {
	print "Please check your OUTPUT_DETAILS parameter value\n";
	exit;
      }
  }
elsif($paramdata{'HAPMIX_MODE'} eq 'HAPLOID')                   
  {
    $details = $paramdata{'OUTPUT_DETAILS'};
    if($details eq 'PROB')
      {
	print "Look at the output files in $paramdata{'OUTDIR'}\n";
      }
    elsif(($details eq 'ANC_PROB') || ($details eq 'ANC_INT_THRESH') || ($details eq 'ANC_INT_ROUND') || ($details eq 'HAPLOID_FILES'))
      {
	print $paramdata{'HAPMIX_MODE'},"\t",$details,"\n";
	`perl bin/extract_outputhap.pl $paramdata{'OUTDIR'} $nadmix $paramdata{'CHR'} $details $paramdata{'THRESHOLD'} $admixpop`;
	if(($?>>8) == 0)
	  {
	    print "Look at the output files in $paramdata{'OUTDIR'}\n";
	    if($paramdata{'KEEPINTFILES'} == 0)
	      {
		for($n=0;$n<$nadmix;$n++)
		  {
		    `rm $outdir/$admixpop.$runstyle.$n.$chr`;
		  }
	      }
	  }
	else
	  {
	    print "Error in creating output files\n";
	    exit;
	  }
      }
    else
      {
	print "Please check your OUTPUT_DETAILS parameter value\n";
	exit;
      }
  }

#Clean Up all intermediate files
`rm $outparamfile`;
`rm $admixsnpfile`;
`rm $ref1file`;
`rm $ref2file`;
for($i=0;$i<$nadmix;$i++)
  {
    
    $admixfile ="$admixpop.$i.$paramdata{'CHR'}";
    `rm $admixfile`;
    
  }
