#!/usr/bin/perl -w 
# Converts a phased eigenstrat genotype file holding data for a single chromosome, together with
# its corresponding SNP file, into Hapmix-format files: one SNP file for that chromosome and one genotype file per sample.

# ---------------------------------------------------------------------------
# Usage:
#   perl convert_phasedeghapmix_chrsamp.pl ingenotypefile insnpfile chr_num admixpop genotype
#
# Arguments:
#   ingenotypefile  genotype file; one line per SNP, one character per
#                   individual column (a '9' is written out as '?').
#   insnpfile       SNP file; one whitespace-separated line per SNP.  The 4th
#                   column is taken as the position written to the SNP output.
#   chr_num         chromosome label; only used in output file names and in
#                   progress messages (no filtering by chromosome is done).
#   admixpop        prefix for every output file name.
#   genotype        1 => header ":sequences:2", 0 => header ":sequences:1".
#
# Outputs (in the current directory):
#   <admixpop>.SNPOUT.<chr_num>   ":sites:<n>" followed by all positions.
#   <admixpop>.<i>.<chr_num>      one file per individual column i (0-based).
#
# Exits non-zero, with the message on STDERR, on any usage or input error.
# ---------------------------------------------------------------------------
use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

if (@ARGV != 5) {
    print STDERR "WRONG USAGE: perl convert_phasedeghapmix_chrsamp.pl ingenotypefile insnpfile chr_num admixpop genotype\n";
    exit 1;
}

my ($infile, $snpfile, $chr, $admixpop, $genotype) = @ARGV;

# Reject anything but 0/1 up front: otherwise the genotype files would be
# written without any ":sequences:" header at all.
if (!looks_like_number($genotype) || ($genotype != 0 && $genotype != 1)) {
    die "ERROR: genotype argument must be 0 or 1, got '$genotype'";
}
my $sequences_header = ($genotype == 1) ? ":sequences:2" : ":sequences:1";

# --- Read both inputs -------------------------------------------------------
open(my $geno_in, '<', $infile)  or die "Cannot open file $infile: $!";
open(my $snp_in,  '<', $snpfile) or die "Cannot open file $snpfile: $!";

my @snp_lines  = <$snp_in>;
my @geno_lines = <$geno_in>;
close $snp_in;
close $geno_in;

my $nsnps   = scalar @snp_lines;
my $lengeno = scalar @geno_lines;
if ($lengeno != $nsnps) {
    print STDERR "ERROR: Admixed $infile and $snpfile files are not of the same length\t$lengeno\t$nsnps\n";
    exit 1;
}

# --- Extract the position (4th column) of every SNP -------------------------
my @physpos;
for my $snp_idx (0 .. $#snp_lines) {
    # split ' ' skips leading whitespace and drops the trailing newline/CR.
    my @fields = split ' ', $snp_lines[$snp_idx];
    if (!defined $fields[3]) {
        die "ERROR: line " . ($snp_idx + 1) . " of $snpfile has fewer than 4 columns";
    }
    push @physpos, $fields[3];
}

# --- Write the SNP output file ----------------------------------------------
my $snpoutfile = "$admixpop.SNPOUT.$chr";
open(my $snp_out, '>', $snpoutfile) or die "Unable to make file $snpoutfile: $!";
print {$snp_out} ":sites:$nsnps\n";
print {$snp_out} map { "$_ " } @physpos;    # every position is followed by a space
print {$snp_out} "\n";
close $snp_out or die "Error writing file $snpoutfile: $!";
print "Finished writing Admixed SNP file for CHR NUM $chr\n";

# --- Transpose the genotype matrix ------------------------------------------
# Input is SNP-major (one line per SNP); output is individual-major.  One
# growing string per individual is kept instead of a 2-D array of
# single-character scalars, which is far lighter on memory.
my @haplotype_of;
my $nind = 0;
for my $snp_idx (0 .. $#geno_lines) {
    # Strip LF or CRLF so a stray "\r" is never mistaken for an individual.
    (my $row = $geno_lines[$snp_idx]) =~ s/\r?\n\z//;

    if ($snp_idx == 0) {
        $nind = length $row;
    }
    elsif (length($row) != $nind) {
        # A ragged row would silently shift every later genotype.
        die "ERROR: line " . ($snp_idx + 1) . " of $infile has "
          . length($row) . " columns, expected $nind";
    }

    for my $ind (0 .. $nind - 1) {
        $haplotype_of[$ind] .= substr($row, $ind, 1);
    }
}
print "Finished reading admixed file for CHR NUM $chr\n";

# --- Write one genotype file per individual ---------------------------------
for my $ind (0 .. $nind - 1) {
    my $genofile = "$admixpop.$ind.$chr";
    open(my $geno_out, '>', $genofile) or die "Unable to make file $genofile: $!";

    # '9' in the input is emitted as '?'; compared as a character, not a
    # number, so non-numeric codes do not trigger warnings.
    (my $sequence = $haplotype_of[$ind]) =~ tr/9/?/;

    print {$geno_out} "$sequences_header\n", $sequence, "\n";
    close $geno_out or die "Error writing file $genofile: $!";
}

print "Done writing admixed data files for CHR NUM $chr\n";

