

//////////////////////////////////////////////////////////////////
//                                                              //
//           PLINK (c) 2005-2006 Shaun Purcell                  //
//                                                              //
// This file is distributed under the GNU General Public        //
// License, Version 2.  Please see the file COPYING for more    //
// details                                                      //
//                                                              //
//////////////////////////////////////////////////////////////////


#include <iostream>

#include "plink.h"
#include "helper.h"
#include "options.h"
#include "crandom.h"

using namespace std;

//////////////////////////////////////////////////////////////////////
// A simple routine to simulate a dataset of unlinked case/control SNPs

// Settings for one set of simulated SNPs (one record of the simulation
// parameter file as parsed by Plink::simulateSNPs() in this file).
class SimParameters
{
public:
  int nsnp;        // number of SNPs to generate for this set
  double lfreq;    // lower bound of the population allele frequency range
  double ufreq;    // upper bound of the population allele frequency range
  double odds;     // per-allele risk multiplier used by instanceSNP()
  double missing;  // probability that any single genotype is set missing
  string name;     // SNP name (used as a prefix when nsnp > 1)

  // A default-constructed set is empty: no SNPs, no name and all
  // numeric parameters at zero.
  SimParameters()
    : nsnp(0),
      lfreq(0),
      ufreq(0),
      odds(0),
      missing(0.00),
      name("")
  {
  }
};


// Draw one SNP from the parameter set 's'.
//
// A population allele frequency is sampled uniformly between s.lfreq and
// s.ufreq (one call to CRandom::rand()); the corresponding allele
// frequencies in cases and in controls are then derived from the odds
// parameter and par::simul_prevalence.
//
// Returns a 3-element vector:
//   [0] population allele frequency
//   [1] allele frequency in cases
//   [2] allele frequency in controls
vector_t instanceSNP(SimParameters & s)
{

  // Population frequency of the allele for this particular SNP

  const double popFreq = s.lfreq + CRandom::rand() * ( s.ufreq - s.lfreq ) ;

  // Genotype frequencies for 2, 1 and 0 copies of that allele
  // (products of the allele frequencies; the last one by subtraction)

  const double hom = popFreq * popFreq;
  const double het = 2 * popFreq * ( 1-popFreq );
  const double non = 1 - hom - het;

  // Per-genotype disease probabilities: the baseline (no copies) is
  // scaled so that the genotype-weighted average equals the prevalence;
  // each copy of the allele multiplies the baseline by s.odds

  const double penNon = par::simul_prevalence / ( hom * s.odds * s.odds + het * s.odds + non );
  const double penHom = penNon * s.odds * s.odds;
  const double penHet = penNon * s.odds;

  // P(genotype | case): weight genotypes by P(disease | genotype)
  // and normalise

  const double caseHom = hom * penHom;
  const double caseHet = het * penHet;
  const double caseNon = non * penNon;
  const double caseSum = caseHom + caseHet + caseNon;

  // P(genotype | control): as above, using P(no disease | genotype)

  const double ctrlHom = hom * (1-penHom);
  const double ctrlHet = het * (1-penHet);
  const double ctrlNon = non * (1-penNon);
  const double ctrlSum = ctrlHom + ctrlHet + ctrlNon;

  // Allele frequency = P(homozygote) + half of P(heterozygote)

  const double caseHomP = caseHom / caseSum;
  const double caseHetP = caseHet / caseSum;
  const double ctrlHomP = ctrlHom / ctrlSum;
  const double ctrlHetP = ctrlHet / ctrlSum;

  vector_t result(3);
  result[0] = popFreq;
  result[1] = caseHomP + caseHetP/2.0;
  result[2] = ctrlHomP + ctrlHetP/2.0;
  return result;
}


// Simulate a dataset of unlinked case/control SNPs.
//
// Reads sets of SNP parameters from par::simul_file, then fills the
// 'locus', 'SNP' and 'sample' containers with par::simul_ncases cases
// followed by par::simul_ncontrols controls.  The population allele
// frequency actually drawn for every simulated SNP is written to
// <par::output_file_name>.simfreq.
//
// Each whitespace-separated record of the parameter file contains:
//
//   Number of SNPs
//   Name (used as a prefix if more than one SNP is requested)
//   Lower allele frequency for '1' (versus '2') allele
//   Upper allele frequency (population)
//   Odds ratio ('1' allele)
void Plink::simulateSNPs()
{

  checkFileExists(par::simul_file);
  printLOG("Reading simulation parameters from [ "
           + par::simul_file + " ]\n");

  printLOG("Writing SNP population frequencies to [ "
           + par::output_file_name + ".simfreq ]\n");

  ofstream SOUT( ( par::output_file_name+".simfreq").c_str(), ios::out);

  ifstream SIM;
  SIM.open( par::simul_file.c_str(), ios::in );

  vector<SimParameters> sp;

  // Read records until an extraction fails.  The loop is deliberately
  // NOT controlled by eof(): eofbit is also set by a *successful* read
  // of the final value when the file lacks a trailing newline (which
  // would discard the last record), and it is never set after a
  // malformed token (which would loop forever).

  while ( true )
    {
      SimParameters s;

      SIM >> s.nsnp
          >> s.name
          >> s.lfreq
          >> s.ufreq
          >> s.odds;

      if ( SIM.fail() )
        break;

      sp.push_back(s);
    }

  // A failure without reaching the end of the file means a token could
  // not be parsed; report it rather than silently truncating the input

  if ( ! SIM.eof() )
    printLOG("Warning: stopped reading [ " + par::simul_file
             + " ] before the end of the file: could not parse entry "
             + int2str( sp.size() + 1 ) + "\n");

  SIM.close();


  ////////////////////////////////////////////
  // Make room for total number of SNPs, etc

  int tsnp = 0;

  for (unsigned int s=0; s<sp.size(); s++)
    tsnp += sp[s].nsnp;

  int nind = par::simul_ncases + par::simul_ncontrols;

  printLOG("Read " + int2str( sp.size() ) + " sets of SNPs, specifying " + int2str(tsnp) + " SNPs in total\n");
  printLOG("Simulating " + int2str( par::simul_ncases ) + " cases and " + int2str( par::simul_ncontrols ) + " controls\n");
  printLOG("Assuming a disease prevalence of " + dbl2str( par::simul_prevalence ) + "\n\n");

  // Running base-pair position: SNPs are simply numbered 1, 2, 3, ...

  int pos = 0;

  for (unsigned int s=0; s<sp.size(); s++)
    for (int l=0;l<sp[s].nsnp;l++)
    {
      Locus * loc = new Locus;

      // Append an index to the name only if the set has several SNPs
      if ( sp[s].nsnp > 1 )
        loc->name = sp[s].name+"_"+int2str(l);
      else
        loc->name = sp[s].name;

      loc->chr = 1;
      loc->allele1 = "A";
      loc->allele2 = "B";
      loc->bp = ++pos;
      loc->pos = 0;

      locus.push_back(loc);

      CSNP * newset = new CSNP;

      newset->one.resize(nind);
      newset->two.resize(nind);

      // Sample case and control population allele frequencies

      // Contains population, case, control frequencies

      vector_t f = instanceSNP(sp[s]);

      // One record per simulated SNP, laid out like a parameter-file
      // record; the drawn population frequency fills both the lower
      // and the upper frequency field

      SOUT << 1 << " "
           << loc->name << "\t"
           << f[0] << " " << f[0] << "\t"
           << sp[s].odds << "\n";

      for ( int i = 0 ; i < nind ; i++ )
        {

          // Simple missingness

          if ( CRandom::rand() < sp[s].missing )
            {
              newset->one[i] = true;
              newset->two[i] = false;
              continue;
            }

          // The first par::simul_ncases individuals are the cases;
          // pick the allele frequency that matches the status

          const bool isCase = i < par::simul_ncases;
          const double afreq = isCase ? f[1] : f[2];

          // Two independent allele draws; g counts the draws that
          // exceeded the allele frequency

          int g = 0;
          if ( CRandom::rand() > afreq )
            g++;
          if ( CRandom::rand() > afreq )
            g++;

          // Encode the count in the two genotype bits:
          //   0 -> false/false, 1 -> false/true, 2 -> true/true

          if ( g == 0 )
            {
              newset->one[i] = false;
              newset->two[i] = false;
            }
          else if ( g == 1 )
            {
              newset->one[i] = false;
              newset->two[i] = true;
            }
          else
            {
              newset->one[i] = true;
              newset->two[i] = true;
            }
        }

      SNP.push_back(newset);
    }


  // Phenotypes: founders named per0, per1, ...; cases (phenotype 2)
  // come first, followed by controls (phenotype 1)

  for (int i=0;i<nind;i++)
    {
      Individual * person = new Individual;
      person->fid = person->iid = "per"+int2str(i);
      person->missing = false;
      person->pat = "0";
      person->mat = "0";

      if ( i < par::simul_ncases )
        person->phenotype = 2;
      else
        person->phenotype = 1;

      // Every simulated individual gets the same sex coding
      person->sex = false;
      person->sexcode = "2";
      sample.push_back(person);
    }

  SOUT.close();


}


// void Plink::sibgen()
// {

//   const int MaxFam = 1000000;
//   const int MaxSibSize = 10; // 11 sibs
  
//   // PARAMETERS...  
  
//   // each family 50:50 [A]/[B]
//   // each parent 50:50 [1]:[2], other parent is opposite

//   // allele freqs
//   double pA, qA, pB, qB;
  
//   // basic kid VCs
//   double Vpoly, Vc, Vn;
//   double a, d;
  
//   int N, SibSize;
//   int pat1, pat2, mat1, mat2;
//   double g22, g12, g11;
//   double mg22, mg12, mg11;
//   double pat_pheno, mat_pheno;
  
//   int g[MaxSibSize][3];
//   int g2[MaxSibSize][3];
//   double gv[MaxSibSize];

//   double resC, resPoly[MaxSibSize], resN[MaxSibSize];
//   double pheno[MaxSibSize];
//   double qt[MaxSibSize];
//   double qt_pat, qt_mat;
  
//   double tA, tB;
//   int numAffO;
//   int numAffP;

//   int i,j;
//   double u1, u2, z1, z2;

//   int mode_within, mode_kid;


//   // allele freq set, strata
//   cin >> pA;
//   qA = 1 - pA;


//   // genetic parameters (maybe allow these to differ between strata)
//   cin >> a >> d;
//   cin >> Vpoly >> Vc >>  Vn;



//   // sample sizes
//   cin >> N >> SibSize;

//   // disease threshold
//   cin >> tA >> tB;

//   // ascertainment (required nums of affected parents, offspring)
//   cin >> numAffO;
//   cin >> numAffP;

//   // C/C mode -- act slightly differently
//   double cc_mode = false;
//   if (numAffP<0 && numAffO<0) cc_mode=true;
//   // now numAffO == number of cases
//   // and numAffP == number of controls
 
//   bool exactly = false;
//   if (numAffP<0 && numAffO>=0) { exactly = true; numAffP = -numAffP; }

//   // genotypic scores; by strata and parent/offspring
//   // ASSUME: no strata diffs if there is a QTL...
//   g22 =  a - (a*(pA-qA)+2*pA*qA*d);
//   g12 =  d - (a*(pA-qA)+2*pA*qA*d);
//   g11 = -a - (a*(pA-qA)+2*pA*qA*d);

//   // calculate total expected Variance (assumes genetic model same for A and B, if no strata effects)
//   // ASSUME: no strata diffs if there is a QTL...
//   double Vqtl = 2 * pA * qA * ( a + d*(qA-pA))*(a + d*(qA-pA)) + (2*pA*qA*d)*(2*pA*qA*d);
//   double Vres = 1 - Vqtl;

//   Vpoly *= Vres;
//   Vc *= Vres;
//   Vn *= Vres;

//   double V = Vqtl + Vpoly + Vc + Vn;


//   // build up requisite number of families

//   int count =0;

//  loop:

//   {
      
//       // Step 1. Generate family strata type (50:50 stratification)
      
//       int patAB = 1;
//       if (rand()/double(RAND_MAX) < 0.5 ) patAB = 2;

//       int matAB = 1;
//       if (mode_within==0) matAB = patAB;
//       else if (mode_within==2) 
// 	{
// 	  // force all families to be mixed
// 	  if (patAB==1) matAB=2;
// 	  else matAB=1;
// 	}
//       else if (rand()/double(RAND_MAX) < 0.5 ) matAB = 2;

                        
//       // Step 1. Generate Parental Mating Types
      
//       // q > 1
//       // p > 2 : increaser allele

//       pat1 = 1;
//       pat2 = 1;
//       mat1 = 1;
//       mat2 = 1;

//       double ppat, pmat;

//       if (patAB==1) ppat = pA;
//       else ppat = pB;
      
//       if (matAB==1) pmat = pA;
//       else pmat = pB;

//       if (rand()/double(RAND_MAX) < ppat) pat1 = 2;
//       if (rand()/double(RAND_MAX) < ppat) pat2 = 2;

//       if (rand()/double(RAND_MAX) < pmat) mat1 = 2;
//       if (rand()/double(RAND_MAX) < pmat) mat2 = 2;


//       // Step 2. Generate Offspring Genotypes

//       for (i=1; i<=SibSize; i++)
// 	{
	  
// 	  if (rand()/double(RAND_MAX) < 0.5) 
// 	    {
// 	      g[i][1] = pat1;
// 	      g2[i][1] = 1;
// 	    }
// 	  else
// 	    {
// 	      g[i][1] = pat2;
// 	      g2[i][1] = 2;
// 	    }


// 	  if (rand()/double(RAND_MAX) < 0.5) 
// 	    {
// 	      g[i][2] = mat1;
// 	      g2[i][2] = 3;
// 	    }
// 	  else
// 	    {
// 	      g[i][2] = mat2;
// 	      g2[i][2] = 4;
// 	    }
//     }


//       // paternal genotypic score
//       double gv_pat, gv_mat;
//       if ( (pat1 == 2) && (pat2 == 2) ) gv_pat = g22;
//       if ( (pat1 == 1) && (pat2 == 2) ) gv_pat = g12;
//       if ( (pat1 == 2) && (pat2 == 1) ) gv_pat = g12;
//       if ( (pat1 == 1) && (pat2 == 1) ) gv_pat = g11;
      
//       // maternal genotypic score
//       if ( (mat1 == 2) && (mat2 == 2) ) gv_mat = g22;
//       if ( (mat1 == 1) && (mat2 == 2) ) gv_mat = g12;
//       if ( (mat1 == 2) && (mat2 == 1) ) gv_mat = g12;
//       if ( (mat1 == 1) && (mat2 == 1) ) gv_mat = g11;



//       // Step 3. Generate Offspring genetic values conditional on genotypes
      
//       for (i=1; i<=SibSize; i++)
//       {
// 	  if ( (g[i][1] == 2) && (g[i][2] == 2) ) gv[i] = g22;
// 	  if ( (g[i][1] == 1) && (g[i][2] == 2) ) gv[i] = g12;
// 	  if ( (g[i][1] == 2) && (g[i][2] == 1) ) gv[i] = g12;
// 	  if ( (g[i][1] == 1) && (g[i][2] == 1) ) gv[i] = g11;
//       }

      
//       // Generate PATERNAL & MATERNAL POLYGENES
//       u1 = rand() / double(RAND_MAX);
//       u2 = rand() / double(RAND_MAX);
//       z1 = sqrt(-2*log(u1)) * cos(2*M_PI*u2);
//       z2 = sqrt(-2*log(u1)) * sin(2*M_PI*u2);
      

//       double patA = z1;
//       double matA = z2;

//       double patPoly = patA * sqrt(Vpoly);
//       double matPoly = matA * sqrt(Vpoly);

//       // Generate offspring Polygenic influence
//       for (i=1; i<=SibSize; i++)
//       {
// 	  u1 = rand() / double(RAND_MAX);
// 	  u2 = rand() / double(RAND_MAX);
// 	  z1 = sqrt(-2*log(u1)) * sin(2*M_PI*u2);
	  
// 	  resPoly[i]  = z1/sqrt(2.0) + (patA+matA)/sqrt(4.0);
// 	  resPoly[i]  *= sqrt(Vpoly);
// 	}

      
//       // Generate FAMILY WIDE SHARED ENV..

//       u1 = rand() / double(RAND_MAX);
//       u2 = rand() / double(RAND_MAX);
//       z1 = sqrt(-2*log(u1)) * cos(2*M_PI*u2);
      
//       resC = z1 * sqrt(Vc);
 


// // Step 5. Generate nonshared variable Vn
// //         mean 0 variance Vn

//       u1 = rand() / double(RAND_MAX);
//       u2 = rand() / double(RAND_MAX);
//       z1 = sqrt(-2*log(u1)) * cos(2*M_PI*u2);
//       z2 = sqrt(-2*log(u1)) * sin(2*M_PI*u2);

//       double patN = z1 * sqrt(Vn);
//       double matN = z2 * sqrt(Vn);
      
//       for (i=1; i<=SibSize; i++)
//       {
// 	  u1 = rand() / double(RAND_MAX);
// 	  u2 = rand() / double(RAND_MAX);
// 	  z1 = sqrt(-2*log(u1)) * cos(2*M_PI*u2);
	  
// 	  resN[i] = z1 * sqrt(Vn);
//       }



  
// // Step 6. Calculate trait :  Pheno = Step 3 + Step 4 + Step 5
      
//       pat_pheno = qt_pat = (gv_pat + patPoly + resC + patN)/sqrt(V);
//       mat_pheno = qt_mat = (gv_mat + matPoly + resC + matN)/sqrt(V);

  
//       double pat_thresh = tA;
//       double mat_thresh = tA;

//       if (patAB==2) pat_thresh = tB;
//       if (matAB==2) mat_thresh = tB;

//       if (pat_pheno >= pat_thresh) pat_pheno=2; else pat_pheno=1;
//       if (mat_pheno >= mat_thresh) mat_pheno=2; else mat_pheno=1;
      
//       double rtmp = rand() / double(RAND_MAX);
      
//       double kid_thresh[MaxSibSize];

//       // what to do with the kids??? vis-a-vis stratification effects??
//       for (i=1; i<=SibSize; i++)
// 	 {
// 	     pheno[i] = qt[i] = (gv[i] + resPoly[i] + resC + resN[i])/sqrt(V);
	     
// 	     // kid-stratification mode
// 	     // 1 = average of pat/mat
// 	     // 2 = random of pat/mat, same for each kid
// 	     // 3 = random of pat/mat, diff. for each kid
// 	     // 4 = largest of pat/mat
// 	     double kid_strat=0;
	     
// 	     // average
// 	     if (mode_kid==1) 
// 	     {
// 		 if (patAB==2)
// 		     kid_strat = 0.5;
// 		 if (matAB==2)
// 		     kid_strat += 0.5;
//       		 kid_thresh[i] = tA + kid_strat * (tB-tA);
// 	     }
		     

// 	     // random parent, within family
// 	     if (mode_kid==2)
// 	     {
//       		 if (rtmp > 0.5) kid_thresh[i] = pat_thresh;
// 		 else kid_thresh[i] = mat_thresh;
// 	     }
	     

// 	     // random parent, random kid
// 	     if (mode_kid==3) 
// 	     {
// 		 if (rand()/double(RAND_MAX) > 0.5) kid_thresh[i] = pat_thresh;
// 		 else kid_thresh[i] = mat_thresh;
// 	     }
	     
// 	     // largest (worst-case scenario)
// 	     if (mode_kid==4) 
// 	     {
// 	       // disease more common in A
// 	       if (tA<tB) { 
// 	         if ( patAB==1 || matAB==1 ) kid_thresh[i] = tA;
// 		 else kid_thresh[i] = tB;
// 	       }
// 	       else
// 		 { 
// 	         if ( patAB==2 || matAB==2 ) kid_thresh[i] = tB;
// 		 else kid_thresh[i] = tA;
// 		 }
// 	     }

// 	     // inherit minimum 
// 	     if (mode_kid==5) 
// 	     {
// 	       // disease more rare in A (i.e. higher threshold)
// 	       if (tA>tB) { 
// 	         if ( patAB==1 || matAB==1 ) kid_thresh[i] = tA;
// 		 else kid_thresh[i] = tB;
// 	       }
// 	       else
// 		 { 
// 	         if ( patAB==2 || matAB==2 ) kid_thresh[i] = tB;
// 		 else kid_thresh[i] = tA;
// 		 }
// 	     }


// 	     // dichotomize trait
// 	     if (pheno[i] >= kid_thresh[i]) pheno[i]=2; else pheno[i]=1;
   	     	     
// 	 }


//       ////////////////////////////////////////
//       // Do we meet ascertainment conditions?
      
//       if (!cc_mode)
// 	{
// 	  int cntAffP = 0;
// 	  int cntAffO = 0;
// 	  if (pat_pheno==2) cntAffP++;
// 	  if (mat_pheno==2) cntAffP++;
// 	  for (i=1; i<=SibSize;i++)
// 	    if (pheno[i]==2) cntAffO++;
	  
// 	  bool accept = false;

// 	  if (!exactly)
// 	    if (cntAffP>=numAffP && cntAffO>=numAffO) accept = true;
	  
// 	  if (exactly)
// 	    if (cntAffP==numAffP && cntAffO>=numAffO) accept = true;
	  
// 	  if (accept) { 
	    
// 	    // okay -- let's sample this family
	    
// 	    count++;
	    
// 	    // parents
	    
// 	    cout << count 
// 		 << " 1 " // individual ID
// 		 << " 0 0 " // founder
// 		 << " 1 " // male
// 		 << " " << pat_pheno 
// 		 << " " << pat1 << " " << pat2 // genotypes
// 		 << "\n";
	    
// 	    cout << count
// 		 << " 2 " // individual ID
// 		 << " 0 0 " // founder
// 		 << " 2 " // female
// 		 << " " << mat_pheno
// 		 << " " << mat1 << " " << mat2 // genotypes
// 		 << "\n";
	    
// 	    for (i=1; i<=SibSize; i++)
// 	      {
// 		cout << count << " "
// 		     << i+2 // individual ID
// 		     << "  1 2 " // parents
// 		     << " 1 " // male
// 		     << " " << pheno[i]
// 		     << "  " << g[i][1] << " " << g[i][2] // genotypes
// 		     << "\n";
// 	      }
	    
// 	  } // end J loop
	
// 	  if (count<N) goto loop;
// 	}
      
      
//       // OR C/C MODE?
//       if (cc_mode) 
// 	{
// 	  count++;
// 	  if ((pheno[1]==2 && numAffO<0 ) || ( pheno[1]==1 && numAffP<0)) { 
	    
// 	    // okay -- let's sample this family
	    
// 	    if (pheno[1]==2) numAffO++;
// 	    if (pheno[1]==1) numAffP++;
	    
// 	    cout << count << " " 
// 		 << 1 // individual ID
// 		 << "  0 0 " // parents
// 		 << " 1 " // male
// 		 << " " << pheno[1] // no quantitative trait ;
// 		 << " " << g[1][1] << " " << g[1][2] // genotypes
// 		 << "\n";
	      
// 	  }
// 	  if ((numAffO+numAffP)<0) goto loop;
// 	}
      
      

//       exit(0);
//   }
//       // Output
      
  
// }

//}
