/*
 * Written by Bastien Chevreux (BaCh)
 *
 * Copyright (C) 2012 and later by Bastien Chevreux
 *
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the
 * Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 *
 */


#include "contig.H"
#include "simple_2Dsignalprocessing.H"

using namespace std;



#define CEBUG(bla)   {cout << bla; cout.flush();}
std::pair<int32,int32> Contig::findBestPairConsistencyRange()
{
  FUNCSTART("void Contig::checkPairConsistency()");

  pair<int32,int32> ret(0,getContigLength());

  if(getContigLength()==0) return ret;

  bool runpeakfinder=false;
  vector<ReadGroupLib::ReadGroupID> rgtocheck;
  for(uint32 rgi=1; rgi<ReadGroupLib::getNumReadGroups(); ++rgi){
    auto rgid=ReadGroupLib::getReadGroupID(rgi);
    if(rgid.hasTemplateInfo()){
      rgtocheck.push_back(rgid);
      if(rgid.getInsizeTo()>=0 && rgid.getInsizeTo()<=1000) runpeakfinder=true;
    }
  }

  if(rgtocheck.size()){
    CEBUG("Need to check readgroups:\n");
    vector<uint8> breakmarker(getContigLength(),0);
    vector<vector<uint64> > spanner(rgtocheck.size());
    vector<uint64> tmpforstats;
    vector<vector<int32> > valleydepths(rgtocheck.size());
    vector<vector<int32> > peakheights(rgtocheck.size());

    auto spanner12=spanner;
    auto nonspanner=spanner;
    for(auto rgi=0; rgi<spanner.size(); ++rgi){
      priv_cprForRGID(rgtocheck[rgi],spanner[rgi],spanner12[rgi],nonspanner[rgi]);
      string desc=getContigName();
      desc+='_';
      desc+=rgtocheck[rgi].getGroupName();
      //dbgContainerToWiggle(spanner[rgi],getContigName(),desc+"_sp");
      //dbgContainerToWiggle(spanner12[rgi],getContigName(),desc+"_sp12");
      //dbgContainerToWiggle(nonspanner[rgi],getContigName(),desc+"_nsp");

      coverageinfo_t tci;
      tmpforstats=spanner[rgi];
      calcStatsOnContainer(tci,tmpforstats);
      cout << "spanner - normal:\n" << tci << endl;
      calcSecondOrderStatsOnContainer(tci,tmpforstats);
      cout << "spanner - 2n:\n" << tci << endl;

      // spanner are searched for valleys in coverage
      // valley is <= mean cov/3, size >=50
      if(tci.mean>=9){
	bool invalley=false;
	auto vsI=spanner[rgi].cbegin();
	uint64 pos=0;
	uint64 threshold=tci.mean/3;

	uint64 firsttpos=getContigLength();
	uint64 lasttpos=0;
	{
	  auto dist=distToFirstThreshold(spanner[rgi].begin(),spanner[rgi].end(),tci.mean/2);
	  if(dist>=0) firsttpos=dist;
	  dist=distToFirstThreshold(spanner[rgi].rbegin(),spanner[rgi].rend(),tci.mean/2);
	  if(dist>=0) lasttpos=getContigLength()-dist;
	}

	cout << "threshold=" << threshold << endl;
	cout << "ftp: " << firsttpos << endl;
	cout << "ltp: " << lasttpos << endl;
	for(auto sI=spanner[rgi].cbegin(); sI!=spanner[rgi].cend(); ++sI, ++pos){
	  bool valleystop=false;
	  if(pos<firsttpos) continue;
	  if(pos>=lasttpos){
	    if(invalley) {
	      valleystop=true;
	    }else{
	      break;
	    }
	  }
	  if(invalley){
	    if(*sI>threshold){
	      valleystop=true;
	      invalley=false;
	    }
	  }else{
	    if(*sI<threshold){
	      invalley=true;
	      vsI=sI;
	    }
	  }
	  if(valleystop){
	    auto veI=sI;
	    cout << "New potential valley: " << vsI-spanner[rgi].cbegin() << "\t" << veI-spanner[rgi].cbegin() << "\t" << veI-vsI << endl;
	    auto minval=*vsI;
	    for(auto tI=vsI; tI<veI; ++tI){
	      if(*tI<minval) minval=*tI;
	    }
	    for(auto tI=vsI; tI<veI; ++tI){
	      if(*tI==minval) {
		valleydepths[rgi].push_back(tI-spanner[rgi].cbegin());
		cout << "Valley min " << minval << " at " << tI-spanner[rgi].cbegin() << endl;
	      }
	    }
	  }
	}
      }
    }

    if(runpeakfinder){
      // but search peaks only in areas not covered by found valleys
      // create a peakmasker for this
      vector<uint8> peakmasker(getContigLength(),0);
      for(auto rgi=0; rgi<spanner.size(); ++rgi){
	for(auto vde : valleydepths[rgi]){
	  auto from=vde-rgtocheck[rgi].getInsizeTo();
	  if(from<0) from=0;
	  auto to=vde+rgtocheck[rgi].getInsizeTo();
	  if(to>getContigLength()) to=getContigLength();
	  for(; from<to; ++from) peakmasker[from]=1;
	}
      }

      for(auto rgi=0; rgi<spanner.size(); ++rgi){
	// do this only for libraries with small template size
	if(rgtocheck[rgi].getInsizeTo()>=0 && rgtocheck[rgi].getInsizeTo()<=1000){
	  // We won't search here, just need some numbers for the
	  // correct threshold in non-spanner down below
	  tmpforstats=spanner12[rgi];

	  coverageinfo_t tci;
	  calcStatsOnContainer(tci,tmpforstats);
	  cout << "spanner12 - normal:\n" << tci << endl;
	  calcSecondOrderStatsOnContainer(tci,tmpforstats);
	  cout << "spanner12 - 2n:\n" << tci << endl;

	  uint64 nsthreshold=tci.mean/2;

	  tmpforstats=nonspanner[rgi];
	  calcStatsOnContainer(tci,tmpforstats);
	  cout << "nonspanner - normal:\n" << tci << endl;
	  calcSecondOrderStatsOnContainer(tci,tmpforstats);
	  cout << "nonspanner - 2n:\n" << tci << endl;

	  bool inpeak=false;
	  auto vsI=nonspanner[rgi].cbegin();
	  uint64 pos=0;
	  uint64 threshold=nsthreshold;
	  if(tci.mean*6>threshold) threshold=tci.mean*6;

	  uint64 firsttpos=1;
	  uint64 lasttpos=getContigLength()-1;

	  cout << "threshold=" << threshold << endl;
	  cout << "ftp: " << firsttpos << endl;
	  cout << "ltp: " << lasttpos << endl;
	  for(auto sI=nonspanner[rgi].cbegin(); sI!=nonspanner[rgi].cend(); ++sI, ++pos){
	    bool peakstop=false;
	    if(pos<firsttpos) continue;
	    if(pos>=lasttpos){
	      if(inpeak) {
		peakstop=true;
	      }else{
		break;
	      }
	    }
	    if(inpeak){
	      if(*sI<threshold){
		peakstop=true;
		inpeak=false;
	      }
	    }else{
	      if(*sI>threshold){
		inpeak=true;
		vsI=sI;
	      }
	    }
	    if(peakstop){
	      auto veI=sI;
	      cout << "New potential peak: " << vsI-nonspanner[rgi].cbegin() << "\t" << veI-nonspanner[rgi].cbegin() << "\t" << veI-vsI << endl;
	      auto maxval=*vsI;
	      for(auto tI=vsI; tI<veI; ++tI){
		if(*tI>maxval) maxval=*tI;
	      }
	      for(auto tI=vsI; tI<veI; ++tI){
		if(*tI==maxval) {
		  if(peakmasker[tI-nonspanner[rgi].cbegin()]){
		    cout << "Peak masked " << maxval << " at " << tI-nonspanner[rgi].cbegin() << endl;
		  }else{
		    peakheights[rgi].push_back(tI-nonspanner[rgi].cbegin());
		    cout << "Peak max " << maxval << " at " << tI-nonspanner[rgi].cbegin() << endl;
		  }
		}
	      }
	    }
	  }
	}
      }
    }

    // now create the break ranges +/-50 from the valleys
    for(auto rge : valleydepths){
      for(auto pos : rge){
	auto from=pos-50;
	if(from<0) from=0;
	auto to=pos+50;
	if(to>getContigLength()) to=getContigLength();
	for(; from<to; ++from) breakmarker[from]=1;
      }
    }

    // add in the peak markers
    for(auto rge : peakheights){
      for(auto pos : rge){
	breakmarker[pos]=1;
      }
    }

    // last step: find longest range without problems
    auto tpair=ret;
    bool inrange=!breakmarker[0];
    if(inrange) tpair.first=0;
    for(int32 bmi=0; bmi<breakmarker.size(); ++bmi){
      //cout << bmi << "\t" << inrange << " " << static_cast<int16>(breakmarker[bmi]);
      if(inrange){
	if(breakmarker[bmi]) {
	  //cout << " stop range";
	  tpair.second=bmi;
	  if(tpair.second-tpair.first > ret.second-ret.first){
	    ret=tpair;
	    //cout << " best";
	  }
	  inrange=false;
	}
      }else{
	if(!breakmarker[bmi]) {
	  //cout << " starting range";
	  inrange=true;
	  tpair.first=bmi;
	}
      }
      //cout << "\n";
    }
    if(inrange){
      tpair.second=getContigLength();
      if(tpair.second-tpair.first > ret.second-ret.first){
	ret=tpair;
      }
    }
  }

  CEBUG("returning ret: " << ret.first << "\t" << ret.second << endl);
  return ret;
}
#define CEBUG(bla)

#define CEBUG(bla)   {cout << bla; cout.flush();}
// Computes three per-position coverage tracks for the reads of read
// group 'rgid' in this contig. All three output vectors are resized to
// the contig length and zeroed first.
//
// Only reads that belong to 'rgid', have template info and are neither
// backbone, rail nor coverage equivalent reads are looked at. For each
// such read:
//  - if its template partner is found in the contig:
//      spanner   gets +1 on the range from the smaller start to the
//                larger end of the two reads
//      spanner12 gets +1 on the range of each of the two reads
//  - otherwise:
//      nonspanner gets +1 on the range of the read
//
// NOTE(review): a pair whose two reads both pass the filter above is
// visited once from each read and therefore contributes twice to
// spanner and spanner12 -- confirm that this is intended.
//
// Ranges are clamped to the contig, so a range touching or exceeding the
// contig end cannot write outside the tracks.
void Contig::priv_cprForRGID(ReadGroupLib::ReadGroupID rgid, vector<uint64> & spanner, vector<uint64> & spanner12, vector<uint64> & nonspanner)
{
  FUNCSTART("void Contig::priv_cprForRGID(ReadGroupLib::ReadGroupID rgid, vector<uint64> & spanner, vector<uint64> & spanner12, vector<uint64> & nonspanner)");

  spanner.assign(getContigLength(),0);
  spanner12=spanner;
  nonspanner=spanner;

  // adds 1 to every position of cov in [from,to), clamped to the track;
  // an empty or reversed range is a no-op
  auto addcoverage=[](vector<uint64> & cov, long long from, long long to){
    const long long covlen=static_cast<long long>(cov.size());
    if(from<0) from=0;
    if(to>covlen) to=covlen;
    for(; from<to; ++from) cov[static_cast<size_t>(from)]+=1;
  };

  CEBUG("cprForRGID on readgroup:\n" << rgid);
  auto pcrI=CON_reads.begin();
  auto opcrI=pcrI;  // will point to the template partner of *pcrI
  auto crE=CON_reads.end();
  for(; pcrI != crE; ++pcrI){
    if(pcrI->getReadGroupID()!=rgid
       || !pcrI->hasTemplateInfo()
       || pcrI->isBackbone()
       || pcrI->isRail()
       || pcrI->isCoverageEquivalentRead()) continue;

    // the partner counts as "in contig" if the lookup does not yield end()
    opcrI=CON_reads.getIteratorOfReadpoolID(pcrI->getTemplatePartnerID());
    const bool tpartnerincontig=(opcrI!=crE);
    //CEBUG("Looking at " << pcrI.getORPID() << "\t" << pcrI->getName() << endl);
    //CEBUG(pcrI->isBackbone() << '\t' << pcrI->isRail() << '\t' << pcrI->isCoverageEquivalentRead() << '\t' << pcrI->hasTemplateInfo()  << '\t' << pcrI->getTemplatePartnerID() << '\t' << tpartnerincontig << endl);

    if(tpartnerincontig){
      auto start1=pcrI.getReadStartOffset();
      auto end1=start1+pcrI->getLenClippedSeq();
      auto start2=opcrI.getReadStartOffset();
      auto end2=start2+opcrI->getLenClippedSeq();

      // range spanned by the whole pair
      auto start=min(start1,start2);
      auto end=max(end1,end2);

      addcoverage(spanner,start,end);
      addcoverage(spanner12,start1,end1);
      addcoverage(spanner12,start2,end2);
    }else{
      auto start=pcrI.getReadStartOffset();
      auto end=start+pcrI->getLenClippedSeq();
      addcoverage(nonspanner,start,end);
    }

  }
}
#define CEBUG(bla)
