/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

#include "SArrayIndex.h"
#include <time.h>
#include <assert.h>

namespace GB2 {

//////////////////////////////////////////////////////////////////////////
//SArrayIndex
//////////////////////////////////////////////////////////////////////////
/**
 * Builds the index over the windows of length _len found in seq.
 *
 * @param seq          start of the indexed sequence; the pointer is kept in seqStart
 * @param seqSize      number of characters in seq
 * @param _len         window (sample) length, stored in w
 * @param _activeFlag  shared flag; when it is false once the masks are built the
 *                     constructor returns without sorting the index
 * @param unknownChar  0 to take every window; otherwise windows that contain this
 *                     character are left out
 * @param bitTable     table indexed by character value; its number of set bits is
 *                     passed to getBitLen() to pick the per-character bit width, and
 *                     bitTable[c] is OR-ed into the packed prefix value
 * @param _gap         number of window starts skipped after each indexed one
 * @param _gapOffset   offset of the first indexed window start
 *
 * NOTE(review): the stores into sArray and the update of expectedNext are
 * commented out below ("64bit issue"), so the position array is allocated but
 * never filled by this constructor.
 * NOTE(review): seqSize is presumably expected to be >= _len; otherwise
 * "seqSize - w + 1" wraps around - confirm against callers.
 */
SArrayIndex::SArrayIndex(const char* seq, quint32 seqSize,  quint32 _len, bool& _activeFlag, char unknownChar, const QBitArray& bitTable, quint32 _gap, quint32 _gapOffset)
: w(_len), w4(_len/4), wRest(_len%4), skipGap(_gap),  gapOffset(_gapOffset), activeFlag(_activeFlag), L1_SIZE(0), L1_SIZE_1(0), l1Step(0)
{
	// Null both mask pointers first: the destructor inspects them, and
	// l1bitMask is otherwise only assigned after the activeFlag check below.
	bitMask = NULL;
	l1bitMask = NULL;

	long t1 = clock();
	Q_UNUSED(t1);
	size = seqSize - w + 1;
	quint32 _skipGap = skipGap;//local
	if (_skipGap > 0) {
		size = (size / _skipGap) + 1;
	}
	sArray = new quint32[size];
	quint32 bitCharLen = getBitLen(bitTable.count(true));
	//printf("@@@@@@@@@@@@@@@@bitlen=%d\n", bitCharLen);
	if (bitCharLen>0 && bitCharLen<=5) {
		// Pack as many leading characters as fit into 30 bits; the rest of the
		// window (wAfterBits characters) is compared character by character.
		wCharsInMask = qMin(30 / bitCharLen, w);//30 to avoid +- overflow
		wAfterBits =  qMax<quint32>(0, w - wCharsInMask);
		if (wCharsInMask * bitCharLen == 32) {
			bitFilter = 0xFFFFFFFF;
		} else {
			bitFilter = (1<<(bitCharLen * wCharsInMask))-1;
		}
	} else {
		bitMask = NULL;
		bitFilter = wAfterBits  = wCharsInMask  = 0;
	}

	quint32* arunner= sArray;
	seqStart = seq;
	const char* seqEnd= seqStart+seqSize - w + 1;
	if (unknownChar == 0) {
		// No filtering: take every (1+gap)-th window start beginning at gapOffset.
		quint32 step = 1+_skipGap;
		for (const char* crunner = seqStart+gapOffset; crunner < seqEnd; arunner++, crunner+=step) {
//TODO: 64bit issue			*arunner=(quint32)crunner;
		}
	} else { //filter
		quint32 oldSize = size;
		Q_UNUSED(oldSize);
		// Pre-scan: lastErrDist counts characters seen since the last unknownChar.
		const char* crunner = seqStart;
		quint32 lastErrDist;
		for (lastErrDist = 0; crunner < seqEnd && lastErrDist < w-1; crunner++) {
			if (*crunner!=unknownChar) {
				lastErrDist++;
				continue;
			}
			lastErrDist=0;
		}
		const char* cpos = crunner - w;
		qint32 gapLeft = _gapOffset;
		quint32 w1 = w-1;
		if (size != 0) {
			// Slide the window; a start is taken only once w characters without
			// unknownChar have been seen and the gap countdown has reached zero.
			while ( ++cpos < seqEnd ) {
				if (*(cpos+w1)!=unknownChar) {
					lastErrDist++;
					if (lastErrDist >= w && gapLeft-- == 0) {
//TODO: 64bit issue						*arunner=(quint32)cpos;
						arunner++;
						gapLeft = _skipGap;
					}
					continue;
				}
				// An unknown character restarts both counters.
				lastErrDist=0;
				gapLeft = _gapOffset;
			}
		}
		size=arunner-sArray;
		//DBG(printf("filtered len %d, percent %f\n",(oldSize-size), (size/(float)(oldSize!=0?oldSize:1))));
//		fflush(stdout);
	}
	// The number of entries actually produced becomes the index size.
	size=arunner-sArray;

	size_1 = size-1;

	if (bitCharLen!=0) {
		// Build the packed prefix value for every entry. When an entry directly
		// follows the previous one in the sequence the value is derived by
		// shifting in one more character, otherwise it is recomputed from scratch.
		bitMask = new quint32[size];
		quint32 bitValue = 0;
		quint32* arunner= sArray;
		quint32* mrunner = bitMask;
		quint32 expectedNext = 0;
		quint32 wCharsInMask1 = wCharsInMask - 1;
		for (quint32* end = mrunner+size; mrunner < end; arunner++, mrunner++) {
			char* c = (char*)*arunner;
			if (*arunner == expectedNext) {
				char pos = *(c+wCharsInMask1);
				assert(pos >=0 && pos<bitTable.size());
				bitValue= ( (bitValue << bitCharLen) | bitTable[pos]) &  bitFilter;
			} else {
				bitValue = 0;
				for (quint32 i =0; i < wCharsInMask; i++) {
					char pos = *(c+i);
					assert(pos >=0 && pos<bitTable.size());
					bitValue = (bitValue << bitCharLen) | bitTable[pos];
				}
			}
			*mrunner = bitValue;
//TODO: 64 bit issue			expectedNext = (quint32)(c+1);
		}
	}

	if (!activeFlag) {
		return;
	}
	if (bitMask!=NULL) 	{
		sortBit(bitMask, 0, size);
		//sortBitCl(bitMask, 0, size);
		if (size < 100*1000) {
			// Small index: the first-level array is the mask array itself.
			L1_SIZE = size;
			L1_SIZE_1 = size-1;
			l1Step = 1;
			l1bitMask = bitMask;
		} else {
			// Large index: sample every l1Step-th mask into a separate array;
			// the last slot always holds the final mask value.
			L1_SIZE=8192;
			L1_SIZE_1=(L1_SIZE-1);
			l1bitMask = new quint32[L1_SIZE];
			l1Step = size / L1_SIZE;
			for (int i=0; i < L1_SIZE; i++) {
				l1bitMask[i] = bitMask[i*l1Step];
			}
			l1bitMask[L1_SIZE-1] = bitMask[size-1];
		}
	} else {
		sort(sArray, 0, size);
	}
	long t2 = clock();
	Q_UNUSED(t2);
	if (bitMask!=NULL) {//debug check
		//debugCheck(unknownChar);
	}
	//DBG(printf("full index time: %d\n", (t2-t1)));
	//fflush(stdout);
}

/**
 * Releases the position array and the mask arrays.
 *
 * All three buffers are created with new[] in the constructor, so they are
 * released with delete[]. For small indexes l1bitMask is the same buffer as
 * bitMask and must be freed only once.
 */
SArrayIndex::~SArrayIndex() {
	delete[] sArray;
	if (bitMask!=NULL) {
		if (bitMask!=l1bitMask) {
			delete[] l1bitMask;
		}
		delete[] bitMask;
	}
}

/**
 * Sorts x[off .. off+len-1] in place using the ordering defined by
 * compare(quint32, quint32).
 *
 * Ranges shorter than 7 elements are insertion-sorted; longer ranges are
 * partitioned three ways around a pivot and the "less" and "greater" parts are
 * sorted recursively. The call returns immediately, leaving the range as it is,
 * whenever activeFlag is false.
 *
 * @param x    array being sorted
 * @param off  index of the first element of the range
 * @param len  number of elements in the range
 */
void SArrayIndex::sort(quint32* x, qint32 off, qint32 len) {
	// Stop early when the index is no longer active
	if (!activeFlag) {
		return;
	}
	// Insertion sort on smallest arrays
	if (len < 7) {
		for (qint32 i=off; i<len+off; i++){
			for (qint32 j=i; j > off && compare(x[j-1],x[j])>0; j--) {
			    swap(x, j, j-1);
			}
		}
	    return;
	}

	// Choose a partition element, v
	quint32 m = off + len / 2;       // Small arrays, middle element
	if (len > 7) {
		quint32 l = off;
		quint32 n = off + len - 1;
		if (len > 40) {        // Big arrays, pseudomedian of 9
			quint32 s = len / 8;
			l = med3(x, l,     l+s, l+2*s);
			m = med3(x, m-s,   m,   m+s);
			n = med3(x, n-2*s, n-s, n);
		}
		m = med3(x, l, m, n); // Mid-size, med of 3
	}
	// The pivot is copied by value, so later swaps cannot change it
	quint32 v = x[m];

	// Establish Invariant: v* (<v)* (>v)* v*
	// a/d mark the inner ends of the two "equal to v" runs parked at the edges,
	// b/c are the scanning cursors moving towards each other.
	qint32 a = off, b = off, c = off + len - 1, d = c;
	while(true) {
		qint32 cr;
		// Advance b over elements <= v, parking elements equal to v on the left
		while (b <= c && (cr = compare(v, x[b])) >= 0) {
			if (cr == 0) {
				swap(x, a++, b);
			}
			b++;
		}
		// Retreat c over elements >= v, parking elements equal to v on the right
		while (c >= b && (cr = compare(x[c], v)) >=0 ) {
			if (cr == 0) {
				swap(x, c, d--);
			}
			c--;
		}
		if (b > c) {
			break;
		}
		// x[b] > v and x[c] < v here: exchange them and continue
		swap(x, b++, c--);
	}

	// Swap partition elements back to middle
	qint32 s, n = off + len;
	s = qMin(a-off, b-a  );  vecswap(x, off, b-s, s);
	s = qMin(d-c,   n-d-1);  vecswap(x, b,   n-s, s);

	// Recursively sort non-partition-elements
	// (b-a elements are less than v, d-c elements are greater than v)
	if ((s = b-a) > 1) {
	    sort(x, off, s);
	}
	if ((s = d-c) > 1) {
	    sort(x, n-s, s);
	}
}



/**
 * Sorts x[off .. off+len-1] in place using the ordering defined by compareBit().
 *
 * Elements are moved with swapBit()/vecswapBit(), which also move the sArray
 * entries located at the same offsets from bitMask. Ranges shorter than 7
 * elements are insertion-sorted; longer ranges are partitioned three ways
 * around a pivot and the outer parts are sorted recursively. The call returns
 * immediately whenever activeFlag is false.
 *
 * @param x    array being sorted; compareBit()/swapBit() compute offsets of its
 *             elements relative to bitMask
 * @param off  index of the first element of the range
 * @param len  number of elements in the range
 */
void SArrayIndex::sortBit(quint32* x, qint32 off, qint32 len) {
	// Stop early when the index is no longer active
	if (!activeFlag) {
		return;
	}
	// Insertion sort on smallest arrays
	if (len < 7) {
		for (qint32 i=off; i<len+off; i++){
			for (qint32 j=i; j > off && compareBit(x+j-1,x+j)>0; j--) {
				swapBit(x+j, x+j-1);
			}
		}
	    return;
	}

	// Choose a partition element, v
	quint32 m = off + len / 2;       // Small arrays, middle element
	if (len > 7) {
	    quint32 l = off;
	    quint32 n = off + len - 1;
	    if (len > 40) {        // Big arrays, pseudomedian of 9
			quint32 s = len / 8;
			l = med3Bit(x, l,     l+s, l+2*s);
			m = med3Bit(x, m-s,   m,   m+s);
			n = med3Bit(x, n-2*s, n-s, n);
		}
	    m = med3Bit(x, l, m, n); // Mid-size, med of 3
	}
	// The pivot is referenced by address (compareBit needs the slot position),
	// so v has to follow the pivot whenever a swap moves it.
	quint32* v = x + m;

	// Establish Invariant: v* (<v)* (>v)* v*
	// a/d mark the inner ends of the "equal to v" runs parked at the edges,
	// b/c are the scanning cursors moving towards each other.
	qint32 a = off, b = a, c = off + len - 1, d = c;
	while(TRUE) {
	    qint32 cr;
		// Advance b over elements <= v, parking equal elements on the left
		while (b <= c && (cr = compareBit(v, x+b)) >=0 ) {
			if (cr == 0) {
				(x+b==v) && (v=x+a);//save middle pos value
				swapBit(x+a++,x+b);
			}
			b++;
	    }
	    // Retreat c over elements >= v, parking equal elements on the right
	    while (c >= b && (cr = compareBit(x+c, v)) >=0 ) {
			if (cr == 0) {
				(x+c==v) && (v=x+d);//save middle pos value
				swapBit(x+c, x+d--);
			}
			c--;
	    }
		if (b > c) {
			break;
		}
		// x[b] > v and x[c] < v here: exchange them and continue
		swapBit(x+b++, x+c--);
	}

	// Swap partition elements back to middle
	// (the && skips the vector swap when there is nothing to move)
	qint32 s, n = off + len;
	(s = qMin(a-off, b-a  )) &&  vecswapBit(x+off, x+b-s, s);
	(s = qMin(d-c,   n-d-1)) &&  vecswapBit(x+b,   x+n-s, s);

	// Recursively sort non-partition-elements
	// (b-a elements are less than v, d-c elements are greater than v)
	if ((s = b-a) > 1) {
	    sortBit(x, off, s);
	}
	if ((s = d-c) > 1) {
	    sortBit(x, n-s, s);
	}
}

/**
 * Partitions x[p..r] around the element stored in slot p for sortBitClassic().
 *
 * Two cursors move towards each other: the right one skips elements that
 * compare greater than the pivot slot, the left one skips elements that compare
 * less, and the pair they stop on is exchanged with swapBit().
 *
 * @return the position of the right cursor once the cursors have met or crossed
 */
qint32  SArrayIndex::partition(quint32* x, qint32 p, qint32 r) {
	quint32* pivotSlot = x + p;
	qint32 left = p - 1;
	qint32 right = r + 1;
	for (;;) {
		// Step the right cursor at least once, then past everything above the pivot.
		for (--right; compareBit(x + right, pivotSlot) > 0; --right) {
		}
		// Step the left cursor at least once, then past everything below the pivot.
		for (++left; compareBit(x + left, pivotSlot) < 0; ++left) {
		}
		if (left >= right) {
			return right;
		}
		swapBit(x + left, x + right);
	}
}


/**
 * Recursive quicksort of x[p..r] (both bounds inclusive) built on partition().
 * Ranges with fewer than two elements are left untouched.
 */
void SArrayIndex::sortBitClassic(quint32* x, qint32 p, qint32 r) {
	if (p >= r) {
		return;
	}
	const qint32 split = partition(x, p, r);
	sortBitClassic(x, p, split);
	sortBitClassic(x, split + 1, r);
}


/**
 * Compares two windows whose start addresses are given as quint32 values.
 *
 * The first w4 32-bit words are compared as unsigned integers, then the
 * remaining wRest bytes one by one.
 *
 * @return 0 when the windows are equal, otherwise the first non-zero difference
 *         (word difference converted to qint32, or byte difference)
 */
qint32 SArrayIndex::compare(quint32 x1, quint32 x2) const {
	quint32* lhsWord = (quint32*)x1;
	quint32* rhsWord = (quint32*)x2;

	// Whole 32-bit words first.
	quint32* const lhsWordEnd = lhsWord + w4;
	while (lhsWord < lhsWordEnd) {
		const qint32 wordDiff = *lhsWord - *rhsWord;
		if (wordDiff != 0) {
			return wordDiff;
		}
		++lhsWord;
		++rhsWord;
	}

	// Then the trailing bytes; wRest is the window length modulo 4,
	// so there are at most three of them.
	char* lhsByte = (char*)lhsWord;
	char* rhsByte = (char*)rhsWord;
	for (quint32 k = 0; k < wRest; k++) {
		const qint32 byteDiff = lhsByte[k] - rhsByte[k];
		if (byteDiff != 0) {
			return byteDiff;
		}
	}
	return 0;
}

/**
 * Compares two windows given by their start pointers.
 *
 * The first w4 32-bit words are compared as unsigned integers, then the
 * remaining wRest bytes one by one.
 *
 * @return 0 when the windows are equal, otherwise the first non-zero difference
 *         (word difference converted to qint32, or byte difference)
 */
qint32 SArrayIndex::compare(const char* seq1, const char* seq2) const {
	const quint32* lhsWord = (const quint32*)seq1;
	const quint32* rhsWord = (const quint32*)seq2;

	// Whole 32-bit words first.
	const quint32* const lhsWordEnd = lhsWord + w4;
	while (lhsWord < lhsWordEnd) {
		const qint32 wordDiff = *lhsWord - *rhsWord;
		if (wordDiff != 0) {
			return wordDiff;
		}
		++lhsWord;
		++rhsWord;
	}

	// Then the trailing bytes; wRest is the window length modulo 4,
	// so there are at most three of them.
	const char* lhsByte = (const char*)lhsWord;
	const char* rhsByte = (const char*)rhsWord;
	for (quint32 k = 0; k < wRest; k++) {
		const qint32 byteDiff = lhsByte[k] - rhsByte[k];
		if (byteDiff != 0) {
			return byteDiff;
		}
	}
	return 0;
}


/**
 * Compares two index entries identified by pointers into bitMask.
 *
 * The packed mask values decide first. On a tie, the wAfterBits characters that
 * follow the first wCharsInMask characters of each window are compared; the
 * window addresses are read from the sArray slots at the same offsets.
 *
 * @return 0 when equal, otherwise the first non-zero difference
 */
qint32 SArrayIndex::compareBit(const quint32* x1, const quint32* x2) const {
	const qint32 maskDiff = *x1 - *x2;
	if (maskDiff != 0) {
		return maskDiff;
	}

	// Masks tie: fall back to the characters not covered by the mask.
	const char* tail1 = (const char*)sArray[x1 - bitMask] + wCharsInMask;
	const char* tail2 = (const char*)sArray[x2 - bitMask] + wCharsInMask;
	const char* const tail1End = tail1 + wAfterBits;
	while (tail1 < tail1End) {
		const qint32 charDiff = *tail1 - *tail2;
		if (charDiff != 0) {
			return charDiff;
		}
		++tail1;
		++tail2;
	}
	return 0;
}

/**
 * Compares the part of an indexed window that lies beyond the packed prefix
 * with an external character run.
 *
 * @param bitMaskPos  index of the entry in sArray
 * @param seq         first of the wAfterBits characters to compare against
 * @return 0 when all wAfterBits characters match, otherwise the first non-zero
 *         character difference (indexed window minus seq)
 */
qint32 SArrayIndex::compareAfterBits(quint32 bitMaskPos, const char* seq) const {
	const char* indexed = (const char*)sArray[bitMaskPos] + wCharsInMask;
	const char* sample = seq;
	const char* const indexedEnd = indexed + wAfterBits;
	while (indexed < indexedEnd) {
		const qint32 charDiff = *indexed - *sample;
		if (charDiff != 0) {
			return charDiff;
		}
		++indexed;
		++sample;
	}
	return 0;
}


/**
 * Same ordering as compareBit(), but the entries are identified by pointers
 * into sArray; the mask values are read from the bitMask slots at the same
 * offsets.
 *
 * @return 0 when equal, otherwise the first non-zero difference
 */
qint32 SArrayIndex::compareBitByPos(const quint32* x1, const quint32* x2) const {
	const qint32 maskDiff = bitMask[x1 - sArray] - bitMask[x2 - sArray];
	if (maskDiff != 0) {
		return maskDiff;
	}

	// Masks tie: compare the characters not covered by the mask.
	const char* tail1 = (const char*)*x1 + wCharsInMask;
	const char* tail2 = (const char*)*x2 + wCharsInMask;
	const char* const tail1End = tail1 + wAfterBits;
	while (tail1 < tail1End) {
		const qint32 charDiff = *tail1 - *tail2;
		if (charDiff != 0) {
			return charDiff;
		}
		++tail1;
		++tail2;
	}
	return 0;
}


/** Exchanges the values stored in x[a] and x[b]. */
void SArrayIndex::swap(quint32* x, quint32 a, quint32 b) const {
	quint32* first = x + a;
	quint32* second = x + b;
	const quint32 saved = *first;
	*first = *second;
	*second = saved;
}

/**
 * Exchanges two mask entries together with their companion sArray entries.
 * x1 and x2 point into bitMask; the sArray slots at the same offsets are
 * swapped as well.
 */
void SArrayIndex::swapBit(quint32* x1, quint32* x2) const {
	quint32& pos1 = sArray[x1 - bitMask];
	quint32& pos2 = sArray[x2 - bitMask];

	// mask values
	const quint32 savedMask = *x1;
	*x1 = *x2;
	*x2 = savedMask;

	// matching positions
	const quint32 savedPos = pos1;
	pos1 = pos2;
	pos2 = savedPos;
}


	
/** Exchanges the n-element runs starting at x[a] and x[b], element by element. */
void SArrayIndex::vecswap(quint32* x, quint32 a, quint32 b, quint32 n) {
	quint32* left = x + a;
	quint32* right = x + b;
	for (quint32 done = 0; done < n; done++) {
		const quint32 saved = left[done];
		left[done] = right[done];
		right[done] = saved;
	}
}

/**
 * Exchanges two n-element runs of bitMask (starting at x1 and x2) together
 * with the sArray runs at the same offsets.
 *
 * @return always 0; the value only exists so that the call can be used as an
 *         operand of && in sortBit()
 */
int SArrayIndex::vecswapBit(quint32* x1, quint32* x2, quint32 n) {
	quint32* pos1 = sArray + (x1 - bitMask);
	quint32* pos2 = sArray + (x2 - bitMask);
	for (quint32 k = 0; k < n; k++) {
		// mask values
		const quint32 savedMask = x1[k];
		x1[k] = x2[k];
		x2[k] = savedMask;

		// matching positions
		const quint32 savedPos = pos1[k];
		pos1[k] = pos2[k];
		pos2[k] = savedPos;
	}
	return 0;
}

		
		/** Returns the index of the median of the three indexed x[] values.*/
quint32 SArrayIndex::med3(quint32* x, quint32 a, quint32 b, quint32 c) {
	// Pairwise orderings of the three candidates under compare().
	const qint32 ab = compare(x[a], x[b]);
	const qint32 ac = compare(x[a], x[c]);
	const qint32 bc = compare(x[b], x[c]);

	if (ab < 0) {
		// x[a] < x[b]
		if (bc < 0) {
			return b;
		}
		return ac < 0 ? c : a;
	}
	// x[a] >= x[b]
	if (bc > 0) {
		return b;
	}
	return ac > 0 ? c : a;
}

/**
 * Returns whichever of the indexes a, b, c refers to the median of the three
 * entries of x under the compareBit() ordering.
 */
quint32 SArrayIndex::med3Bit(quint32* x, quint32 a, quint32 b, quint32 c) {
	// Pairwise orderings of the three candidates.
	const qint32 ab = compareBit(x + a, x + b);
	const qint32 ac = compareBit(x + a, x + c);
	const qint32 bc = compareBit(x + b, x + c);

	if (ab < 0) {
		// x[a] < x[b]
		if (bc < 0) {
			return b;
		}
		return ac < 0 ? c : a;
	}
	// x[a] >= x[b]
	if (bc > 0) {
		return b;
	}
	return ac > 0 ? c : a;
}

/**
 * Binary-searches sArray for a window equal to seq under
 * compare(const char*, const char*).
 *
 * On success t->currSample is set to seq and t->currPos to the index of the
 * first entry of the run of equal windows.
 *
 * @return true when a matching window exists, false otherwise
 */
bool SArrayIndex::find(SArrayIndex::SAISearchContext* t, const char* seq)  {
	quint32* positions = sArray;
	qint32 lo = 0;
	qint32 hi = size_1;
	while (lo <= hi) {
		const quint32 probe = (lo + hi) / 2;
		const qint32 order = compare((const char*)positions[probe], seq);
		if (order == 0) {
			// Walk left to the first entry of the run of equal windows.
			qint32 first = probe;//signed
			while (first > 0 && compare((char*)positions[first - 1], seq) == 0) {
				first--;
			}
			t->currSample = seq;
			t->currPos = first;
			return true;
		}
		if (order < 0) {
			lo = probe + 1;
		} else {
			hi = probe - 1;
		}
	}
	return false;
}

/**
 * Looks up a sample through the bit-mask index.
 *
 * The search runs in two stages: a binary search in the first-level array
 * l1bitMask narrows the range, then a binary search in bitMask finds an entry
 * whose packed value equals bitValue. When the window is longer than the packed
 * prefix (wAfterBits > 0) the neighbouring entries with the same packed value
 * are scanned with compareAfterBits() for one whose remaining characters match.
 *
 * @param t         search context; on success currPos receives the index of the
 *                  first matching entry, bitValue and currSample are stored too
 * @param bitValue  packed value of the first wCharsInMask characters of seq
 * @param seq       the sample being searched for
 * @return TRUE when a matching entry exists, FALSE otherwise
 */
bool SArrayIndex::findBit(SArrayIndex::SAISearchContext* t, quint32 bitValue, const char* seq) {
	// Stage 1: narrow the range using the first-level array.
	qint32 low = 0;
	qint32 high = L1_SIZE_1;
	quint32* a = l1bitMask;
	while (low <= high) {
		quint32 mid = (low + high) >> 1;
		qint32 rc = a[mid]-bitValue;
		if (rc < 0) {
			low = mid + 1;
		} else if (rc > 0) {
			high = mid - 1;
		} else {
			// Widen to the whole run of equal first-level entries. The upper
			// bound is the length of l1bitMask (L1_SIZE), which is smaller
			// than size for large indexes.
			for(low=mid+1;low < (qint32)L1_SIZE && a[low]==bitValue; low++){};
			for(high=mid-1;high >  0 && a[high]==bitValue; high--){};
			break;
		}
	}
	// Translate the first-level bounds into bitMask indexes.
	quint32 newLow = high>0? high*l1Step : 0;
	high = low < L1_SIZE_1? low*l1Step : size-1;
	low = newLow;

	// Stage 2: binary search in the full mask array.
	a = bitMask;
	while (low <= high) {
	    quint32 mid = (low + high) >> 1;
		qint32 rc = a[mid]-bitValue;
		if (rc < 0) {
			low = mid + 1;
		} else if (rc > 0) {
			high = mid - 1;
		} else {//found bitMask
			if (wAfterBits == 0) {
				// The mask covers the whole window: step back to the first
				// equal entry, never looking before the start of bitMask.
				quint32* maskPos = bitMask+mid;
				for (; maskPos > bitMask && compareBit(maskPos, maskPos-1) == 0; maskPos--){};
				t->currPos = maskPos - bitMask;
			} else {
				quint32 midVal = a[mid];
				const char* afterBitsSeq = seq+wCharsInMask;
				bool found = FALSE;
				rc = compareAfterBits(mid, afterBitsSeq);
				if (rc == 0) {
					found = TRUE;// going to the start of searching item
					while (mid > 0 && a[mid-1] == midVal && compareAfterBits(mid-1, afterBitsSeq) == 0) {
						mid--;
					}
				} else if (int(rc) > 0) { // searching item should have lower index
					// mid is unsigned, so the wrap below zero is detected via int(mid)
					for (mid = mid-1; int(mid)>= 0 && a[mid] == midVal; mid--) {
						if (compareAfterBits(mid, afterBitsSeq) == 0) {
							found = TRUE;
							while (mid > 0 && a[mid-1] == midVal && compareAfterBits(mid-1, afterBitsSeq) == 0) {
								mid--;
							}
							break;
						}
					}
				} else { //if (rc < 0) { // searching item should have greater index
					for (mid = mid+1;mid < size && a[mid] == midVal; mid++) {
						if (compareAfterBits(mid, afterBitsSeq) == 0) {
							found = TRUE;
							break;
						}
					}
				}
				if (!found) {
					return FALSE;
				}
				t->currPos = mid;
			}
			t->bitValue = bitValue;
			t->currSample = seq;
			return TRUE;
		}
	}
	return FALSE;
}
/*
bool SArrayIndex::findBit(SArrayIndex::SAISearchContext* t, quint32 bitValue, char* seq)  {
	qint32 low = 0;
	qint32 high = size-1;
	quint32* a = bitMask;
	while (low <= high) {
		quint32 mid = (low + high) / 2;
		qint32 rc = a[mid]-bitValue;
		if (rc < 0) {
			low = mid + 1;
		} else if (rc > 0) {
			high = mid - 1;
		} else {//found bitMask
			if (wAfterBits == 0) {
				quint32* maskPos = bitMask+mid;
				for (; maskPos>=0 && compareBit(maskPos, maskPos-1) == 0; maskPos--){};
				t->currPos = maskPos - bitMask;
			} else {
				quint32 midVal = a[mid];
				char* afterBitsSeq = seq+wCharsInMask;
				bool found = FALSE;
				rc = compareAfterBits(mid, afterBitsSeq);
				if (rc == 0) {
					found = TRUE;// going to the start of searching item
					while (mid > 0 && a[mid-1] == midVal && compareAfterBits(mid-1, afterBitsSeq) == 0) {
						mid--;
					}
				} else if (rc > 0) { // searching item should have lower index
					for (mid = mid-1; mid>= 0 && a[mid] == midVal; mid--) {
						if (compareAfterBits(mid, afterBitsSeq) == 0) {
							found = TRUE;
							while (mid > 0 && a[mid-1] == midVal && compareAfterBits(mid-1, afterBitsSeq) == 0) {
								mid--;
							}
							break;
						}
					}
				} else { //if (rc < 0) { // searching item should have greater index
					for (mid = mid+1;mid < size && a[mid] == midVal; mid++) {
						if (compareAfterBits(mid, afterBitsSeq) == 0) {
							found = TRUE;
							break;
						}
					}
				} 
				if (!found) {
					return FALSE;
				}
				t->currPos = mid;
			}
			t->bitValue = bitValue;
			t->currSample = seq;
			return TRUE;
		}
	}
	return FALSE;
}*/
/*
quint32 SArrayIndex::nextArrSeqPos(SArrayIndex::SAISearchContext* t) {
if (t->currPos==0xFFFFFFFF) {
return 0xFFFFFFFF;
}
if (t->reverseDir) {
quint32 result = sArray[t->currPos] - (quint32)seqStart;
if (bitMask) {
if (t->currPos==0 || compareBit(bitMask+t->currPos-1, bitMask+t->currPos)) {
t->currPos=t->startPos+1;
t->reverseDir = FALSE;
goto forward1;
}
} else {
if (t->currPos==0 || compare((char*)sArray[t->currPos-1], t->currSample)) {
t->currPos=t->startPos+1;
t->reverseDir = FALSE;
goto forward2;
}
} 
t->currPos--;
return result;
} else {
t->currPos++;
if (bitMask) {
forward1:
if (t->currPos==size || compareBit(bitMask+t->currPos-1, bitMask+t->currPos)) {
t->currPos =0xFFFFFFFF;
return t->currPos;
}
} else {
forward2:
if (t->currPos==size || compare((char*)sArray[t->currPos], t->currSample)) {
t->currPos=0xFFFFFFFF;
return t->currPos;
}
} 
quint32 result = sArray[t->currPos] - (quint32)seqStart;
return result;
}
}
*/

/**
 * Advances the search context to the next index entry.
 *
 * t->currPos is incremented; it is set to 0xFFFFFFFF when the new position is
 * past the end of the index or its entry no longer matches the sample.
 *
 * @return 0xFFFFFFFF when the context was already exhausted on entry; otherwise
 *         the result value, which is currently always 0 because the position
 *         computation is disabled (see the TODO below)
 */
quint32 SArrayIndex::nextArrSeqPos(SArrayIndex::SAISearchContext* t) {
	const quint32 exhaustedMark = 0xFFFFFFFF;
	if (t->currPos == exhaustedMark) {
		return exhaustedMark;
	}
//TODO: 64 bit issue	quint32 result = sArray[t->currPos] - (quint32)seqStart;
	quint32 result = 0;
	t->currPos++;

	// The checks run in this order and stop at the first one that fails.
	bool exhausted = (t->currPos == size);
	if (!exhausted && bitMask) {
		exhausted = compareBit(bitMask + t->currPos - 1, bitMask + t->currPos) != 0;
	}
	if (!exhausted) {
		exhausted = compare((char*)sArray[t->currPos], t->currSample) != 0;
	}
	if (exhausted) {
		t->currPos = exhaustedMark;
	}
	return result;
}

void SArrayIndex::debugCheck(char unknownChar){
  quint32 i;
  for ( i=1; i < size; i++) {
    if (bitMask[i-1] > bitMask[i]) {
      //DBG(printf("error!!\n"));
    }
  }

  quint32* prev = bitMask;
  for (i=1; i < size; i++) {
    quint32* next = bitMask+i;
    if (compareBit(prev, next) > 0) {
      //DBG(printf("error1\n"));
    }
    prev = next;
  }
  quint32 prevMask = 0;
  for (i=1; i < size; i++) {
    quint32 newMask = bitMask[i];
    if (prevMask == newMask) {
      char* prevC =(char*)sArray[i-1]; 
      char* newC = (char*)sArray[i];
      if (memcmp(prevC, newC, wCharsInMask)) {
	//DBG(printf("error2\n"));
      }
    }
    prevMask = newMask;
  }
  if (unknownChar!=0) {
    for (i=0; i < size; i++) {
      char* prefix = (char*)sArray[i];
      for (quint32 j=0; j < w; j++) {
	if (prefix[j] == unknownChar) {
	  //DBG(printf("Error!!!"));
	}
      }
    }
  }
  for (i=1; i<(quint32)L1_SIZE; i++) {
    if (l1bitMask[i-1] > l1bitMask[i]) {
      //DBG(printf("err00r!\n"));
    }
  }
}

}//namespace
