/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

#include "EMBLPlainTextFormat.h"
#include "GenbankLocationParser.h"
#include "DocumentFormatUtils.h"

#include <gobjects/AnnotationTableObject.h>
#include <gobjects/DNASequenceObject.h>
#include <gobjects/GObjectUtils.h>

#include <core_api/IOAdapter.h>
#include <core_api/Task.h>
#include <core_api/DNAAlphabet.h>

#include <util_text/TextUtils.h>

namespace GB2 {

/* TRANSLATOR GB2::EMBLPlainTextFormat */    
/* TRANSLATOR GB2::EMBLGenbankAbstractDocument */ 

//TODO: local8bit or ascii??

/**
 * Constructs the EMBL plain-text format handler.
 *
 * Passes the PLAIN_EMBL format id, the translated display name "EMBL", the value 80
 * and the parent object to the EMBLGenbankAbstractDocument base class, then registers
 * the file extensions and the line prefixes used by this format.
 */
EMBLPlainTextFormat::EMBLPlainTextFormat(QObject* p)
: EMBLGenbankAbstractDocument(BaseDocumentFormats::PLAIN_EMBL, tr("EMBL"), 80, p)
{
    // Two-letter line codes: "FT" for feature lines, "SQ" for the sequence start.
    fPrefix = "FT";
    sequenceStartPrefix = "SQ";

    // File extensions associated with this format.
    extensions << "em";
    extensions << "emb";
    extensions << "embl";
}

/**
 * Quick check of whether a raw data sample looks like an EMBL file.
 *
 * @param data  start of the data to inspect
 * @param size  number of bytes available in data
 * @return true only if the sample has no characters from TextUtils::BINARY, is at
 *         least 100 bytes long and begins with "ID   "
 */
bool EMBLPlainTextFormat::isDataFormatSupported(const char* data, int size) const {
    //todo: improve handling
    const bool hasBinaryChars = TextUtils::contains(TextUtils::BINARY, data, size);
    if (hasBinaryChars) {
        return false;
    }
    if (size < 100) {
        return false;
    }
    //todo: improve format checking
    const bool startsWithIdLine = TextUtils::equals("ID   ", data, 5);
    return startsWithIdLine;
}

/**
 * Checks whether this format satisfies the given constraints.
 *
 * Any constraint set that requires write support is rejected here; everything else
 * is decided by EMBLGenbankAbstractDocument::checkConstraints().
 */
bool EMBLPlainTextFormat::checkConstraints(const DocumentFormatConstraints& c) const {
    // The base-class check is only consulted when write support is not requested.
    return !c.mustSupportWrite && EMBLGenbankAbstractDocument::checkConstraints(c);
}

//////////////////////////////////////////////////////////////////////////
// loading

/**
 * Parses an EMBL "ID" line into a newly allocated EMBLGenbankAbstractIDLine.
 *
 * The first two characters of the line (the line code) are dropped and the rest is
 * split on ';'. The first token becomes the entry name. When there is more than one
 * token and the last one has the form "<number> BP" or "<number> BP.", the number is
 * stored as the sequence length.
 *
 * @param line  the complete ID line, including its two-letter line code
 * @param si    receives an error message when the line cannot be parsed
 * @return the parsed ID line (the caller takes ownership), or NULL on error
 */
EMBLGenbankAbstractIDLine* EMBLPlainTextFormat::readIdLine(const QString& line, TaskStateInfo& si) {
    const QString idLineStr = line.mid(2).trimmed();
    if (idLineStr.length() < 6) {
        si.error = EMBLPlainTextFormat::tr("error_parsing_id_line");
        return NULL;
    }
    const QStringList tokens = idLineStr.split(";", QString::SkipEmptyParts);
    if (tokens.isEmpty()) {
        si.error = EMBLPlainTextFormat::tr("error_parsing_id_line");
        return NULL;
    }
    EMBLGenbankAbstractIDLine* idLine = new EMBLGenbankAbstractIDLine();
    idLine->name = tokens[0];
    if (tokens.size() > 1) {
        QString last = tokens.last().trimmed();
        // The length token may be terminated by a period ("1859 BP.").
        if (last.endsWith(".")) {
            last.chop(1);
        }
        if (last.endsWith("BP")) {
            // Remove the trailing unit so that only the number is left;
            // toInt() yields 0 if what remains is not a valid integer.
            last.chop(2);
            idLine->seqLen = last.trimmed().toInt();
        }
    }
    return idLine;
}

/**
 * Reads the header part of an EMBL entry from io.
 *
 * Lines are read one at a time. The first line must be the "ID" line and is parsed
 * with readIdLine(). Reading then continues until a line starting with "FT", "FH",
 * "SQ" or "//" is met; that line is pushed back with io->skip(-len) so that the
 * caller can read it again.
 *
 * @param io  adapter to read from
 * @param si  task state: cancelFlag is polled, progress is updated, error is set on failure
 * @return a newly allocated header, or NULL if no ID line was parsed
 */
EMBLGenbankAbstractHeader* EMBLPlainTextFormat::readHeader(IOAdapter* io, TaskStateInfo& si) {
    static int READ_BUFF_SIZE = 4096;

    QByteArray lineBuffer(READ_BUFF_SIZE, '\0');
    char* raw = lineBuffer.data();
    EMBLGenbankAbstractHeader* header = new EMBLGenbankAbstractHeader();

    bool readOk = true;
    for (;;) {
        const qint64 lineLen = io->readUntil(raw, READ_BUFF_SIZE, TextUtils::LINE_BREAKS, IOAdapter::Term_Include, &readOk);
        if (lineLen <= 0 || si.cancelFlag) {
            break;
        }
        if (!readOk) {
            si.error = EMBLGenbankAbstractDocument::tr("line_is_too_long_or_unexpected_oef");
            break;
        }
        if (lineLen < 2) {
            si.error = EMBLGenbankAbstractDocument::tr("line_is_too_short");
            break;
        }

        if (header->idLine == NULL) {
            // Still waiting for the ID line: overwrite the last byte read with a
            // terminator so the buffer can be turned into a QString.
            raw[lineLen - 1] = '\0';
            const QString firstLine = QString(raw).trimmed();
            if (firstLine.startsWith("ID")) {
                header->idLine = readIdLine(firstLine, si);
            } else {
                si.error = EMBLPlainTextFormat::tr("idline_not_first_line");
            }
            assert(si.hasErrors() || header->idLine != NULL);
            if (si.hasErrors()) {
                break;
            }
            continue;
        }

        si.progress = io->getProgress();

        const bool headerEnds = TextUtils::equals(raw, "FT", 2)
                             || TextUtils::equals(raw, "FH", 2)
                             || TextUtils::equals(raw, "SQ", 2)
                             || TextUtils::equals(raw, "//", 2);
        if (headerEnds) {
            // Rewind so the line that ended the header is read again by the caller.
            io->skip(-lineLen);
            break;
        }
        //todo: reading EMBL tags is not supported
    }

    if (header->idLine != NULL) {
        return header;
    }
    delete header;
    return NULL;
}

/**
 * Reads the feature table of an EMBL entry from io.
 *
 * "FH" and "XX" lines are skipped. Every "FT" line is checked for three spaces after
 * the line code and then handed to readAnnotation(). The first line with any other
 * line code ends the table and is pushed back with io->skip(-len).
 *
 * @param io      adapter to read from
 * @param si      task state: cancelFlag is polled, progress is updated, error is set on failure
 * @param offset  forwarded unchanged to readAnnotation()
 * @return the annotations read before the table ended, an error occurred or the task was cancelled
 */
QList<SharedAnnotationData> EMBLPlainTextFormat::readAnnotations(IOAdapter* io, TaskStateInfo& si, int offset) {
    static int READ_BUFF_SIZE = 8192;

    QList<SharedAnnotationData> list;
    //TODO: +1 here but no +1 in readHeader?
    QByteArray readBuffer(READ_BUFF_SIZE+1, '\0');
    char* cbuff = readBuffer.data();

    bool lineOk = true;
    int len = 0;
    while ((len = io->readUntil(cbuff, READ_BUFF_SIZE, TextUtils::LINE_BREAKS, IOAdapter::Term_Include, &lineOk)) > 0) {
        if (si.cancelFlag) {
            break;
        }
        if (!lineOk) {
            // Report the failure instead of silently stopping.
            si.error = EMBLGenbankAbstractDocument::tr("line_is_too_long_or_unexpected_oef");
            break;
        }
        // Only compare the two-letter line code when two bytes were actually read;
        // otherwise cbuff[1] would still hold a byte from a previous line.
        const bool hasLineCode = len >= 2;
        bool header   = hasLineCode && cbuff[0]=='F' && cbuff[1]=='H';
        bool skipLine = hasLineCode && cbuff[0]=='X' && cbuff[1]=='X';
        if (header || skipLine) {
            continue;
        }

        bool feature = hasLineCode && cbuff[0]=='F' && cbuff[1]=='T';
        if (!feature) {// end of feature table
            io->skip(-len);
            break;
        }
        if (len < 6 || !TextUtils::equals(cbuff+2, "   ", 3)) {//check line format: key starts on offset 6; (max len 15);
            io->skip(-len);
            si.error = EMBLGenbankAbstractDocument::tr("invalid_feature_format");
            break;
        }

        //parsing feature;
        SharedAnnotationData f = readAnnotation(io, cbuff, len, READ_BUFF_SIZE, si, offset);
        if (si.hasErrors()) {
            break;
        }
        list.push_back(f);
        si.progress = io->getProgress();
    }
    return list;
}


//////////////////////////////////////////////////////////////////////////
/// saving document
/**
 * Writing EMBL documents is not implemented.
 *
 * Fails an assertion when assertions are enabled; otherwise only sets ts.error.
 * The remaining arguments are ignored.
 */
void EMBLPlainTextFormat::storeDocument(Document* d, TaskStateInfo& ts, IOAdapterFactory* io, const QString& newDocURL) {
    Q_UNUSED(d);
    Q_UNUSED(io);
    Q_UNUSED(newDocURL);
    assert(false);
    ts.error = tr("writing_not_supported");
}

}//namespace
