/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

#include "DocumentFormatUtils.h"

#include <core_api/AppContext.h>
#include <core_api/DNAAlphabet.h>
#include <core_api/IOAdapter.h>
#include <core_api/DocumentModel.h>
#include <core_api/DocumentFormats.h>

#include <util_gui/DialogUtils.h>
#include <util_text/TextUtils.h>

#include <datatype/MAlignment.h>
#include <gobjects/AnnotationTableObject.h>
#include <gobjects/DNASequenceObject.h>
#include <gobjects/GObjectTypes.h>
#include <gobjects/GObjectRelationRoles.h>


namespace GB2 {

// Asks the application-wide alphabet registry for the alphabets matching 'arr'
// (the registry is queried with its boolean flag set to true) and returns the
// first entry of the reported list.
// A missing registry or an empty result is treated as a programming error and
// is only checked by assert.
DNAAlphabet* DocumentFormatUtils::findAlphabet(const QByteArray& arr) {
    DNAAlphabetRegistry* registry = AppContext::getDNAAlphabetRegistry();
    assert(registry);
    const QList<DNAAlphabet*> candidates = registry->findAlphabets(arr, true);
    assert(!candidates.empty());
    return candidates.first();
}

// Region-restricted variant: forwards 'arr' together with 'regionsToProcess'
// to the alphabet registry (boolean flag set to true) and returns the first
// alphabet of the reported list.
// A missing registry or an empty result is only checked by assert.
DNAAlphabet* DocumentFormatUtils::findAlphabet(const QByteArray& arr, const QList<LRegion>& regionsToProcess) {
    DNAAlphabetRegistry* registry = AppContext::getDNAAlphabetRegistry();
    assert(registry);
    const QList<DNAAlphabet*> candidates = registry->findAlphabets(arr, regionsToProcess, true);
    assert(!candidates.empty());
    return candidates.first();
}


// Returns the full list of alphabets the registry reports for 'arr'
// (the registry is queried with its boolean flag set to false).
// Unlike findAlphabet(), the list is returned as is and may be empty.
QList<DNAAlphabet*> DocumentFormatUtils::findAlphabets(const QByteArray& arr) {
    DNAAlphabetRegistry* registry = AppContext::getDNAAlphabetRegistry();
    assert(registry);
    return registry->findAlphabets(arr, false);
}


// Creates a DNASequenceObject named 'sequenceName' from 'sequence', appends it
// to 'objects' and returns it.
//   al       - alphabet to use; when NULL it is detected via findAlphabet(sequence)
//   sequence - modified in place: translated through TextUtils::UPPER_CASE_MAP
//              when the alphabet is not case sensitive, then passed to trySqueeze()
DNASequenceObject* DocumentFormatUtils::addSequenceObject(QList<GObject*>& objects, const QString& sequenceName, QByteArray& sequence, DNAAlphabet* al) {
    if (!al) {
        // no alphabet supplied by the caller -> detect it from the data
        al = findAlphabet(sequence);
        assert(al != NULL);
    }

    const bool caseInsensitive = !al->isCaseSensitive();
    if (caseInsensitive) {
        TextUtils::translate(TextUtils::UPPER_CASE_MAP, sequence.data(), sequence.length());
    }

    trySqueeze(sequence);

    DNASequenceObject* seqObj = new DNASequenceObject(sequence, al, sequenceName);
    objects.append(seqObj);
    return seqObj;
}


// Adds a sequence object built from several contigs merged into one buffer.
//
// Single contig: behaves like addSequenceObject() with the contig name as the
// object name and an alphabet detected from the whole buffer.
//
// Two or more contigs (contigs.size() must equal mergedMapping.size()):
//   - the alphabet is detected from the mapped regions only;
//   - every gap between consecutive mapped regions is overwritten with the
//     alphabet's default symbol;
//   - a sequence object named "Sequence" is appended to 'objects';
//   - an annotation table named "Contigs" is appended to 'objects', holding one
//     "contig" annotation per mapped region (the contig names themselves are
//     not used on this path);
//   - when 'docUrl' is not empty the annotation table gets a SEQUENCE relation
//     to the created sequence object.
// Returns the created sequence object.
DNASequenceObject* DocumentFormatUtils::addMergedSequenceObject(QList<GObject*>& objects, const QString& docUrl, const QStringList& contigs, QByteArray& mergedSequence, const QList<LRegion>& mergedMapping) {
    const int numContigs = contigs.size();
    if (numContigs == 1) {
        DNAAlphabet* singleAlphabet = findAlphabet(mergedSequence);
        return DocumentFormatUtils::addSequenceObject(objects, contigs.first(), mergedSequence, singleAlphabet);
    }

    assert(numContigs >= 2);
    assert(numContigs == mergedMapping.size());

    DNAAlphabet* alphabet = findAlphabet(mergedSequence, mergedMapping);
    const char fillSymbol = alphabet->getDefaultSymbol();

    // overwrite the space between neighbouring regions with the default symbol
    for (int idx = 1; idx < mergedMapping.size(); ++idx) {
        const LRegion& left = mergedMapping[idx - 1];
        const LRegion& right = mergedMapping[idx];
        const int gapLen = right.startPos - left.endPos();
        assert(gapLen >= 0);
        if (gapLen <= 0) {
            continue;
        }
        qMemSet(mergedSequence.data() + left.endPos(), fillSymbol, (size_t)gapLen);
    }

    DNASequenceObject* seqObj = addSequenceObject(objects, "Sequence", mergedSequence, alphabet);
    AnnotationTableObject* contigTable = new AnnotationTableObject("Contigs");

    // link the annotation table to the sequence only when the document URL is known
    if (!docUrl.isEmpty()) {
        GObjectReference seqRef(docUrl, seqObj->getGObjectName(), GObjectTypes::DNA_SEQUENCE);
        contigTable->addObjectRelation(GObjectRelation(seqRef, GObjectRelationRole::SEQUENCE));
    }

    // one "contig" annotation per mapped region
    for (int idx = 0; idx < numContigs; ++idx) {
        SharedAnnotationData data(new AnnotationData());
        data->aminoStrand = TriState_No;
        data->name = "contig";
        data->location.append(mergedMapping[idx]);
        contigTable->addAnnotation(new Annotation(data), NULL);
    }
    objects.append(contigTable);
    return seqObj;
}


// Arrays larger than this many bytes are never squeezed.
#define MAX_REALLOC_SIZE (300*1000*1000)
// Minimal capacity/size ratio that makes squeezing worthwhile.
#define MIN_K_TO_REALLOC 1.07
// Releases unused capacity of 'a' when it is worth it:
//   - the array holds at most MAX_REALLOC_SIZE bytes, and
//   - its capacity exceeds its size by more than the MIN_K_TO_REALLOC factor.
// An empty array is squeezed only if it still holds reserved capacity.
void DocumentFormatUtils::trySqueeze(QByteArray& a) {
    if (a.isEmpty()) {
        // The capacity/size ratio is undefined for size == 0 (division by zero),
        // so decide explicitly: drop the reservation if there is one.
        if (a.capacity() > 0) {
            a.squeeze();
        }
        return;
    }
    //squeeze can cause 2x memusage -> avoid squeezing of large arrays
    float k =  float(a.capacity()) / a.size();
    if (a.size() <= MAX_REALLOC_SIZE && k > MIN_K_TO_REALLOC) {
        a.squeeze();
    }
}


int DocumentFormatUtils::getIntSettings(const QVariantMap& fs, const char* sName, int defVal) {
    QVariant v = fs.value(sName);
    if (v.type()!= QVariant::Int) {
        return defVal;
    }
    return v.toInt();
}

// Caches the sequence length in the format settings.
// When 'objects' contains exactly one object of type GObjectTypes::DNA_SEQUENCE,
// its sequence length is stored in 'fs' under MERGE_MULTI_DOC_SEQUENCE_SIZE_SETTINGS.
// In every other case 'fs' is left untouched.
// NOTE(review): the original comment announced "remove all cached sequence sizes"
// as a first step, but no entry is ever removed here, so a previously stored
// size stays in 'fs' when the number of sequences is not 1 - confirm the intent.
void DocumentFormatUtils::updateFormatSettings(QList<GObject*>& objects, QVariantMap& fs) {
    QList<GObject*> sequences;
    foreach(GObject* obj, objects) {
        if (obj->getGObjectType() == GObjectTypes::DNA_SEQUENCE) {
            sequences.append(obj);
        }
    }
    if (sequences.size() != 1) {
        return;
    }
    DNASequenceObject* so = qobject_cast<DNASequenceObject*>(sequences.first());
    // qobject_cast yields NULL when the object is not a DNASequenceObject:
    // treat it as a programming error in debug builds and skip it otherwise
    // instead of dereferencing a null pointer.
    assert(so != NULL);
    if (so == NULL) {
        return;
    }
    int len = so->getSequence().length();
    fs[MERGE_MULTI_DOC_SEQUENCE_SIZE_SETTINGS] = len;
}

// Shifts the start position of every region in 'location' by 'offset'.
// A zero offset leaves the list untouched; a shift that produces a negative
// start position is treated as a programming error (assert).
void DocumentFormatUtils::addOffset(QList<LRegion>& location, int offset) {
    if (offset != 0) {
        const int regionCount = location.size();
        for (int idx = 0; idx < regionCount; ++idx) {
            LRegion& region = location[idx];
            region.startPos += offset;
            assert(region.startPos >= 0);
        }
    }
}

// Returns the document formats that accept the header of the file at 'url'.
// Formats whose supported file extensions contain the extension of 'url' are
// put in front of the list, the others are appended to its end.
// When more than one format is left, the RAW_DNA_SEQUENCE format is dropped;
// if there is still more than one, the PLAIN_TEXT format is dropped as well.
// An empty 'url' yields an empty list.
QList<DocumentFormat*> DocumentFormatUtils::detectFormat(const QString& url) {
    QList<DocumentFormat*> detected;
    if (url.isEmpty()) {
        return detected;
    }

    DocumentFormatConstraints constraints;
    constraints.rawData = BaseIOAdapters::readFileHeader(url);
    constraints.checkRawData = true;

    DocumentFormatRegistry* registry = AppContext::getDocumentFormatRegistry();
    QList<DocumentFormatId> candidateIds = registry->selectFormats(constraints);
    const QString extension = DialogUtils::getDocumentExtension(url);

    foreach(DocumentFormatId formatId, candidateIds) {
        DocumentFormat* format = registry->getFormatById(formatId);
        assert(format);
        if (!format->checkConstraints(constraints)) {
            continue;
        }
        const bool extensionMatches = format->getSupportedDocumentFileExtensions().contains(extension);
        if (extensionMatches) {
            detected.prepend(format);
        } else {
            detected.append(format);
        }
    }

    // the generic raw-sequence and plain-text formats are last resorts:
    // drop them while a more specific alternative remains
    if (detected.size() > 1) {
        DocumentFormat* rawFormat = registry->getFormatById(BaseDocumentFormats::RAW_DNA_SEQUENCE);
        detected.removeOne(rawFormat);
        if (detected.size() > 1) {
            DocumentFormat* textFormat = registry->getFormatById(BaseDocumentFormats::PLAIN_TEXT);
            detected.removeOne(textFormat);
        }
    }

    return detected;
}

// Picks an alphabet shared by all sequences of the alignment and stores it in
// 'ma.alphabet'.
// The candidates are the alphabets reported by findAlphabets() for the first
// sequence, narrowed down by those reported for every following sequence; the
// first remaining candidate wins. When nothing is shared, 'ma' is left unchanged.
// If the chosen alphabet is not case sensitive, every aligned sequence is
// translated in place through TextUtils::UPPER_CASE_MAP.
void DocumentFormatUtils::assignAlphabet(MAlignment& ma) {
    QList<DNAAlphabet*> common;
    for (int row = 0, total = ma.getNumSequences(); row < total; ++row) {
        MAlignmentItem& rowItem = ma.alignedSeqs[row];
        const QList<DNAAlphabet*> rowAlphabets = DocumentFormatUtils::findAlphabets(rowItem.sequence);
        if (row == 0) {
            common = rowAlphabets;
            continue;
        }
        // keep only the candidates this row supports as well, preserving their order
        QList<DNAAlphabet*> stillCommon;
        foreach(DNAAlphabet* candidate, common) {
            if (rowAlphabets.contains(candidate)) {
                stillCommon.append(candidate);
            }
        }
        common = stillCommon;
        if (common.isEmpty()) {
            break;
        }
    }

    if (common.isEmpty()) {
        return; //nothing matched
    }

    ma.alphabet = common.first();
    if (ma.alphabet->isCaseSensitive()) {
        return;
    }

    for (int row = 0, total = ma.getNumSequences(); row < total; ++row) {
        MAlignmentItem& rowItem = ma.alignedSeqs[row];
        TextUtils::translate(TextUtils::UPPER_CASE_MAP, rowItem.sequence.data(), rowItem.sequence.length());
    }
}

} //namespace
