/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008,2009 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

/* Routines for the P7_PROFILE structure - Plan 7's search profile
*                                         
*    1. The P7_PROFILE object: allocation, initialization, destruction.
*    2. Access methods.
*    3. Debugging and development code.
*    4. Unit tests.
*    5. Test driver.
*
* See also: 
*   modelconfig.c : routines that configure a profile given an HMM
*    
* SRE, Thu Jan 11 15:16:47 2007 [Janelia] [Sufjan Stevens, Illinois]
* SVN $Id: p7_profile.c 2818 2009-06-03 12:31:02Z eddys $
*/

#include <hmmer3/p7_config.h>

#include <math.h>

#include <string.h>
#ifdef HAVE_MPI
#include <mpi.h>
#endif

#include <hmmer3/easel/easel.h>
#include <hmmer3/easel/esl_vectorops.h>

#include <hmmer3/hmmer.h>


/*****************************************************************
* 1. The P7_PROFILE object: allocation, initialization, destruction.
*****************************************************************/

/* Function:  p7_profile_Create()
* Synopsis:  Allocates a profile.
* Incept:    SRE, Thu Jan 11 15:53:28 2007 [Janelia]
*
* Purpose:   Allocates for a profile of up to <M> nodes, for digital
*            alphabet <abc>.
*            
*            Because this function might be in the critical path (in
*            hmmscan, for example), we leave much of the model
*            unintialized, including scores and length model
*            probabilities. The <p7_ProfileConfig()> call is what
*            sets these. 
*            
*            The alignment mode is set to <p7_NO_MODE>.  The
*            reference pointer <gm->abc> is set to <abc>.
*
* Returns:   a pointer to the new profile.
*
* Throws:    <NULL> on allocation error.
*
* Xref:      STL11/125.
*/
P7_PROFILE *
p7_profile_Create(int allocM, const ESL_ALPHABET *abc)
{
    P7_PROFILE *gm = NULL;
    int         x;
    int         status;

    /* level 0 */
    ESL_ALLOC_WITH_TYPE(gm, P7_PROFILE*, sizeof(P7_PROFILE));
    gm->tsc       = NULL;
    gm->rsc       = NULL;
    gm->rf        = NULL;
    gm->cs        = NULL;
    gm->consensus = NULL;

    /* level 1 */
    ESL_ALLOC_WITH_TYPE(gm->tsc, float*,       sizeof(float)   * allocM * p7P_NTRANS); 
    ESL_ALLOC_WITH_TYPE(gm->rsc, float**,       sizeof(float *) * abc->Kp);
    ESL_ALLOC_WITH_TYPE(gm->rf,  char*,        sizeof(char)    * (allocM+2)); /* yes, +2: each is (0)1..M, +trailing \0  */
    ESL_ALLOC_WITH_TYPE(gm->cs,  char*,        sizeof(char)    * (allocM+2));
    ESL_ALLOC_WITH_TYPE(gm->consensus, char*,  sizeof(char)    * (allocM+2));
    gm->rsc[0] = NULL;

    /* level 2 */
    ESL_ALLOC_WITH_TYPE(gm->rsc[0], float*, sizeof(float) * abc->Kp * (allocM+1) * p7P_NR);
    for (x = 1; x < abc->Kp; x++) 
        gm->rsc[x] = gm->rsc[0] + x * (allocM+1) * p7P_NR;

    /* Initialize some edge pieces of memory that are never used,
    * and are only present for indexing convenience.
    */
    esl_vec_FSet(gm->tsc, p7P_NTRANS, -eslINFINITY);     /* node 0 nonexistent, has no transitions  */
    if (allocM > 1) {
        p7P_TSC(gm, 1, p7P_DM) = -eslINFINITY;             /* delete state D_1 is wing-retracted      */
        p7P_TSC(gm, 1, p7P_DD) = -eslINFINITY;
    }
    for (x = 0; x < abc->Kp; x++) {        
        p7P_MSC(gm, 0,      x) = -eslINFINITY;             /* no emissions from nonexistent M_0... */
        p7P_ISC(gm, 0,      x) = -eslINFINITY;             /* or I_0... */
        /* I_M is initialized in profile config, when we know actual M, not just allocated max M   */
    }
    x = esl_abc_XGetGap(abc);	                       /* no emission can emit/score gap characters */
    esl_vec_FSet(gm->rsc[x], (allocM+1)*p7P_NR, -eslINFINITY);
    x = esl_abc_XGetMissing(abc);	                      /* no emission can emit/score missing data characters */
    esl_vec_FSet(gm->rsc[x], (allocM+1)*p7P_NR, -eslINFINITY);

    /* Set remaining info  */
    gm->mode         = p7_NO_MODE;
    gm->L            = 0;
    gm->allocM       = allocM;
    gm->M            = 0;
    gm->nj           = 0.0f;

    gm->name         = NULL;
    gm->acc          = NULL;
    gm->desc         = NULL;
    gm->rf[0]        = 0;     /* RF line is optional annotation; this flags that it's not set yet */
    gm->cs[0]        = 0;     /* likewise for CS annotation line */
    gm->consensus[0] = 0;

    for (x = 0; x < p7_NEVPARAM; x++) gm->evparam[x] = p7_EVPARAM_UNSET;
    for (x = 0; x < p7_NCUTOFFS; x++) gm->cutoff[x]  = p7_CUTOFF_UNSET;
    for (x = 0; x < p7_MAXABET;  x++) gm->compo[x]   = p7_COMPO_UNSET;

    gm->abc         = abc;
    return gm;

ERROR:
    p7_profile_Destroy(gm);
    return NULL;
}


/* Function:  p7_profile_Copy()
* Synopsis:  Copy a profile.
* Incept:    SRE, Sun Feb 17 10:27:37 2008 [Janelia]
*
* Purpose:   Copies profile <src> to profile <dst>, where <dst>
*            has already been allocated to be of sufficient size.
*
*            Scores, mode, <L>, <M>, <nj>, the name/acc/desc strings,
*            the rf/cs/consensus annotation lines, and the evparam,
*            cutoff and compo arrays are copied. <dst->allocM> and
*            <dst->abc> are left untouched: they describe <dst>'s own
*            allocation. The emission rows are indexed up to
*            <src->abc->Kp>, so <dst> must have been created for an
*            alphabet of at least that size.
*
* Returns:   <eslOK> on success.
* 
* Throws:    <eslEINVAL> if <dst> is too small to fit <src>. If
*            duplicating name/acc/desc fails, the status returned by
*            <esl_strdup()> is passed through (an allocation error);
*            in that case <dst> is only partially copied, but it is
*            still safe to hand to <p7_profile_Destroy()>.
*/
int
p7_profile_Copy(const P7_PROFILE *src, P7_PROFILE *dst)
{
    int x,z;
    int status;

    if (src->M > dst->allocM) ESL_EXCEPTION(eslEINVAL, "destination profile is too small to hold a copy of source profile");

    esl_vec_FCopy(src->tsc, src->M*p7P_NTRANS, dst->tsc);
    for (x = 0; x < src->abc->Kp;   x++) esl_vec_FCopy(src->rsc[x], (src->M+1)*p7P_NR, dst->rsc[x]);
    for (x = 0; x < p7P_NXSTATES;   x++) esl_vec_FCopy(src->xsc[x], p7P_NXTRANS,       dst->xsc[x]);

    dst->mode        = src->mode;
    dst->L           = src->L;
    /* dst->allocM is deliberately NOT copied: it records how much memory
    * <dst> really owns. Taking the source's value could overstate that
    * capacity and let later code write past the end of dst's buffers.
    */
    dst->M           = src->M;
    dst->nj          = src->nj;

    /* Release old annotation and clear the pointers at once, so that an
    * early return below cannot leave a freed pointer behind in <dst>.
    */
    free(dst->name);  dst->name = NULL;
    free(dst->acc);   dst->acc  = NULL;
    free(dst->desc);  dst->desc = NULL;

    if ((status = esl_strdup(src->name,      -1, &(dst->name)))      != eslOK) return status;
    if ((status = esl_strdup(src->acc,       -1, &(dst->acc)))       != eslOK) return status;
    if ((status = esl_strdup(src->desc,      -1, &(dst->desc)))      != eslOK) return status;

    strcpy(dst->rf,        src->rf);         /* RF is optional: if it's not set, *rf=0, and strcpy still works fine */
    strcpy(dst->cs,        src->cs);         /* CS is also optional annotation */
    strcpy(dst->consensus, src->consensus);  /* consensus though is always present on a valid profile */

    for (z = 0; z < p7_NEVPARAM; z++) dst->evparam[z] = src->evparam[z];
    for (z = 0; z < p7_NCUTOFFS; z++) dst->cutoff[z]  = src->cutoff[z];
    for (z = 0; z < p7_MAXABET;  z++) dst->compo[z]   = src->compo[z];
    return eslOK;
}


/* Function:  p7_profile_Clone()
* Synopsis:  Duplicates a profile.
* Incept:    SRE, Mon Jun 25 08:29:23 2007 [Janelia]
*
* Purpose:   Create a new profile with the same <allocM> and alphabet
*            as <gm>, copy <gm> into it with <p7_profile_Copy()>, and
*            return it.
*
* Returns:   a pointer to the newly allocated copy; caller frees it
*            with <p7_profile_Destroy()>.
*
* Throws:    <NULL> if either the allocation or the copy fails; any
*            partially built copy is destroyed first.
*/
P7_PROFILE *
p7_profile_Clone(const P7_PROFILE *gm)
{
    P7_PROFILE *dup = p7_profile_Create(gm->allocM, gm->abc);

    if (dup == NULL) return NULL;

    if (p7_profile_Copy(gm, dup) != eslOK) {
        p7_profile_Destroy(dup);
        return NULL;
    }
    return dup;
}



/* Function:  p7_profile_SetNullEmissions()
* Synopsis:  Set all emission scores to zero (experimental).
* Incept:    SRE, Mon Jun 25 08:12:06 2007 [Janelia]
*
* Purpose:   Set all residue emission scores in profile <gm> to zero.
*            This makes the profile a null model, with all the same
*            length distributions as the original model, but
*            the emission probabilities of the background.
*            
*            Written to test the idea that score statistics will be
*            even better behaved when using a null model with the
*            same length distribution as the search model.
*
*            Rows 0..K-1 (canonical residues) and K+1..Kp-3
*            (noncanonical residues) are zeroed for nodes 0..M.
*            Row K is skipped: in Easel's digital alphabet that index
*            is the gap symbol (the one <esl_abc_XGetGap()> returns),
*            whose scores <p7_profile_Create()> sets to -infinity
*            because nothing may emit a gap. The last two rows
*            (Kp-2, Kp-1) are likewise left alone.
*
* Returns:   <eslOK> on success.
*/
int
p7_profile_SetNullEmissions(P7_PROFILE *gm)
{
    const int nscores = (gm->M+1) * p7P_NR;   /* scores per residue row, nodes 0..M */
    int       x;

    for (x = 0;            x <  gm->abc->K;    x++)  esl_vec_FSet(gm->rsc[x], nscores, 0.0);   /* canonicals: 0..K-1, not the gap at K */
    for (x = gm->abc->K+1; x <= gm->abc->Kp-3; x++)  esl_vec_FSet(gm->rsc[x], nscores, 0.0);   /* noncanonicals */
    return eslOK;
}


/* Function:  p7_profile_Reuse()
* Synopsis:  Prepare profile to be re-used for a new HMM.
* Incept:    SRE, Wed Jan  2 17:32:36 2008 [Janelia]
*
* Purpose:   Recycle profile <gm> so it can be configured for a new
*            HMM without reallocating. The per-HMM name/acc/desc
*            strings are freed, the rf/cs/consensus lines are emptied,
*            and <mode>, <L>, <M>, <nj> go back to their freshly
*            created values. Score memory and <allocM> are untouched.
*
* Returns:   <eslOK> always.
*/
int
p7_profile_Reuse(P7_PROFILE *gm)
{
    /* Per-HMM annotation is heap-allocated; free(NULL) is a no-op, so no guards are needed. */
    free(gm->name);
    free(gm->acc);
    free(gm->desc);
    gm->name = NULL;
    gm->acc  = NULL;
    gm->desc = NULL;

    /* Fixed-size annotation lines: an empty string means "not set". */
    gm->rf[0]        = '\0';
    gm->cs[0]        = '\0';
    gm->consensus[0] = '\0';

    /* Scalar state returns to its post-Create values; everything else stays. */
    gm->mode = p7_NO_MODE;
    gm->L    = 0;
    gm->M    = 0;
    gm->nj   = 0.0f;

    return eslOK;
}


/* Function:  p7_profile_Destroy()
* Synopsis:  Frees a profile.
* Incept:    SRE, Thu Jan 11 15:54:17 2007 [Janelia]
*
* Purpose:   Release profile <gm> and everything it owns: the
*            transition and emission score tables, the name/acc/desc
*            strings, and the rf/cs/consensus annotation lines.
*            A <NULL> <gm> is accepted and ignored. The alphabet
*            <gm->abc> is only a reference and is not freed.
*
* Returns:   (void).
*
* Xref:      STL11/125.
*/
void
p7_profile_Destroy(P7_PROFILE *gm)
{
    if (gm == NULL) return;

    /* Emission scores live in one slab anchored at rsc[0]; release the slab, then the row-pointer table. */
    if (gm->rsc != NULL) {
        free(gm->rsc[0]);
        free(gm->rsc);
    }

    /* free(NULL) is a no-op, so members that were never allocated need no special casing. */
    free(gm->tsc);
    free(gm->name);
    free(gm->acc);
    free(gm->desc);
    free(gm->rf);
    free(gm->cs);
    free(gm->consensus);
    free(gm);
}


/*****************************************************************
* 2. Access methods.
*****************************************************************/

/* Function:  p7_profile_IsLocal()
* Synopsis:  Return TRUE if profile is in a local alignment mode.
* Incept:    SRE, Thu Jul 12 11:57:49 2007 [Janelia]
*
* Purpose:   Report whether <gm->mode> is one of the two local modes,
*            <p7_LOCAL> or <p7_UNILOCAL>.
*
* Returns:   <TRUE> for a local mode, <FALSE> for any other mode.
*/
int
p7_profile_IsLocal(const P7_PROFILE *gm)
{
    return (gm->mode == p7_LOCAL || gm->mode == p7_UNILOCAL) ? TRUE : FALSE;
}

/* Function:  p7_profile_IsMultihit()
* Synopsis:  Return TRUE if profile is in a multihit alignment mode.
* Incept:    SRE, Thu Jul 12 11:58:58 2007 [Janelia]
*
* Purpose:   Report whether <gm->mode> is one of the two multihit
*            modes, <p7_LOCAL> or <p7_GLOCAL>.
*
* Returns:   <TRUE> for a multihit mode, <FALSE> for any other mode.
*/
int
p7_profile_IsMultihit(const P7_PROFILE *gm)
{
    return (gm->mode == p7_GLOCAL || gm->mode == p7_LOCAL) ? TRUE : FALSE;
}




/* Function:  p7_profile_GetTScore()
* Incept:    SRE, Wed Apr 12 14:20:18 2006 [St. Louis]
*
* Purpose:   Convenience function that looks up a transition score in
*            profile <gm> for a transition from state type <st1> in
*            node <k1> to state type <st2> in node <k2>. For unique
*            state types that aren't in nodes (<p7T_S>, for example), the
*            <k> value is ignored, though it would be customarily passed as 0.
*            Return the transition score in <ret_tsc>.
*            
* Returns:   <eslOK> on success, and <*ret_tsc> contains the requested
*            transition score.            
* 
* Throws:    <eslEINVAL> if a nonexistent transition is requested. Now
*            <*ret_tsc> is set to $-\infty$.
*/
int
p7_profile_GetT(const P7_PROFILE *gm, char st1, int k1, char st2, int k2, float *ret_tsc)
{
    int   status;
    float tsc = 0.0f;

    switch (st1) {
  case p7T_S:  break;
  case p7T_T:  break;

  case p7T_N:
      switch (st2) {
  case p7T_B: tsc =  gm->xsc[p7P_N][p7P_MOVE]; break;
  case p7T_N: tsc =  gm->xsc[p7P_N][p7P_LOOP]; break;
  default:     ESL_XEXCEPTION(eslEINVAL, "bad transition %s->%s", p7_hmm_DecodeStatetype(st1), p7_hmm_DecodeStatetype(st2));
      }
      break;

  case p7T_B:
      switch (st2) {
  case p7T_M:  tsc = p7P_TSC(gm, k2-1, p7P_BM); break; /* remember, B->Mk is stored in [k-1][p7P_BM] */
  default:     ESL_XEXCEPTION(eslEINVAL, "bad transition %s->%s", p7_hmm_DecodeStatetype(st1), p7_hmm_DecodeStatetype(st2));
      }
      break;

  case p7T_M:
      switch (st2) {
  case p7T_M: tsc = p7P_TSC(gm, k1, p7P_MM); break;
  case p7T_I: tsc = p7P_TSC(gm, k1, p7P_MI); break;
  case p7T_D: tsc = p7P_TSC(gm, k1, p7P_MD); break;
  case p7T_E: 
      if (k1 != gm->M && ! p7_profile_IsLocal(gm)) ESL_EXCEPTION(eslEINVAL, "local end transition (M%d of %d) in non-local model", k1, gm->M);
      tsc = 0.0f;		/* by def'n in H3 local alignment */
      break;
  default:     ESL_XEXCEPTION(eslEINVAL, "bad transition %s_%d->%s", p7_hmm_DecodeStatetype(st1), k1, p7_hmm_DecodeStatetype(st2));
      }
      break;

  case p7T_D:
      switch (st2) {
  case p7T_M: tsc = p7P_TSC(gm, k1, p7P_DM); break;
  case p7T_D: tsc = p7P_TSC(gm, k1, p7P_DD); break;
  case p7T_E: 
      if (k1 != gm->M && ! p7_profile_IsLocal(gm)) ESL_EXCEPTION(eslEINVAL, "local end transition (D%d of %d) in non-local model", k1, gm->M);
      tsc = 0.0f;		/* by def'n in H3 local alignment */
      break;
  default:     ESL_XEXCEPTION(eslEINVAL, "bad transition %s_%d->%s", p7_hmm_DecodeStatetype(st1), k1, p7_hmm_DecodeStatetype(st2));
      }
      break;

  case p7T_I:
      switch (st2) {
  case p7T_M: tsc = p7P_TSC(gm, k1, p7P_IM); break;
  case p7T_I: tsc = p7P_TSC(gm, k1, p7P_II); break;
  default:     ESL_XEXCEPTION(eslEINVAL, "bad transition %s_%d->%s", p7_hmm_DecodeStatetype(st1), k1, p7_hmm_DecodeStatetype(st2));
      }
      break;

  case p7T_E:
      switch (st2) {
  case p7T_C: tsc = gm->xsc[p7P_E][p7P_MOVE]; break;
  case p7T_J: tsc = gm->xsc[p7P_E][p7P_LOOP]; break;
  default:     ESL_XEXCEPTION(eslEINVAL, "bad transition %s->%s", p7_hmm_DecodeStatetype(st1), p7_hmm_DecodeStatetype(st2));
      }
      break;

  case p7T_J:
      switch (st2) {
  case p7T_B: tsc = gm->xsc[p7P_J][p7P_MOVE]; break;
  case p7T_J: tsc = gm->xsc[p7P_J][p7P_LOOP]; break;
  default:     ESL_XEXCEPTION(eslEINVAL, "bad transition %s->%s", p7_hmm_DecodeStatetype(st1), p7_hmm_DecodeStatetype(st2));
      }
      break;

  case p7T_C:
      switch (st2) {
  case p7T_T:  tsc = gm->xsc[p7P_C][p7P_MOVE]; break;
  case p7T_C:  tsc = gm->xsc[p7P_C][p7P_LOOP]; break;
  default:     ESL_XEXCEPTION(eslEINVAL, "bad transition %s->%s", p7_hmm_DecodeStatetype(st1), p7_hmm_DecodeStatetype(st2));
      }
      break;

  default: ESL_XEXCEPTION(eslEINVAL, "bad state type %d in traceback", st1);
    }

    *ret_tsc = tsc;
    return eslOK;

ERROR:
    *ret_tsc = -eslINFINITY;
    return status;
}


/*****************************************************************
* 3. Debugging and development code.
*****************************************************************/

/* Function:  p7_profile_Validate()
* Incept:    SRE, Tue Jan 23 13:58:04 2007 [Janelia]
*
* Purpose:   Validates the internals of the generic profile structure
*            <gm>.
*            
*            TODO: currently this function is incomplete, and only
*            validates the entry distribution.
*
*            The entry distribution is rebuilt as <pstart[1..M]> from
*            the B->M_k transition scores and handed to
*            <esl_vec_DValidate()> with tolerance <tol>.
*            
* Returns:   <eslOK> if <gm> internals look fine. Returns <eslFAIL>
*            if something is wrong, and leaves an error message in
*            <errbuf> if caller passed it non-<NULL>. <eslFAIL> is
*            also what comes back if the scratch array cannot be
*            allocated.
*/
int
p7_profile_Validate(const P7_PROFILE *gm, char *errbuf, float tol)
{
    int     status;
    int     k;
    int     is_local;
    double *pstart = NULL;

    ESL_ALLOC_WITH_TYPE(pstart, double*, sizeof(double) * (gm->M+1));
    pstart[0] = 0.0;

    /* Glocal: B->M_k is an explicit probability distribution (left wing
    * retraction), so each entry stands alone.
    * Local: the distribution is implicit; each B->M_k score covers one
    * of the M(M+1)/2 fragments, so it is weighted by the number of
    * exits available from node k.
    * (The local computation is also in emit.c:sample_endpoints.)
    */
    is_local = p7_profile_IsLocal(gm);
    for (k = 1; k <= gm->M; k++) {
        const int nexits = is_local ? (gm->M - k + 1) : 1;
        pstart[k] = exp(p7P_TSC(gm, k-1, p7P_BM)) * nexits;
    }

    if (esl_vec_DValidate(pstart, gm->M+1, tol, NULL) != eslOK) ESL_XFAIL(eslFAIL, errbuf, "profile entry distribution is not normalized properly");

    free(pstart);
    return eslOK;

ERROR:
    free(pstart);   /* NULL-safe: also reached when the allocation itself failed */
    return eslFAIL;
}

/* Function:  p7_profile_Compare()
* Synopsis:  Compare two profiles for equality.
* Incept:    SRE, Thu Jun 21 17:57:56 2007 [Janelia]
*
* Purpose:   Decide whether profiles <gm1> and <gm2> carry the same
*            model. Mode and <M> must match exactly; transition,
*            emission and special-state scores are compared with
*            <esl_vec_FCompare()> at tolerance <tol>. Annotation is
*            ignored. The emission rows are walked using <gm1>'s
*            alphabet size; <gm2>'s alphabet is not consulted.
*
* Returns:   <eslOK> if the profiles match, <eslFAIL> at the first
*            difference found.
*/
int
p7_profile_Compare(P7_PROFILE *gm1, P7_PROFILE *gm2, float tol)
{
    int i;

    /* cheap structural checks first */
    if (gm1->mode != gm2->mode || gm1->M != gm2->M) return eslFAIL;

    /* transition scores */
    if (esl_vec_FCompare(gm1->tsc, gm2->tsc, gm1->M*p7P_NTRANS, tol) != eslOK) {
        return eslFAIL;
    }

    /* emission scores, one row per alphabet symbol */
    for (i = 0; i < gm1->abc->Kp; i++) {
        if (esl_vec_FCompare(gm1->rsc[i], gm2->rsc[i], (gm1->M+1)*p7P_NR, tol) != eslOK) {
            return eslFAIL;
        }
    }

    /* special-state transition scores */
    for (i = 0; i < p7P_NXSTATES; i++) {
        if (esl_vec_FCompare(gm1->xsc[i], gm2->xsc[i], p7P_NXTRANS, tol) != eslOK) {
            return eslFAIL;
        }
    }

    return eslOK;
}

//int p7_profile_Dump( P7_PROFILE* p, FILE* out ) {
//    int i = 0;
//    int j = 0;
//    fprintf( out, "P7_PROFILE_DUMP\n" );
//    fprintf( out, "mode %d\n", p->mode );
//    fprintf( out, "L %d\n", p->L );
//    fprintf( out, "allocM %d\n", p->allocM );
//    fprintf( out, "M %d\n", p->M );
//    fprintf( out, "nj %f\n", p->nj );
//    fprintf( out, "name %s\n", p->name );
//    fprintf( out, "acc %s\n", p->acc );
//    fprintf( out, "desc %s\n", p->desc );
//    fprintf( out, "rf %s\n", p->rf );
//    fprintf( out, "cs %s\n", p->cs );
//    fprintf( out, "consensus %s\n", p->consensus );
//    fprintf( out, "evparam: " );
//
//    for( i = 0; i < p7_NEVPARAM; ++i ) {
//        fprintf( out, " %f ", p->evparam[i] );
//    }
//    fprintf( out, "\ncutoff: " );
//    for( i = 0; i < p7_NCUTOFFS; ++i ) {
//        fprintf( out, " %f ", p->cutoff[i] );
//    }
//    fprintf( out, "\ncompo: " );
//    for( i = 0; i < p7_MAXABET; ++i ) {
//        fprintf( out, " %f ", p->compo[i] );
//    }
//    fprintf( out, "\n" );
//    fprintf( out, "abc %d\n", p->abc->type );
//
//    fprintf( out, "tsc: " );
//    for( i = 0; i < p->M; ++i ) {
//        for( j = 0; j < p7P_NTRANS; ++j ) {
//            fprintf( out, " %f ", p7P_TSC( p, i, j ) );
//        }
//    }
//
//    fprintf( out, "\nxsc: " );
//    for( i = 0; i < p7P_NXSTATES; ++i ) {
//        for( j = 0; j < p7P_NXTRANS; ++j ) { 
//            fprintf( out, " %f ", p->xsc[i][j] );
//        }
//    }
//
//    fprintf( out, "\n" );
//    // without rsc
//    return 0;
//}

/*****************************************************************
* HMMER - Biological sequence analysis with profile HMMs
* Version 3.0b2; June 2009
* Copyright (C) 2009 Howard Hughes Medical Institute.
* Other copyrights also apply. See the COPYRIGHT file for a full list.
* 
* HMMER is distributed under the terms of the GNU General Public License
* (GPLv3). See the LICENSE file for details.
*****************************************************************/
