/* GNU polyxmass - the massist's program.
   -------------------------------------- 
   Copyright (C) 2000,2001,2002,2003,2004 Filippo Rusconi

   http://www.polyxmass.org

   This file is part of the "GNU polyxmass" project.
   
   The "GNU polyxmass" project is an official GNU project package (see
   www.gnu.org) released ---in its entirety--- under the GNU General
   Public License and was started at the Centre National de la
   Recherche Scientifique (FRANCE), that granted me the formal
   authorization to publish it under this Free Software License.

   This software is free software; you can redistribute it and/or
   modify it under the terms of the GNU  General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.
   
   This software is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.
   
   You should have received a copy of the GNU  General Public
   License along with this software; if not, write to the
   Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.
*/

#include "pxmchem-searchmass.h"
#include "pxmchem-masscalc.h"
#include "pxmchem-monomer.h"



gint
pxmchem_searchmass_polymer (PxmPolymer *polymer,
			    GPtrArray *GPA,
			    PxmPolchemdef *polchemdef,
			    PxmSearchmassOpt *smopt,
			    PxmCalcOpt *calcopt,
			    PxmIonizerule *ionizerule /* can be set by
							 the caller to
							 the
							 polchemdef->ionizerule
						      */,
			    gchar *prefix)
{
  gint count = 0;
  gint length = 0;
  
  gint iter = 0;
  gint jter = 0;
  gint kter = 0;
  
  gdouble min = 0;
  gdouble max = 0;

  gchar *modif = NULL;
    
  PxmCalcOpt *calcopt_local = NULL;

  PxmOligomer *oligomer_test = NULL;
  PxmOligomer *oligomer = NULL;

  PxmMonomer *monomer = NULL;

  PxmProp *prop_new = NULL;
  PxmProp *prop_found = NULL;
  

  /* We get a pointer to a polymer object where we are going to search
     for oligomers which have masses that match the mass passed in the
     PxmSearchmassOpt * parameter. The polymer sequence will be
     iterated and all combinations of oligomers are going to be tested
     for their masses matching the one to search for.

     In order to compare masses, each theoretical oligomer must have
     its masses calculated so that they can be compared with the one to
     be searched for. For this the usual mass calculating functions are
     used by passing to them the parameters that we get here: calcopt
     and ionizerule.
  
     The GPA array is an array where all the found oligomers (the ones
     the masses of which comply with the mass being searched for) are
     to be stored.
  */

  g_assert (polymer != NULL);
  g_assert (polymer->monomerGPA != NULL);
  length = polymer->monomerGPA->len;
  
  g_assert (GPA != NULL);
  g_assert (polchemdef != NULL);
  g_assert (smopt != NULL);

  g_assert (calcopt != NULL);
  
  g_assert (ionizerule != NULL);

  g_assert (prefix != NULL);
  

  if (length <= 0)
    return count;
  
  /* We have to calculate the min and max values. The calculation
     depends on the value of the smopt->tolerance_type variable.
  */
  if (smopt->tolerance_type == PXM_MASS_TOLERANCE_AMU)
    {
      /* atomic mass units 
       */
      min = smopt->mass - smopt->tolerance;
      max = smopt->mass + smopt->tolerance;
    }
  else if (smopt->tolerance_type == PXM_MASS_TOLERANCE_PCT)
    {
      /* percentage
       */
      min = smopt->mass - (smopt->tolerance * (smopt->mass /100));
      max = smopt->mass + (smopt->tolerance * (smopt->mass /100));
    }
  else if (smopt->tolerance_type == PXM_MASS_TOLERANCE_PPM)
    {
      /* part per million */
      min = smopt->mass - (smopt->tolerance * (smopt->mass /1000000));
      max = smopt->mass + (smopt->tolerance * (smopt->mass /1000000));
    }
  else
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
	     _("%s@%d: the value of the tolerance type is unknown: '%d'\n"),
	     __FILE__, __LINE__, smopt->tolerance_type);
      
      return -1;
    }

  /* The calcopt parameter contains the coordinates into which the
     masses must be searched in the polymer sequence.
  */
  if (calcopt->start_idx < 0 || calcopt->end_idx < 0)
    {
      calcopt->start_idx = 0;
      calcopt->end_idx = length - 1;
    }
  else if (calcopt->start_idx > length - 1
	   || calcopt->end_idx > length - 1)
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
	     _("%s@%d: invalid calcopt->start and calcopt->end indices\n"),
	     __FILE__, __LINE__);
      
      return -1;
    }
      
  /* Make a copy of the calcopt that was passed as parameter so that
     we can set the mass calculation-specific start_idx and end_idx 
     values without tampering with the 'calcopt' parameter.
  */
  calcopt_local = pxmchem_calcopt_dup (calcopt);

  /* We now know what mass interval the oligomer masses must fall
     within. The next work is now to iterate in the polymer's array
     of monomers within the the borders in calcopt.
  */
  oligomer_test = pxmchem_oligomer_new ();
  oligomer_test->masspair = libpolyxmass_masspair_new ();
  
  for (iter = calcopt->start_idx; 
       iter < calcopt->end_idx + 1; 
       iter++)
    {
      for (jter = 0; jter < calcopt->end_idx + 1 - iter; jter++)
	{
	  /* Set the borders of the test oligomer, so that the
	     oligomer is self-conscious of itself with respect to the
	     polymer sequence.
	  */
	  oligomer_test->start_idx = iter;
	  oligomer_test->end_idx = iter + jter;

	  /* Set the polymer from which this oligomer's mass calculations
	     will be performed.
	  */
	  oligomer_test->polymer = polymer;
	  
	  /* Reset the mass of the testing masspair.
	   */
	  libpolyxmass_masspair_set_both (oligomer_test->masspair, 
					  (gdouble) 0, (gdouble) 0);
	  
	  /* Make the mass calculation according to the different options
	     that were passed as parameter to this function.
	  */
	  
	  /* Here we need to check something serious. I decided that
	     in order to make code reuse optimal in the polyxmass
	     project, a number of functions are written to take
	     parameters that means different things depending on the
	     context. The function that we'll call below is one of
	     these functions: depending on the context the parameters
	     passed to it are calculated in different manners so that
	     the function does the expected work. For example, when a
	     selection is made in the sequence editor, and that we
	     need to calculate the masses for the selected oligomer,
	     the way the numbering of the monomers is done in the
	     sequence editor is different than the way the numbering
	     is done for defining the borders of an oligomer obtained
	     through the cleavage of a polymer. In the former case,
	     the end_idx is incremented by one respect to the latter
	     case, which is why in the function
	     masscalc_noncapped_monomer_GPA () we have the following
	     control loop : for (iter = calcopt->start_idx; iter <
	     calcopt->end_idx; iter++). Thus, since here the end_idx
	     is one value lower than the one reported upon selection
	     of an oligomer in the sequence editor window, we have to
	     increment it by one prior to passing its value to the
	     calculation function. For this change to happen
	     independently of the 'calcopt' parameter (that we use for
	     int he for loop), we work the mass calculations on a copy
	     of the 'calcopt':
	  */
	  calcopt_local->start_idx = oligomer_test->start_idx;
	  calcopt_local->end_idx = oligomer_test->end_idx + 1;

	  if (FALSE == pxmchem_masscalc_oligomer (oligomer_test,
						  polchemdef,
						  calcopt_local,
						  NULL, /* cleaveopt */
						  ionizerule,
						  oligomer_test->masspair))
	    {
	      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
		     _("%s@%d: failed to calculate the masses of oligomer: "
		       "[%d-%d]\n"),
		     __FILE__, __LINE__, 
		     oligomer_test->start_idx,
		     oligomer_test->end_idx);
	      
	      continue;
	    }
	  
	  /* At this point we have the masses of the currently tested
	     oligomer. We must compared these to the one that is searched
	     for.
	  */
	  if (smopt->mass_type == PXM_MASS_MONO)
	    {
	      if (oligomer_test->masspair->mono > max)
		{
		  /* The oligomer already is too heavy. We can safely
		     exit this inner for loop because it is certainly
		     not going to be lighter if we continue adding
		     monomers to it.
		  */
		  break;
		}
	      
	      if (oligomer_test->masspair->mono < min)
		{
		  /* The oligomer is still to light (its mass is less
		     than the minimum mass allowed for our mass
		     search), so we just continue adding this inner
		     for loop, which means we add monomers to this too
		     light oligomer.
		  */
		  continue;
		}
	    }
	  else if (smopt->mass_type == PXM_MASS_AVG)
	    {
	      if (oligomer_test->masspair->avg > max)
		{
		  /* The oligomer already is too heavy. We can safely
		     exit this inner for loop because it is certainly
		     not going to be lighter if we continue adding
		     monomers to it.
		  */
		  break;
		}
	      
	      if (oligomer_test->masspair->avg < min)
		{
		  /* The oligomer is still to light (its mass is less
		     than the minimum mass allowed for our mass
		     search), so we just continue adding this inner
		     for loop, which means we add monomers to this too
		     light oligomer.
		  */
		  continue;
		}
	    }
	  else
	    {
	      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
		     _("%s@%d: the smopt->mass_type value is incorrect: '%d'\n"),
		     __FILE__, __LINE__, smopt->mass_type);
	      
	      pxmchem_oligomer_free (oligomer_test);
	      
	      pxmchem_calcopt_free (calcopt_local);
	      
	      return -1;
	    }
	  
	      
	  /* Here, we know that the oligomer has masses complying
	     with the [min--max] mass that was initially searched
	     for. We create an oligomer but we do not need to
	     recalculate masses, so we pass NULL as the calcopt
	     param.
	  */
	  oligomer =
	    pxmchem_oligomer_with_options_new (polymer,
					       oligomer_test->start_idx,
					       oligomer_test->end_idx,
					       NULL, /*calcopt */
					       ionizerule,
					       smopt->plm_chement,
					       polchemdef);
	      
	  oligomer->masspair = 
	    libpolyxmass_masspair_dup (oligomer_test->masspair);
	  
	  /* The new oligomer now has a duplicated masspair object, so
	     it self-aware of its masses.
	  */

	  /* Make a property with the smopt struct after duplication
	     so that the oligomer will be totally self-contained.
	  */
	  prop_new = libpolyxmass_prop_new ();
	  libpolyxmass_prop_set_name (prop_new, "SEARCHMASS_OPT");
	      
	  prop_new->data = 
	    pxmchem_searchmassopt_dup (smopt,
				       PXM_SEARCHMASSOPT_PROP_DUP_DEEP_YES);
	      	      
	  /* Set the pointer to the free'ing function that will be called
	     when the prop object is freed.
	  */
	  prop_new->custom_free = pxmchem_searchmassopt_prop_free;
	  prop_new->custom_dup = pxmchem_searchmassopt_prop_dup;
	      
	  g_ptr_array_add (oligomer->propGPA, prop_new);


	  /* Now we can tailor the name of the oligomer, which will just
	     be its number starting at 1 apposed to the prefix that was provided by
	     the caller.
	  */
	  oligomer->name = g_strdup_printf ("%s#%d", prefix, count + 1);

	  /* Put in the oligomer a prop object of which the data is
	     the sequence of the oligomer, so that later it can be
	     displayed.
	  */
	  if (smopt->put_sequence == TRUE)
	    {
	      prop_new = libpolyxmass_prop_new ();
	      
	      libpolyxmass_prop_set_name (prop_new, "SEQUENCE");
	      
	      prop_new->data = 
		(gchar *) pxmchem_polymer_make_codes_string (polymer,
							     oligomer->
							     start_idx,
							     oligomer->
							     end_idx);
	      
	      g_assert (prop_new->data != NULL);
	      
	      g_ptr_array_add (oligomer->propGPA, prop_new);
	    }
	  
	  if (smopt->mnm_chement != PXMCHEMENT_MNM_NONE)
	    {
	      /* For each monomer in the polymer sequence GPtrArray of
		 monomers, we have to check if the monomer is modified with a
		 prop "MODIF". If so, we create an informative prop into the
		 oligomer.
	      */
	      for (kter = oligomer->start_idx; 
		   kter < oligomer->end_idx + 1; 
		   kter++)
		{
		  monomer = g_ptr_array_index (polymer->monomerGPA, kter);

		  prop_found =  libpolyxmass_prop_find_prop (monomer->propGPA,
							     NULL,
							     NULL,
							     "MODIF",
							     NULL,
							     PXM_UNDEF_PROP_CMP);
		  
		  if (NULL != prop_found)
		    {
		      modif = prop_found->data;
		      
		      /* The iterated monomer is modified, construct a string
			 representing the index (IN THE OLIGOMER AND NOT IN
			 THE POLYMER!) at which the modif is found.
		      */
		      prop_new = libpolyxmass_prop_new ();
		      libpolyxmass_prop_set_name (prop_new, "POS/IDX/MODIF");
		      prop_new->data =
			g_strdup_printf ("%d/%d/%s",
					 /* pos in the oligomer */
					 kter - oligomer->start_idx + 1,
					 /* index in the sequence */
					 kter,
					 (gchar *) modif);
		      
		      g_ptr_array_add (oligomer->propGPA, prop_new);
		    }
		}
	      /* end of 
		 for (kter = oligomer->start_idx; 
		 kter < oligomer->end_idx, 
		 kter++)
	      */

	      /*
		ONLY INFORMATIONAL STUFF, NOT FOR MASS CALCULATIONS...
		------------------------------------------------------
     
		Here we want to set properties to the oligomer for the following
		data:

		1. if the oligomer corresponds to the left end of the parent
		polymer.

		2.  if the oligomer corresponds to the right end of the parent
		polymer.
	      */
	      if (oligomer->start_idx == 0 && oligomer->end_idx == length - 1)
		{
		  /*
		    The oligomer is both the left end-terminal and right
		    end-terminal oligomer.
		  */
		  prop_new = 
		    libpolyxmass_prop_both_strings_new ("LEFT_RIGHT_END_OLIGOMER",
							"BOTH_ENDS");
      
		  g_ptr_array_add (oligomer->propGPA, prop_new);
      
		  /*
		    Now that we know that the current oligomer corresponds in
		    fact to the whole polymer (both left end-terminal and
		    right end-terminal oligomer of one polymer), we must check
		    if these two ends are modified or not.
		  */
		  prop_found = libpolyxmass_prop_find_prop (polymer->propGPA,
							    NULL,
							    NULL,
							    "LEFT_END_MODIF",
							    NULL,
							    PXM_UNDEF_PROP_CMP);
		  if (NULL != prop_found)
		    {
		      if (prop_found->data != NULL)
			{
			  prop_new = libpolyxmass_prop_dup (prop_found,
							    PXM_MODIF_PROP_DUP_DEEP_YES);
	      
			  g_ptr_array_add (oligomer->propGPA, prop_new);
			}
		    }
      
		  prop_found = libpolyxmass_prop_find_prop (polymer->propGPA,
							    NULL,
							    NULL,
							    "RIGHT_END_MODIF",
							    NULL,
							    PXM_UNDEF_PROP_CMP);
		  if (NULL != prop_found)
		    {
		      if (prop_found->data != NULL)
			{
			  prop_new = libpolyxmass_prop_dup (prop_found,
							    PXM_MODIF_PROP_DUP_DEEP_YES);
	      
			  g_ptr_array_add (oligomer->propGPA, prop_new);
			}
		    }
		}
	      /* end of 
		 if (oligomer->start_idx == 0 && oligomer->end_idx == length - 1)
	      */
	      else if (oligomer->start_idx == 0)
		{
		  prop_new = 
		    libpolyxmass_prop_both_strings_new ("LEFT_RIGHT_END_OLIGOMER",
							"LEFT_END");
      
		  g_ptr_array_add (oligomer->propGPA, prop_new);
      
		  /*
		    Now that we know that the current oligomer corresponds in
		    fact to the left end-terminal oligomer of the polymer),
		    we must check if these two ends are modified or not.
		  */
		  prop_found = libpolyxmass_prop_find_prop (polymer->propGPA,
							    NULL,
							    NULL,
							    "LEFT_END_MODIF",
							    NULL,
							    PXM_UNDEF_PROP_CMP);
      
		  if (NULL != prop_found)
		    {
		      if (prop_found->data != NULL)
			{
			  prop_new = libpolyxmass_prop_dup (prop_found,
							    PXM_MODIF_PROP_DUP_DEEP_YES);
	      
			  g_ptr_array_add (oligomer->propGPA, prop_new);
			}
		    }
		}
	      /* end of 
		 else if (oligomer->start_idx == 0)
	      */
	      else if (oligomer->end_idx == length - 1)
		{
		  prop_new = 
		    libpolyxmass_prop_both_strings_new ("LEFT_RIGHT_END_OLIGOMER",
							"RIGHT_END");
      
		  g_ptr_array_add (oligomer->propGPA, prop_new);
      
		  /*
		    Now that we know that the current oligomer corresponds in
		    fact to the left end-terminal oligomer of the polymer),
		    we must check if these two ends are modified or not.
		  */
		  prop_found = libpolyxmass_prop_find_prop (polymer->propGPA,
							    NULL,
							    NULL,
							    "RIGHT_END_MODIF",
							    NULL,
							    PXM_UNDEF_PROP_CMP);
      
		  if (NULL != prop_found)
		    {
		      if (prop_found->data != NULL)
			{
			  prop_new = libpolyxmass_prop_dup (prop_found,
							    PXM_MODIF_PROP_DUP_DEEP_YES);
	      
			  g_ptr_array_add (oligomer->propGPA, prop_new);
			}
		    }
		}
	      /* end of 
		 else if (oligomer->end_idx == 0)
	      */
	    }

	  /* Finally add the oligomer to the array of oligomers
	     and increment effectively the count of added oligomers.
	  */
	  g_ptr_array_add (GPA, oligomer);
	      
	  count++;
	}
      /* end of 
	 for (jter = 0; jter < calcopt->end_idx + 1 - iter; jter++)
      */
    }
  /* end of 
     for (iter = calcopt->start_idx; iter < calcopt->end_idx + 1; iter++)
  */
  
  pxmchem_oligomer_free (oligomer_test);
  
  pxmchem_calcopt_free (calcopt_local);

  return count;
}






