/*
 * Copyright (c) 2001-2003 The Trustees of Indiana University.  
 *                         All rights reserved.
 * Copyright (c) 1998-2001 University of Notre Dame. 
 *                         All rights reserved.
 * Copyright (c) 1994-1998 The Ohio State University.  
 *                         All rights reserved.
 * 
 * This file is part of the LAM/MPI software package.  For license
 * information, see the LICENSE file in the top level directory of the
 * LAM/MPI source distribution.
 * 
 * $HEADER$
 *
 * $Id: ssi_rpi_gm_short.c,v 1.11.2.2 2003/06/22 03:37:20 jsquyres Exp $
 *
 *	Function:	- everything to do with short messages
 */

#include <lam_config.h>

#include <stdlib.h>
#include <string.h>

#include <mpi.h>
#include <mpisys.h>
#include <rpisys.h>
#include <lamdebug.h>

#include <rpi_gm.h>
#include <rpi_gm_short.h>
#include <rpi_gm_dreg.h>
#include <rpi_gm_send_queue.h>
#include <rpi_gm_util.h>
#include <rpi_gm_recv_queue.h>
#include <rpi_gm_actions.h>
#include <rpi_gm_ack.h>
#include <rpi_gm_interval.h>


/*
 * private functions
 */
/* Request advance functions (installed as send/recv advance_fn) */

/* Hand the envelope and the short body of a request to the push routines */
static int send_short(MPI_Request req);

/* Copy a just-received short body into the request's buffer */
static int receive_short_body(MPI_Request req, char *body);

/* Finish a synchronous send once its ACK has been received */
static int short_complete_send_request(MPI_Request req,
                                       char *dummy_recvd_data);

/* gm send-completion callbacks */

/* Envelope send has completed */
static void short_send_env_callback(struct gm_port *port,
                                    void *context,
                                    gm_status_t status);

/* Body send has completed; body buffer goes back to the short pool */
static void short_send_body_unpin_callback(struct gm_port *port,
                                           void *context,
                                           gm_status_t status);

/* Body send has completed; only the buffer's interval is released */
static void short_send_body_no_unpin_callback(struct gm_port *port,
                                              void *context,
                                              gm_status_t status);


/*
 * Start sending a short message.
 *
 * The request is first marked active and flagged as being advanced.
 * After that, one of two things happens:
 *
 * - If other requests are already waiting in the send queue, or fewer
 *   than two send tokens are available, the request is appended to
 *   the send queue with send_short() installed as its
 *   send_advance_fn, and nothing is sent right now.
 *
 * - Otherwise two send tokens are consumed, send_advance_fn is reset
 *   to NULL (there is no further send action for this request), and
 *   the message is pushed out immediately through send_short().
 *
 * This is an MPI_Request->send_advance_fn function, which is why it
 * takes care of (re)setting MPI_Request->send_advance_fn itself.
 *
 * Returns 0 when the request was queued; otherwise returns the result
 * of send_short() (0 or LAMERROR).
 */
int 
lam_ssi_rpi_gm_short_send(MPI_Request req)
{
  lam_debug_cond((lam_ssi_rpi_gm_did, 
                  "send_short: sending request %p\n", 
                  req));

  /* From here on the request belongs to the progression engine */

  req->rq_state = LAM_RQSACTIVE;
  req->rq_rpi->is_advancing = 1;

  /* Cannot send right now: either someone is ahead of us in the send
     queue, or we do not hold the two tokens that a short message
     needs.  Defer the real send to send_short(). */

  if (!LAM_SSI_RPI_GM_SEND_QUEUE_EMPTY() ||
      lam_ssi_rpi_gm_stokens < 2) {
    req->rq_rpi->send_advance_fn = send_short;
    LAM_SSI_RPI_GM_SEND_QUEUE_ADD(req);

    /* No point in poking the queue here.  It gets advanced whenever a
       send token comes back, so nothing in it could make progress at
       this moment anyway. */

    return 0;
  }

  /* We can send immediately.  There is no follow-up send action; the
     request is finished once the gm send completes. */

  req->rq_rpi->send_advance_fn = NULL;
  lam_ssi_rpi_gm_stokens -= 2;
  lam_debug_cond((PUB(did), "decremented stokens 2: %d", lam_ssi_rpi_gm_stokens));

  return send_short(req);
}


/*
 * Start sending a short synchronous message.
 *
 * This is the ordinary short send plus the bookkeeping needed to
 * receive the ACK that completes a synchronous send:
 *
 * - The request is registered for its ACK *before* anything is sent.
 *   Posting first guarantees that the ACK is always expected, so it
 *   never has to be handled as an unexpected receive.  This is safe
 *   even when the message itself ends up being queued for later,
 *   because ACK requests are queued in order.
 *
 * - Receiving the ACK is technically a "receive" action, so the
 *   completion routine is installed as the request's recv_advance_fn.
 *
 * - The message is then sent with lam_ssi_rpi_gm_short_send().
 *
 * This is an MPI_Request->send_advance_fn function.
 *
 * Returns 0 on success, or LAMERROR if either registering the ACK or
 * the short send fails.
 */
int 
lam_ssi_rpi_gm_short_send_sync(MPI_Request req)
{
  /* Post the pending ACK first -- see the race note above */

  if (lam_ssi_rpi_gm_add_ack(req->rq_proc->p_rpi, req) != 0) {
    return LAMERROR;
  }

  /* The arrival of the ACK is what finishes this request */

  req->rq_rpi->recv_advance_fn = short_complete_send_request;

  /* From here on this is exactly a normal short send */

  return (lam_ssi_rpi_gm_short_send(req) != 0) ? LAMERROR : 0;
}


/*
 * Handle the envelope of a short message whose body has not been
 * received yet.
 *
 * A copy of the envelope is stored in the request's cq_envbuf.  The
 * request is then recorded as the sending process's
 * cp_current_recv_req, with receive_short_body() as its
 * recv_advance_fn, so that the next receive from that process is
 * paired with this request and delivers the short body to it.
 *
 * Always returns 0.
 */
int 
lam_ssi_rpi_gm_short_receive_env(struct lam_ssi_rpi_gm_envl *env, 
                                 MPI_Request req)
{
  /* Keep our own copy of the envelope for when the body shows up */

  *req->rq_rpi->cq_envbuf = *env;

  /* Pair the next receive from this process with this request, and
     make receiving the short body the request's next action */

  req->rq_proc->p_rpi->cp_current_recv_req = req;
  req->rq_rpi->recv_advance_fn = receive_short_body;

  return 0;
}


/*
 * Handle a request that has just been matched with a buffered
 * (previously unexpected) envelope whose [short] body has not been
 * received yet.
 *
 * Nothing has to be pinned here: short bodies land in commodity /
 * pooled buffers and are memcpy'ed into the destination buffer
 * afterwards.
 *
 * Always returns 0.
 */
int 
lam_ssi_rpi_gm_short_prepare_unexpected_body(struct lam_ssi_rpi_gm_envl *env, 
                                             MPI_Request req)
{
  /* Keep our own copy of the envelope for when the body shows up */

  *req->rq_rpi->cq_envbuf = *env;

  /* Clear cp_bmsg so that the proc no longer treats the next thing it
     receives from this peer as a body that must be buffered as
     unexpected */

  req->rq_proc->p_rpi->cp_bmsg = NULL;

  /* Pair the next receive from this process with this request, and
     make receiving the short body the request's next action */

  req->rq_proc->p_rpi->cp_current_recv_req = req;
  req->rq_rpi->recv_advance_fn = receive_short_body;

  /* Mark the request as already in progress so that the top-level
     advance() function does not add it to the pending receive
     queue(s) */

  req->rq_rpi->is_advancing = 1;
  req->rq_state = LAM_RQSACTIVE;

  return 0;
}


/************************************************************************/

/*
 * Thin wrapper around the actual gm sends; all setup has already been
 * done by the caller.
 *
 * Pushes the envelope first and then the short body.  The body is
 * only attempted if the envelope push succeeded.
 *
 * Returns 0 on success, LAMERROR if either push fails.
 */
static int 
send_short(MPI_Request req)
{
  if (lam_ssi_rpi_gm_push_envelope(req, short_send_env_callback) != 0) {
    return LAMERROR;
  }

  if (lam_ssi_rpi_gm_push_short_body(req, short_send_body_unpin_callback, 
                                     short_send_body_no_unpin_callback) != 0) {
    return LAMERROR;
  }

  /* For a sync send, the envelope that just went out carries *my*
     rank in the communicator.  To match the ACK against this request
     later, the request's envelope needs the *receiver's* rank in
     ce_rank and the C2CACK flag set.  Those edits cannot be made
     here: the envelope has been handed to the gm library and must not
     be touched until its send completes.  They are therefore done in
     the envelope callback. */

  return 0;
}


/*
 * Invoked once the body of a short message has been received.
 *
 * The envelope was received earlier and is available in the
 * request's cq_envbuf.  The body sits in a commodity/pooled buffer,
 * so it is copied into the request's pack buffer and the pooled
 * buffer is then returned.  If the payload is longer than the posted
 * receive, only rq_packsize bytes are copied and the request is
 * flagged with LAM_RQFTRUNC.
 *
 * If the envelope carries C2CSSEND, an ACK is sent and the request is
 * not moved to the done state here; otherwise the request is done.
 *
 * Returns 0 on success, LAMERROR if sending the ACK fails.
 */
static int
receive_short_body(MPI_Request req, char *body)
{
  struct lam_ssi_rpi_gm_envl *env = req->rq_rpi->cq_envbuf;

  /* This request is no longer the peer's "current" receive */

  req->rq_proc->p_rpi->cp_current_recv_req = NULL;

  /* We know that the payload has nonzero length; what remains to be
     checked is whether it fits the posted receive. */

  lam_debug_cond((lam_ssi_rpi_gm_did, 
                  "receive_short: payload len: %d -- posted rcv for len %d",
                  env->ge_env.ce_len, req->rq_packsize));

  /* Copy out of the pooled buffer into the destination buffer */

  if (env->ge_env.ce_len > req->rq_packsize) {
    /* Payload is too long: deliver what fits and flag the truncation */
    lam_memcpy(req->rq_packbuf, body, req->rq_packsize);
    lam_ssi_rpi_fill_mpi_status(req, env->ge_env.ce_rank,
                                env->ge_env.ce_tag, req->rq_packsize);
    req->rq_flags |= LAM_RQFTRUNC;
  } else {
    lam_memcpy(req->rq_packbuf, body, env->ge_env.ce_len);
    lam_ssi_rpi_fill_mpi_status(req, env->ge_env.ce_rank,
                                env->ge_env.ce_tag, env->ge_env.ce_len);
  }

  /* Hand the body buffer back to the pool */

  lam_debug_cond((lam_ssi_rpi_gm_did, 
                  "short.c:%d:receive_short_body returning body buffer %p\n", 
                  __LINE__, body));
  lam_ssi_rpi_gm_dma_short_free(body);

  /* Nothing further to receive for this request */

  req->rq_rpi->recv_advance_fn = NULL;

  /* A plain send is finished right here.  A sync mode send still
     needs its ACK to go out, so the request is not marked done in
     that case. */

  if ((env->ge_env.ce_flags & C2CSSEND) == 0) {
    req->rq_state = LAM_RQSDONE;
    lam_rq_nactv -= 1;
  } else {
    lam_debug_cond((lam_ssi_rpi_gm_did, 
                    "receive_short: sending an ACK because this was an send"));
    if (lam_ssi_rpi_gm_send_ack_done(req, env) != 0) {
      return LAMERROR;
    }
    lam_debug_cond((lam_ssi_rpi_gm_did, "receive_short: ACK queued up"));
  }

  lam_ssi_rpi_gm_haveadv = 1;
  return 0;
}


/*
 * Invoked after the ACK for a synchronous send has been received.
 * The request is now complete: move it to the done state, drop the
 * count of active requests and note that the RPI has advanced.
 *
 * The second argument carries no data and is ignored.
 *
 * Always returns 0.
 */
static int 
short_complete_send_request(MPI_Request req, char *dummy_recvd_data)
{
  (void) dummy_recvd_data;

  lam_rq_nactv -= 1;
  req->rq_state = LAM_RQSDONE;
  lam_ssi_rpi_gm_haveadv = 1;

  return 0;
}


/*
 * Send callback handed to lam_ssi_rpi_gm_push_envelope() by
 * send_short().  It runs once the envelope of a short message has
 * been sent.
 *
 * NOTE: "req" is not a parameter; it is presumably set up from
 * "context" by LAM_SSI_RPI_GM_SEND_CALLBACK_START -- confirm against
 * the macro definition.
 */
static void 
short_send_env_callback(struct gm_port *port, void *context, 
                        gm_status_t status)
{
  LAM_SSI_RPI_GM_SEND_CALLBACK_START;

  lam_debug_cond((lam_ssi_rpi_gm_did, 
                  "<short>send_env_callback: all done! %p\n", 
                  req));

  /* For a sync send, rewrite the request's envelope so that the ACK
     will match it later: ce_rank becomes the request's rq_rank and
     C2CACK is added to the flags.  This is the edit that send_short()
     had to defer until the envelope was no longer in use by gm. */

  if ((req->rq_rpi->cq_envbuf->ge_env.ce_flags & C2CSSEND) != 0) {
    struct lam_ssi_rpi_gm_envl *sent_env = req->rq_rpi->cq_envbuf;

    sent_env->ge_env.ce_flags |= C2CACK;
    sent_env->ge_env.ce_rank = req->rq_rank;
  }

  /* The request is deliberately not moved to the done state here: the
     body of the message still has to finish sending. */

  LAM_SSI_RPI_GM_SEND_CALLBACK_FINISH;
}


/*
 * Invoked by gm_unknown() when the gm_send of a short body finishes,
 * for sends where we knew ahead of time that the send buffer should
 * be "unpinned".
 *
 * NOTE: "unpin" is a bit of a misnomer.  The pinned buffer is simply
 * returned to the short-pinned-buffer pool; it is not actually
 * unpinned.
 */
static void 
short_send_body_unpin_callback(struct gm_port *port, void *context, 
                               gm_status_t status)
{
  LAM_SSI_RPI_GM_SEND_CALLBACK_START;

  lam_debug_cond((lam_ssi_rpi_gm_did,
                  "<short>send_body_unpin_callback: all done! %p\n", 
                  req));

  /* Give the pinned data buffer back to the pool, and forget it */

  lam_debug_cond((lam_ssi_rpi_gm_did, 
                  "short.c:%d:short_send_body_unpin_callback "
                  "returning body buffer %p\n", 
                  __LINE__, req->rq_rpi->dma_data_buf));
  lam_ssi_rpi_gm_dma_short_free(req->rq_rpi->dma_data_buf);
  req->rq_rpi->dma_data_buf = NULL;

  /* 
   * The body has now been sent.  What is left to do?
   *
   * - Non-synchronous short send: the request is complete, so move
   *   it into the done state.
   *
   * - Synchronous send: the request was already queued to receive an
   *   ACK, which is what will complete it.  Nothing to do here.
   */

  if (!(req->rq_rpi->cq_envbuf->ge_env.ce_flags & C2CSSEND)) {
    req->rq_state = LAM_RQSDONE;
    lam_rq_nactv -= 1;
  }

  /* Increment the send tokens and mark that this c2c RPI has advanced */

  LAM_SSI_RPI_GM_SEND_CALLBACK_FINISH;
}


/*
 * This function is invoked by gm_unknown() when a gm_send finishes.
 * We knew ahead of time that we didn't want to unpin the send buffer,
 * hence this callback was invoked.
 *
 * The definition is marked static to agree with its prototype at the
 * top of this file and with the other send callbacks; the function is
 * private to this file.
 *
 * NOTE: "req" is not a parameter; it is presumably set up from
 * "context" by LAM_SSI_RPI_GM_SEND_CALLBACK_START -- confirm against
 * the macro definition.
 */
static void 
short_send_body_no_unpin_callback(struct gm_port *port, void *context, 
				  gm_status_t status)
{
  LAM_SSI_RPI_GM_SEND_CALLBACK_START;

  lam_debug_cond((lam_ssi_rpi_gm_did, 
		  "<short>send_body_no_unpin_callback: all done! %p\n", 
		  req));

  /* There's no unpinning to do in this callback.  Just unuse the
     interval covering the request's pack buffer */

  lam_ssi_rpi_gm_interval_unuse(req->rq_packbuf, req->rq_packsize);

  /* 
   * Now that the body has finished sending, what do we do?  
   *
   * If this was a short, non-synchronous send, it is now done.  Move
   * it into the done state.
   *
   * Otherwise, it was synchronous.  Hence, this request has already
   * been queued to receive an ACK later.  So we don't need to do
   * anything here.
   */

  if ((req->rq_rpi->cq_envbuf->ge_env.ce_flags & C2CSSEND) == 0) {
    req->rq_state = LAM_RQSDONE;
    --lam_rq_nactv;
  }

  /* Increment the send tokens and mark that this c2c RPI has advanced */

  LAM_SSI_RPI_GM_SEND_CALLBACK_FINISH;
}
