/*
 *	Ohio Trollius
 *	Copyright 1997 The Ohio State University
 *	NJN
 *
 *	$Id: shm.low.c,v 6.1.1.2 97/03/24 12:14:02 nevin Exp $
 *
 *	Function:	- shared memory low-level routines
 */

#include <lam_config.h>

#include <errno.h>
#include <stdlib.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/uio.h>

#if NEED_SYS_SELECT_H
#include <sys/select.h>
#endif

#include <blktype.h>
#include <dl_inet.h>
#include <mpi.h>
#include <mpisys.h>
#include <net.h>
#include <rpisys.h>
#include <terror.h>
#include <typical.h>
#include <t_types.h>

/*
 * definitions
 */
/*
 * Size in bytes of a message envelope.  In this file it is also the offset
 * of the message body inside a shared buffer that starts with an envelope.
 */
#define ENVSIZE		(sizeof(struct c2c_envl))

/*
 * Envelope pointer; used below to view the start of a shared buffer
 * (cp_shmin) as an envelope.
 */
typedef struct c2c_envl *envp_t;

/*
 * public functions
 *
 * Declared without parameter lists (pre-ANSI style), so the compiler does
 * not check argument types at the call sites.
 */
int			_shm_advance();		/* advance shm + tcp requests */
int			_shm_proc_read_env();	/* lock buffer, match envelope */
int			_shm_req_send_long();	/* long: 1st envelope -> ack */
int			_shm_req_send_short();	/* short: env + body -> done */
int			_shm_req_send_synch();	/* synch: env + body -> ack */
int			_shmtcp_req_probe();	/* probe transition (shm/tcp) */
int			_shmtcp_req_recv();	/* recv transition (shm/tcp) */
int			_shm_buffered_adv();	/* match a buffered message */

/*
 * external functions
 *
 * _tcp_adv1() and _tcp_advmultiple() are called from _shm_advance();
 * _tcp_req_probe() and _tcp_req_recv() are what _shmtcp_req_probe() and
 * _shmtcp_req_recv() hand off to when the process has a socket.
 * NOTE(review): _c2c_comm_dead() and lam_commfault() are not referenced
 * in the part of the file reviewed here -- confirm they are still needed.
 */
extern int		_tcp_advmultiple();
extern int		_tcp_adv1();
extern int		_tcp_req_probe();
extern int		_tcp_req_recv();
extern int		_c2c_comm_dead();
extern void		_c2c_fill_mpi_status();
extern void		_c2c_fill_wildcards();
extern void		lam_commfault();
extern int		microsleep();

/*
 * private functions
 *
 * Most of these are protocol transition functions that get installed in a
 * request's cq_adv field or a process' cp_readfn field; the shm_push_*
 * routines and shm_buffer() do the actual copying to/from shared buffers.
 */
static int		shm_proc_read_body();
static int		shm_req_send_ack_long();
static int		shm_req_send_ack_only();
static int		shm_req_send_body();
static int		shm_req_rcvd_2nd();
static int		shm_req_rcvd_body_synch();
static int		shm_req_rcvd_long_ack();
static int		shm_req_done();
static int		shm_match_adv();
static int		shm_buffer();
static int		shm_push_body();
static int		shm_push_env();

/*
 * public variables
 *
 * _shm_advance() walks _shm_read[0 .. _shm_nread-1] and
 * _shm_write[0 .. _shm_nwrite-1]; nothing in this file fills the arrays or
 * sets the counts, so they are presumably maintained by the caller.
 */
struct c2c_proc		**_shm_read = 0;	/* reading shm processes */
struct c2c_proc		**_shm_write = 0;	/* writing shm processes */
int			_shm_nread;		/* # reading shm processes */
int			_shm_nwrite;		/* # writing shm processes */
int			_shm_nprocs = 0;	/* number of shm processes */
int			_shm_narray = 0;	/* shm read/write array sizes */
int			_shm_poll_delay = LAMSHMDELAY;
						/* poll loop delay (usecs) */
						/* 0 means poll without sleeping */

/*
 * external variables
 *
 * _c2c_flblock selects between blocking locks and try-locks in the routines
 * below; _shm_advance() may clear it for the duration of a call.
 * _c2c_haveadv is set to 1 by the transition functions whenever a request
 * makes progress.
 */
extern int		_c2c_flblock;		/* blocking flag */
extern int		_c2c_haveadv;		/* have advanced */
extern int		_tcp_nio;		/* # processes doing tcp io */
extern MPI_Request	_tcp_lastreq;		/* last tcp request */

/*
 *	_shm_advance
 *
 *	Function:	- advance tcp and shm requests where possible
 *	Returns:	- 0 or LAMERROR
 */
int
_shm_advance()

{
    MPI_Request		req;
    double		blkstart;
    double		loopstart;
    int			blksave;		/* save blocking state */
    int			i;
/*
 * Save and set blocking state.
 */
    blksave = _c2c_flblock;
    _c2c_flblock &=
	    ((_shm_nread + _shm_nwrite == 0)
		    || (_shm_nread + _shm_nwrite + _tcp_nio == 1));

    if ((_kio.ki_rtf & RTF_TRON) == RTF_TRON && blksave && !_c2c_flblock) {
	loopstart = blkstart = ttime();
    }
/*
 * Advance reading shared memory processes.
 */
    do {
	for (i = 0; i < _shm_nread; i++) {
	    _shm_read[i]->cp_read = 0;
	    if (_shm_read[i]->cp_readfn(_shm_read[i])) {
		return(LAMERROR);
	    }
	}
/*
 * Advance writing shared memory processes.
 */
	for (i = 0; i < _shm_nwrite; i++) {
	    _shm_write[i]->cp_write = 0;
	    req = _shm_write[i]->cp_wreq;
	    if (req->rq_rpi.c2c.cq_adv(_shm_write[i], req)) {
		return(LAMERROR);
	    }
	}
/*
 * Advance the TCP requests (if any).  In the case of a single TCP
 * request make sure that the request has not been completed.  This can
 * happen when a receive MPI_ANY_SOURCE is completed by a send from a
 * shared memory source.
 */
	if (_tcp_nio == 1 && _tcp_lastreq->rq_state != LAM_RQSDONE) {
	    if (_tcp_adv1()) return(LAMERROR);
	}
	else if (_tcp_nio > 1) {
	    if (_tcp_advmultiple()) return(LAMERROR);
	}

	if (blksave && !_c2c_haveadv) {
	    if (_shm_poll_delay) {
		microsleep(_shm_poll_delay);
	    }
	    LAM_TRACE(loopstart = ttime());
	}

    } while (blksave && !_c2c_haveadv);

    if ((_kio.ki_rtf & RTF_TRON) == RTF_TRON && blksave && !_c2c_flblock) {
	_kio.ki_blktime += (loopstart - blkstart);
    }

    _c2c_flblock = blksave;
    return(_c2c_haveadv);
}

/*
 *	_shm_proc_read_env
 *
 *	Function:	- obtain the read lock on the process' incoming
 *			  shared buffer (waiting for it only in blocking mode)
 *			- once locked, try to match the envelope in the
 *			  buffer with a receiving request and advance it
 *	Accepts:	- process
 *	Returns:	- 0 or LAMERROR
 */
int
_shm_proc_read_env(ps)

struct c2c_proc		*ps;

{
	double		t0;			/* time lock wait began */
	int		rc;			/* try-lock result */

	if (!_c2c_flblock) {
/*
 * Non-blocking: a result of 1 means the lock is not available, which
 * is not an error; we simply have nothing to read this time round.
 */
		rc = _shm_readtrylock(ps);
		if (rc < 0) {
			return(LAMERROR);
		}
		if (rc == 1) {
			return(0);
		}
	}
	else {
/*
 * Blocking: wait for the lock and charge the wait to the blocked time.
 */
		LAM_TRACE(t0 = ttime());
		if (_shm_readlock(ps)) return(LAMERROR);
		LAM_TRACE(_kio.ki_blktime += (ttime() - t0));
	}

	ps->cp_locked = 1;
	return(shm_match_adv(ps));
}

/*
 *	shm_proc_read_body
 *
 *	Function:	- copy the body of an incoming MPI message from the
 *			  shared buffer to the receiver's buffer
 *			- this is only called when there is a receiving request
 *			- in non-blocking mode it may return before the whole
 *			  body has arrived; it stays installed as the read
 *			  function and resumes on the next call
 *	Accepts:	- process
 *	Returns:	- 0 or LAMERROR
 */
static int
shm_proc_read_body(ps)

struct c2c_proc		*ps;

{
	int		blocking;		/* wait for locks? */
	int		nbytes;			/* size of current chunk */
	int		rc;			/* try-lock result */

	if (ps->cp_locked) {
/*
 * The buffer is already ours and starts with an envelope; the first
 * piece of the body sits right behind it.  Copy it and release the buffer.
 */
		nbytes = min(ps->cp_nmsgin, SHMSHORTMSGLEN);
		memcpy(ps->cp_msgbuf, ps->cp_shmin + ENVSIZE, nbytes);
		ps->cp_nmsgin -= nbytes;
		ps->cp_msgbuf += nbytes;

		if (_shm_readunlock(ps)) return(LAMERROR);
		ps->cp_locked = 0;
	}
/*
 * Copy the remaining bufferfuls.  When blocking we wait for each one.
 * Otherwise we give up (without error) as soon as the lock cannot be
 * had and try again another day.
 */
	blocking = _c2c_flblock;

	while (ps->cp_nmsgin) {
		if (blocking) {
			if (_shm_readlock(ps)) return(LAMERROR);
		}
		else {
			rc = _shm_readtrylock(ps);
			if (rc < 0) {
				return(LAMERROR);
			}
			if (rc == 1) {
				return(0);
			}
		}

		nbytes = min(ps->cp_nmsgin, SHMBUFSIZE);
		memcpy(ps->cp_msgbuf, ps->cp_shmin, nbytes);
		ps->cp_nmsgin -= nbytes;
		ps->cp_msgbuf += nbytes;

		if (_shm_readunlock(ps)) return(LAMERROR);
	}
/*
 * The whole message is in.  Advance the request that was receiving it.
 */
	if (ps->cp_rreq->rq_rpi.c2c.cq_adv(ps, ps->cp_rreq)) {
		return(LAMERROR);
	}
/*
 * Go back to reading envelopes from this process.
 */
	ps->cp_readfn = _shm_proc_read_env;
	ps->cp_rreq = 0;
	return(0);
}

/*
 *	shm_req_send_ack_long
 *
 *	Function:	- long protocol transition from writing ack
 *			  to reading message tail
 *			- nothing changes if the ack could not be written yet
 *	Accepts:	- source process
 *			- request
 *	Returns:	- 0 or LAMERROR
 */
static int
shm_req_send_ack_long(ps, req)

struct c2c_proc		*ps;
MPI_Request		req;

{
	int		pushed;			/* result of writing the ack */

	pushed = shm_push_env(ps, req);
	if (pushed <= 0) {
		return(pushed);
	}
/*
 * The ack is out; the process no longer has a request to write.
 */
	ps->cp_wreq = 0;
	_c2c_haveadv = 1;
/*
 * Turn the request's envelope into one that matches the second envelope
 * from the peer, and wait for it.
 */
	req->rq_rpi.c2c.cq_env.ce_flags =
		(req->rq_rpi.c2c.cq_env.ce_flags & ~C2CACK) | C2C2ND;
	req->rq_rpi.c2c.cq_env.ce_rank = req->rq_rpi.c2c.cq_peer;
	req->rq_rpi.c2c.cq_adv = shm_req_rcvd_2nd;
	req->rq_rpi.c2c.cq_state = C2CREAD;
	return(0);
}

/*
 *	shm_req_send_ack_only
 *
 *	Function:	- short/long protocol transition from
 *			  writing ack to done
 *			- nothing changes if the ack could not be written yet
 *	Accepts:	- source process
 *			- request
 *	Returns:	- 0 or LAMERROR
 */
static int
shm_req_send_ack_only(ps, req)

struct c2c_proc		*ps;
MPI_Request		req;

{
	int		pushed;			/* result of writing the ack */

	pushed = shm_push_env(ps, req);
	if (pushed <= 0) {
		return(pushed);
	}
/*
 * The ack is out and nothing else is expected; the request is complete.
 */
	req->rq_state = LAM_RQSDONE;
	req->rq_rpi.c2c.cq_state = C2CDONE;
	ps->cp_wreq = 0;
	_c2c_haveadv = 1;
	return(0);
}

/*
 *	shm_req_send_body
 *
 *	Function:	- protocol transition from writing message body to done
 *			- nothing changes while part of the body is unwritten
 *	Accepts:	- destination process
 *			- request
 *	Returns:	- 0 or LAMERROR
 */
static int
shm_req_send_body(ps, req)

struct c2c_proc		*ps;
MPI_Request		req;

{
	int		pushed;			/* result of writing the body */

	pushed = shm_push_body(ps, req);
	if (pushed <= 0) {
		return(pushed);
	}
/*
 * Every byte has been handed over; the request is complete.
 */
	req->rq_state = LAM_RQSDONE;
	req->rq_rpi.c2c.cq_state = C2CDONE;
	ps->cp_wreq = 0;
	_c2c_haveadv = 1;
	return(0);
}

/*
 *	_shm_req_send_long
 *
 *	Function:	- long protocol transition from writing first envelope
 *			  to reading ack
 *			- nothing changes if the envelope could not be
 *			  written yet
 *	Accepts:	- destination process
 *			- request
 *	Returns:	- 0 or LAMERROR
 */
int
_shm_req_send_long(ps, req)

struct c2c_proc		*ps;
MPI_Request		req;

{
	int		pushed;			/* result of writing the env */

	pushed = shm_push_env(ps, req);
	if (pushed <= 0) {
		return(pushed);
	}
/*
 * The first envelope is out.  Make the request's envelope match the
 * ack that the peer will send back, and wait for it.
 */
	ps->cp_wreq = 0;
	_c2c_haveadv = 1;
	req->rq_rpi.c2c.cq_env.ce_rank = req->rq_rpi.c2c.cq_peer;
	req->rq_rpi.c2c.cq_env.ce_flags |= C2CACK;
	req->rq_rpi.c2c.cq_adv = shm_req_rcvd_long_ack;
	req->rq_rpi.c2c.cq_state = C2CREAD;
	return(0);
}

/*
 *	_shm_req_send_short
 *
 *	Function:	- short protocol transition from writing envelope
 *			  and message body to done
 *			- nothing changes while part of the message is
 *			  unwritten
 *	Accepts:	- destination process
 *			- request
 *	Returns:	- 0 or LAMERROR
 */
int
_shm_req_send_short(ps, req)

struct c2c_proc		*ps;
MPI_Request		req;

{
	int		pushed;			/* result of writing the msg */

	pushed = shm_push_body(ps, req);
	if (pushed <= 0) {
		return(pushed);
	}
/*
 * Envelope and body are both out; the request is complete.
 */
	req->rq_state = LAM_RQSDONE;
	req->rq_rpi.c2c.cq_state = C2CDONE;
	ps->cp_wreq = 0;
	_c2c_haveadv = 1;
	return(0);
}

/*
 *	_shm_req_send_synch
 *
 *	Function:	- short synchronous protocol transition from writing
 *			  envelope and message body to reading ack
 *			- nothing changes while part of the message is
 *			  unwritten
 *	Accepts:	- destination process
 *			- request
 *	Returns:	- 0 or LAMERROR
 */
int
_shm_req_send_synch(ps, req)

struct c2c_proc		*ps;
MPI_Request		req;

{
	int		pushed;			/* result of writing the msg */

	pushed = shm_push_body(ps, req);
	if (pushed <= 0) {
		return(pushed);
	}
/*
 * The message is out.  Make the request's envelope match the peer's
 * ack; receiving that ack completes the request.
 */
	ps->cp_wreq = 0;
	_c2c_haveadv = 1;
	req->rq_rpi.c2c.cq_env.ce_rank = req->rq_rpi.c2c.cq_peer;
	req->rq_rpi.c2c.cq_env.ce_flags |= C2CACK;
	req->rq_rpi.c2c.cq_adv = shm_req_done;
	req->rq_rpi.c2c.cq_state = C2CREAD;
	return(0);
}

/*
 *	shm_req_rcvd_2nd
 *
 *	Function:	- long protocol transition from reading the envelope at
 *			  the start of the message tail to reading the body
 *			  of the message
 *	Accepts:	- source process
 *			- request
 *	Returns:	- 0 or LAMERROR
 */
static int
shm_req_rcvd_2nd(ps, req)

struct c2c_proc		*ps;
MPI_Request		req;

{
	ps->cp_rreq = req;
	ps->cp_msgbuf = req->rq_packbuf;
	ps->cp_nmsgin = ((envp_t) ps->cp_shmin)->ce_len;
	ps->cp_readfn = shm_proc_read_body;
	req->rq_rpi.c2c.cq_adv = shm_req_done;

	return(shm_proc_read_body(ps));
}

/*
 *	shm_req_done
 *
 *	Function:	- protocol transition to done
 *			- releases the incoming buffer if it is still held
 *	Accepts:	- source process
 *			- request
 *	Returns:	- 0 or LAMERROR
 */
static int
shm_req_done(ps, req)

struct c2c_proc		*ps;
MPI_Request		req;

{
	if (ps->cp_locked) {
		if (_shm_readunlock(ps)) {
			return(LAMERROR);
		}
		ps->cp_locked = 0;
	}

	req->rq_state = LAM_RQSDONE;
	req->rq_rpi.c2c.cq_state = C2CDONE;
	_c2c_haveadv = 1;
	return(0);
}

/*
 *	shm_req_rcvd_body_synch
 *
 *	Function:	- synchronous protocol transition from reading
 *			  message body to sending ack
 *	Accepts:	- source process (ignored, callers pass 0)
 *			- request
 *	Returns:	- 0 (always)
 */
static int
shm_req_rcvd_body_synch(ps, req)

struct c2c_proc		*ps;
MPI_Request		req;

{
/*
 * Turn the request's envelope into an ack carrying our own rank and
 * make the request a writer whose completion is the ack being sent.
 */
	req->rq_rpi.c2c.cq_env.ce_rank = req->rq_comm->c_group->g_myrank;
	req->rq_rpi.c2c.cq_env.ce_flags |= C2CACK;
	req->rq_rpi.c2c.cq_adv = shm_req_send_ack_only;
	req->rq_rpi.c2c.cq_state = C2CWRITE;
	_c2c_haveadv = 1;
	return(0);
}

/*
 *	shm_req_rcvd_long_ack
 *
 *	Function:	- long protocol transition from reading ack to
 *			  done (if receiver wants 0 bytes) or sending
 *			  requested # of bytes
 *	Accepts:	- destination process
 *			- request
 *	Returns:	- 0 or LAMERROR
 */
static int
shm_req_rcvd_long_ack(ps, req)

struct c2c_proc		*ps;
MPI_Request		req;

{
	int		acklen;			/* length asked for in ack */
/*
 * Take a private copy of the length in the ack while the incoming buffer
 * is still locked.  Once the buffer has been unlocked the peer is free to
 * write its next envelope into it, so it must not be looked at afterwards.
 */
	acklen = ((envp_t) ps->cp_shmin)->ce_len;

	if (_shm_readunlock(ps)) return(LAMERROR);
	ps->cp_locked = 0;
	_c2c_haveadv = 1;
/*
 * Set message length to minimum of what sender and receiver specified.
 */
	if (req->rq_packsize < acklen) {
		req->rq_rpi.c2c.cq_nmsgout = req->rq_packsize;
	} else {
		req->rq_rpi.c2c.cq_nmsgout = acklen;
	}

	if (req->rq_rpi.c2c.cq_nmsgout == 0) {
/*
 * Nothing to send; the request is complete.
 */
		req->rq_rpi.c2c.cq_state = C2CDONE;
		req->rq_state = LAM_RQSDONE;
	} else {
/*
 * Become a writer again: a second envelope (carrying our rank and the
 * agreed length) followed by the message body.
 */
		req->rq_rpi.c2c.cq_state = C2CWRITE;
		req->rq_rpi.c2c.cq_env.ce_len = req->rq_rpi.c2c.cq_nmsgout;
		req->rq_rpi.c2c.cq_env.ce_flags &= ~C2CACK;
		req->rq_rpi.c2c.cq_env.ce_flags |= C2C2ND;
		req->rq_rpi.c2c.cq_env.ce_rank =
				req->rq_comm->c_group->g_myrank;
		req->rq_rpi.c2c.cq_adv = shm_req_send_body;
		req->rq_rpi.c2c.cq_nenvout = ENVSIZE;
	}
	return(0);
}

/*
 *	_shmtcp_req_probe
 *
 *	Function:	- probe protocol transition to done
 *			- the incoming envelope/message is buffered
 *			- a process with a socket is handed to the TCP
 *			  version instead
 *	Accepts:	- source process
 *			- request
 *	Returns:	- 0 or LAMERROR
 */
int
_shmtcp_req_probe(ps, req)

struct c2c_proc		*ps;
MPI_Request		req;

{
	envp_t		env;			/* matched envelope */

	if (ps->cp_sock < 0) {
/*
 * Shared memory process.  The probe is complete; report what was matched
 * and move the envelope/message out of the shared buffer so that a later
 * receive can find it.
 */
		_c2c_haveadv = 1;
		req->rq_rpi.c2c.cq_state = C2CDONE;
		req->rq_state = LAM_RQSDONE;

		env = (envp_t) ps->cp_shmin;
		_c2c_fill_mpi_status(req, env->ce_rank,
					env->ce_tag, env->ce_len);
		return(shm_buffer(ps));
	}

	return(_tcp_req_probe(ps, req));
}

/*
 *	_shmtcp_req_recv
 *
 *	Function:	- protocol transition for read request on
 *			  matched incoming envelope
 *	Accepts:	- source process
 *			- request
 *	Returns:	- 0 or LAMERROR
 */
int
_shmtcp_req_recv(ps, req)

struct c2c_proc		*ps;
MPI_Request		req;

{
    envp_t		env;			/* the envelope */

    if (ps->cp_sock >= 0) {
	return(_tcp_req_recv(ps, req));
    }

    env = (envp_t) ps->cp_shmin;
    _c2c_fill_wildcards(req, env);
    _c2c_haveadv = 1;
/*
 * Check for message length mismatch and set status.
 */
    if (env->ce_len > req->rq_packsize) {
	req->rq_flags |= LAM_RQFTRUNC;
	env->ce_len = req->rq_packsize;
    }
    _c2c_fill_mpi_status(req, env->ce_rank, env->ce_tag, env->ce_len);

    if (env->ce_flags & C2CLONG) {
/*
 * Got a long protocol envelope.
 * Set status and reply with an ack.
 */
	req->rq_rpi.c2c.cq_env.ce_len = env->ce_len;
	if (_shm_readunlock(ps)) return(LAMERROR);
	ps->cp_locked = 0;
	req->rq_state = LAM_RQSACTIVE;
	req->rq_rpi.c2c.cq_state = C2CWRITE;
	req->rq_rpi.c2c.cq_env.ce_flags |= (C2CACK | C2CLONG);
	req->rq_rpi.c2c.cq_env.ce_rank = req->rq_comm->c_group->g_myrank;
	req->rq_rpi.c2c.cq_adv =
	    (env->ce_len > 0) ? shm_req_send_ack_long : shm_req_send_ack_only;
	req->rq_rpi.c2c.cq_nenvout = ENVSIZE;
    }
    else {
/*
 * Got a short protocol envelope.  Copy the message from shared buffer.
 */
	if (env->ce_len > 0) {
	    memcpy(req->rq_packbuf, ps->cp_shmin + ENVSIZE, env->ce_len);
	}
	if (_shm_readunlock(ps)) return(LAMERROR);
	ps->cp_locked = 0;
/*
 * Send ack if matched a synchronous send otherwise complete the request.
 */
	if (env->ce_flags & C2CSSEND) {
	    req->rq_state = LAM_RQSACTIVE;
	    if (shm_req_rcvd_body_synch((struct c2c_proc *) 0, req)) {
		return(LAMERROR);
	    }
	} else {
	    req->rq_rpi.c2c.cq_state = C2CDONE;
	    req->rq_state = LAM_RQSDONE;
	}
    }

    return(0);
}

/*
 *	_shm_buffered_adv
 *
 *	Function:	- protocol transition for a request matching
 *			  a buffered envelope/message
 *	Accepts:	- request
 *			- buffered envelope/message
 *	Returns:	- 0 or LAMERROR
 */
int
_shm_buffered_adv(req, msg)

MPI_Request		req;
struct cbuf_msg		*msg;

{
	envp_t		env;			/* matching incoming env. */

	env = &msg->cm_env;

	if (req->rq_type == LAM_RQIPROBE) {
/*
 * The request is a probe.  Set the status and leave the envelope buffered.
 */
		_c2c_fill_mpi_status(req, env->ce_rank,
					env->ce_tag, env->ce_len);
		req->rq_state = LAM_RQSDONE;
		req->rq_rpi.c2c.cq_state = C2CDONE;

		return(0);
	}

	if (env->ce_len > req->rq_packsize) {
		req->rq_flags |= LAM_RQFTRUNC;
		env->ce_len = req->rq_packsize;
	}
	_c2c_fill_mpi_status(req, env->ce_rank, env->ce_tag, env->ce_len);

	if (env->ce_flags & C2CLONG) {
/*
 * Got a long protocol envelope, reply with an ack.
 */
		req->rq_state = LAM_RQSACTIVE;
		req->rq_rpi.c2c.cq_state = C2CWRITE;
		req->rq_rpi.c2c.cq_env.ce_flags |= (C2CACK | C2CLONG);
		req->rq_rpi.c2c.cq_env.ce_len = env->ce_len;
		req->rq_rpi.c2c.cq_env.ce_rank =
			req->rq_comm->c_group->g_myrank;
		req->rq_rpi.c2c.cq_adv = (env->ce_len > 0) ?
			shm_req_send_ack_long : shm_req_send_ack_only;
		req->rq_rpi.c2c.cq_nenvout = ENVSIZE;
	}
	else {
/*
 * Got a short protocol envelope.
 * Copy the message (if any) from the buffer and advance the request.
 */
		if (env->ce_len) {
			memcpy(req->rq_packbuf, msg->cm_buf, env->ce_len);
		}
		if (env->ce_flags & C2CSSEND) {
			req->rq_state = LAM_RQSACTIVE;
			if (shm_req_rcvd_body_synch((struct c2c_proc*)0, req)) {
				return(LAMERROR);
			}
		} else {
			req->rq_rpi.c2c.cq_state = C2CDONE;
			req->rq_state = LAM_RQSDONE;
		}
	}
/*
 * Discard the buffered message.
 */
	_cbuf_delete(msg);

	return(0);
}

/*
 *	shm_match_adv
 *
 *	Function:	- match env read from process with a read request
 *			  and advance the matched request
 *			- if no match is found then the env/msg is buffered
 *	Accepts:	- envelope's source process
 *	Returns:	- result of the matched request's advance function
 *			  or of buffering the envelope
 */
static int
shm_match_adv(ps)

struct c2c_proc		*ps;

{
	MPI_Request	req;			/* candidate request */
	envp_t		env;			/* incoming envelope */
/*
 * Only the first match is acted upon.  Probes are blocking, so no
 * matching receive can follow a matching probe in the list; stopping at
 * the first match (a probe buffers the envelope) therefore keeps the
 * invariant "no requests in the list match buffered envelopes".  As a
 * consequence a request that was checked against the buffered envelopes
 * when it entered the list never has to be checked against them again.
 */
	env = (envp_t) ps->cp_shmin;

	req = ps->cp_mreq;
	while (req) {
		if (req->rq_rpi.c2c.cq_state == C2CREAD
			&& _c2c_envl_cmp(env, &req->rq_rpi.c2c.cq_env) == 0) {

			return(req->rq_rpi.c2c.cq_adv(ps, req));
		}
		req = req->rq_next;
	}
/*
 * Nobody is waiting for this envelope.
 */
	return(shm_buffer(ps));
}

/*
 *	shm_buffer
 *
 *	Function:	- buffer incoming envelope/message
 *			- there is never any data to be read to the
 *			  data sink when buffering
 *			- the body is copied along only for short protocol
 *			  messages; a long protocol envelope has no body yet
 *			- the incoming shared buffer is unlocked
 *	Accepts:	- process envelope came in from
 *	Returns:	- 0 or LAMERROR
 */
static int
shm_buffer(ps)

struct c2c_proc		*ps;

{
	struct cbuf_msg	msg;			/* buffer list entry */

	memcpy((char *) &msg.cm_env, ps->cp_shmin, ENVSIZE);

	msg.cm_buf = 0;

	if (msg.cm_env.ce_len > 0 && !(msg.cm_env.ce_flags & C2CLONG)) {
/*
 * Set up the buffer for the message body and copy into it.
 */
		msg.cm_buf = (char *) malloc(msg.cm_env.ce_len);
		if (msg.cm_buf == 0) return(LAMERROR);

		memcpy(msg.cm_buf, ps->cp_shmin + ENVSIZE, msg.cm_env.ce_len);
	}
/*
 * Everything of interest has been copied out, release the shared buffer.
 * From here on the body copy must be freed if we fail, nobody else holds
 * a pointer to it yet.
 */
	if (_shm_readunlock(ps)) {
		free(msg.cm_buf);
		return(LAMERROR);
	}
	ps->cp_locked = 0;

	msg.cm_proc = 0;
	msg.cm_req = 0;
/*
 * NOTE(review): assumes _cbuf_append() does not take ownership of cm_buf
 * when it fails (returns 0) -- confirm against its implementation.
 */
	if (!_cbuf_append(&msg)) {
		free(msg.cm_buf);
		return(LAMERROR);
	}

	return(0);
}

/*
 *	shm_push_body
 *
 *	Function:	- push request envelope and message body down the pike
 *			- the envelope goes out only while cq_nenvout is
 *			  positive, together with the first piece of the body
 *			- the rest of the body follows one bufferful at a time
 *	Accepts:	- process
 *			- request
 *	Returns:	- 1 if the envelope and all of the body were written
 *			- 0 if a lock could not be obtained (non-blocking
 *			  mode only), to be retried on a later call
 *			- LAMERROR on error
 */
static int
shm_push_body(ps, req)

struct c2c_proc		*ps;
MPI_Request		req;

{
    double		t0;			/* time lock wait began */
    int			chunk;			/* size of current piece */
    int			rc;			/* try-lock result */

    if (!_c2c_flblock) {
/*
 * Non-blocking case.  Exit to try again another day as soon as we fail
 * to obtain the lock.  cq_nenvout is cleared once the envelope is out so
 * that a later call resumes with the body.
 */
	if (req->rq_rpi.c2c.cq_nenvout > 0) {
	    rc = _shm_writetrylock(ps);
	    if (rc < 0) {
		return(LAMERROR);
	    }
	    if (rc == 1) {
		return(0);
	    }

	    memcpy(ps->cp_shmout, req->rq_rpi.c2c.cq_envbuf, ENVSIZE);
	    req->rq_rpi.c2c.cq_nenvout = 0;

	    if (req->rq_rpi.c2c.cq_nmsgout > 0) {
		chunk = min(req->rq_rpi.c2c.cq_nmsgout, SHMSHORTMSGLEN);
		memcpy(ps->cp_shmout + ENVSIZE,
			req->rq_rpi.c2c.cq_msgbuf, chunk);
		req->rq_rpi.c2c.cq_nmsgout -= chunk;
		req->rq_rpi.c2c.cq_msgbuf += chunk;
	    }

	    if (_shm_writeunlock(ps)) {
		return(LAMERROR);
	    }
	}

	while (req->rq_rpi.c2c.cq_nmsgout > 0) {
	    rc = _shm_writetrylock(ps);
	    if (rc < 0) {
		return(LAMERROR);
	    }
	    if (rc == 1) {
		return(0);
	    }

	    chunk = min(req->rq_rpi.c2c.cq_nmsgout, SHMBUFSIZE);
	    memcpy(ps->cp_shmout, req->rq_rpi.c2c.cq_msgbuf, chunk);
	    req->rq_rpi.c2c.cq_nmsgout -= chunk;
	    req->rq_rpi.c2c.cq_msgbuf += chunk;

	    if (_shm_writeunlock(ps)) {
		return(LAMERROR);
	    }
	}

	return(1);
    }
/*
 * Blocking case.  Wait for every lock; only the wait for the first one
 * (the envelope) is charged to the blocked time.
 */
    if (req->rq_rpi.c2c.cq_nenvout > 0) {
	LAM_TRACE(t0 = ttime());
	if (_shm_writelock(ps)) {
	    return(LAMERROR);
	}
	LAM_TRACE(_kio.ki_blktime += (ttime() - t0));

	memcpy(ps->cp_shmout, req->rq_rpi.c2c.cq_envbuf, ENVSIZE);

	if (req->rq_rpi.c2c.cq_nmsgout > 0) {
	    chunk = min(req->rq_rpi.c2c.cq_nmsgout, SHMSHORTMSGLEN);
	    memcpy(ps->cp_shmout + ENVSIZE, req->rq_rpi.c2c.cq_msgbuf, chunk);
	    req->rq_rpi.c2c.cq_nmsgout -= chunk;
	    req->rq_rpi.c2c.cq_msgbuf += chunk;
	}

	if (_shm_writeunlock(ps)) {
	    return(LAMERROR);
	}
    }

    while (req->rq_rpi.c2c.cq_nmsgout > 0) {
	if (_shm_writelock(ps)) {
	    return(LAMERROR);
	}

	chunk = min(req->rq_rpi.c2c.cq_nmsgout, SHMBUFSIZE);
	memcpy(ps->cp_shmout, req->rq_rpi.c2c.cq_msgbuf, chunk);
	req->rq_rpi.c2c.cq_nmsgout -= chunk;
	req->rq_rpi.c2c.cq_msgbuf += chunk;

	if (_shm_writeunlock(ps)) {
	    return(LAMERROR);
	}
    }

    return(1);
}

/*
 *	shm_push_env
 *
 *	Function:	- push request envelope down the pike
 *	Accepts:	- process
 *			- request
 *	Returns:	- 1 if the envelope was written
 *			- 0 if the lock could not be obtained (non-blocking
 *			  mode only), to be retried on a later call
 *			- LAMERROR on error
 */
static int
shm_push_env(ps, req)

struct c2c_proc		*ps;
MPI_Request		req;

{
	double		t0;			/* time lock wait began */
	int		rc;			/* try-lock result */

	if (_c2c_flblock) {
/*
 * Blocking: wait for the lock and charge the wait to the blocked time.
 */
		LAM_TRACE(t0 = ttime());
		if (_shm_writelock(ps)) {
			return(LAMERROR);
		}
		LAM_TRACE(_kio.ki_blktime += (ttime() - t0));
	}
	else {
/*
 * Non-blocking: a result of 1 means the lock is not available right now.
 */
		rc = _shm_writetrylock(ps);
		if (rc < 0) {
			return(LAMERROR);
		}
		if (rc == 1) {
			return(0);
		}
	}
/*
 * The outgoing buffer is ours.  Drop the envelope in and hand it over.
 */
	memcpy(ps->cp_shmout, req->rq_rpi.c2c.cq_envbuf, ENVSIZE);

	if (_shm_writeunlock(ps)) {
		return(LAMERROR);
	}

	return(1);
}
