flipc.c   [plain text]


/*
 * Copyright (c) 2015-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*	File:	ipc/flipc.h
 *	Author:	Dean Reece
 *	Date:	2016
 *
 *	Implementation of fast local ipc (flipc).
 */


#include <mach/mach_types.h>
#include <mach/boolean.h>
#include <mach/kern_return.h>

#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/host.h>
#include <kern/kalloc.h>
#include <kern/mach_node.h>

#include <ipc/port.h>
#include <ipc/ipc_types.h>
#include <ipc/ipc_init.h>
#include <ipc/ipc_kmsg.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_pset.h>
#include <ipc/ipc_table.h>
#include <ipc/ipc_entry.h>
#include <ipc/flipc.h>

#pragma pack(4)


/*** FLIPC Internal Implementation (private to flipc.c) ***/

ZONE_DECLARE(flipc_port_zone, "flipc ports",
    sizeof(struct flipc_port), ZC_NOENCRYPT);

/*  Get the mnl_name associated with local ipc_port <lport>.
 *  Returns MNL_NAME_NULL if <lport> is invalid or not a flipc port.
 */
static inline mnl_name_t
mnl_name_from_port(ipc_port_t lport)
{
	mnl_name_t name = MNL_NAME_NULL;

	if (IP_VALID(lport)) {
		flipc_port_t fport = lport->ip_messages.data.port.fport;
		if (FPORT_VALID(fport)) {
			name = fport->obj.name;
		}
	}
	return name;
}


/*  Lookup the ipc_port associated with mnl_name <name>.
 *  Returns IP_NULL if <name> is invalid or not a known mnl object.
 */
static inline ipc_port_t
mnl_name_to_port(mnl_name_t name)
{
	ipc_port_t lport = IP_NULL;

	if (MNL_NAME_VALID(name)) {
		flipc_port_t fport = (flipc_port_t)mnl_obj_lookup(name);
		if (FPORT_VALID(fport)) {
			lport = fport->lport;
		}
	}
	return lport;
}


/*  flipc_port_create() is called to convert a regular mach port into a
 *  flipc port (i.e., the port has one or more rights off-node).
 *  <lport> must be locked on entry and is not unlocked on return.
 */
static kern_return_t
flipc_port_create(ipc_port_t lport, mach_node_t node, mnl_name_t name)
{
	/* Ensure parameters are valid and not already linked */
	assert(IP_VALID(lport));
	assert(MACH_NODE_VALID(node));
	assert(MNL_NAME_VALID(name));
	assert(!FPORT_VALID(lport->ip_messages.imq_fport));

	/* Allocate and initialize a flipc port */
	flipc_port_t fport = (flipc_port_t) zalloc(flipc_port_zone);
	if (!FPORT_VALID(fport)) {
		return KERN_RESOURCE_SHORTAGE;
	}
	bzero(fport, sizeof(struct flipc_port));
	fport->obj.name = name;
	fport->hostnode = node;
	if (node == localnode) {
		fport->state = FPORT_STATE_PRINCIPAL;
	} else {
		fport->state = FPORT_STATE_PROXY;
	}

	/* Link co-structures (lport is locked) */
	fport->lport = lport;
	lport->ip_messages.imq_fport = fport;

	/* Add fport to the name hash table; revert link if insert fails */
	kern_return_t kr =  mnl_obj_insert((mnl_obj_t)fport);
	if (kr != KERN_SUCCESS) {
		lport->ip_messages.imq_fport = FPORT_NULL;
		fport->lport = IP_NULL;
		zfree(flipc_port_zone, fport);
	}

	return kr;
}


/*  flipc_port_destroy() is called to convert a flipc port back to a
 *  local-only ipc port (i.e., the port has no remaining off-node rights).
 *  This will dispose of any undelivered flipc messages, generating NAKs if
 *  needed.  <lport> must be locked on entry and is not unlocked on return.
 */
static void
flipc_port_destroy(ipc_port_t lport)
{
	/* Ensure parameter is valid, and linked to an fport with a valid name */
	assert(IP_VALID(lport));
	ipc_mqueue_t port_mq = &lport->ip_messages;
	flipc_port_t fport = port_mq->data.port.fport;
	assert(FPORT_VALID(fport));
	assert(MNL_NAME_VALID(fport->obj.name));

	/* Dispose of any undelivered messages */
	int m = port_mq->data.port.msgcount;
	if (m > 0) {
		ipc_kmsg_t kmsg;
#if DEBUG
		printf("flipc: destroying %p with %d undelivered msgs\n", lport, m);
#endif

		/* Logic was lifted from ipc_mqueue_select_on_thread() */
		while (m--) {
			kmsg = ipc_kmsg_queue_first(&port_mq->imq_messages);
			assert(kmsg != IKM_NULL);
			ipc_kmsg_rmqueue(&port_mq->imq_messages, kmsg);
			if (fport->state == FPORT_STATE_PRINCIPAL) {
				flipc_msg_ack(kmsg->ikm_node, port_mq, FALSE);
			}
			ipc_mqueue_release_msgcount(port_mq, NULL);
			port_mq->imq_seqno++;
		}
	}

	/* Remove from name hash table, unlink co-structures, and free fport */
	mnl_obj_remove(fport->obj.name);
	lport->ip_messages.data.port.fport = FPORT_NULL;
	fport->lport = IP_NULL;
	zfree(flipc_port_zone, fport);
}


/*
 *	Routine:	flipc_msg_size_from_kmsg(ipc_kmsg_t kmsg)
 *	Purpose:
 *		Compute the size of the buffer needed to hold the translated flipc
 *      message.  All identifiers are converted to flipc_names which are 64b.
 *      If this node's pointers are a different size, we have to allow for
 *      expansion of the descriptors as appropriate.
 *	Conditions:
 *		Nothing locked.
 *	Returns:
 *		size of the message as it would be sent over the flipc link.
 */
static mach_msg_size_t
flipc_msg_size_from_kmsg(ipc_kmsg_t kmsg)
{
	mach_msg_size_t fsize = kmsg->ikm_header->msgh_size;

	if (kmsg->ikm_header->msgh_bits & MACH_MSGH_BITS_COMPLEX) {
		PE_enter_debugger("flipc_msg_size_from_kmsg(): Complex messages not supported.");
	}

	return fsize;
}


/*  Translate a kmsg into a flipc msg suitable to transmit over the mach node
 *  link.  All in-line rights and objects are similarly processed.  If the msg
 *  moves a receive right, then queued messages may need to be moved as a
 *  result, causing this function to ultimately be recursive.
 */
static kern_return_t
mnl_msg_from_kmsg(ipc_kmsg_t kmsg, mnl_msg_t *fmsgp)
{
	if (kmsg->ikm_header->msgh_bits & MACH_MSGH_BITS_COMPLEX) {
		printf("mnl_msg_from_kmsg(): Complex messages not supported.");
		return KERN_FAILURE;
	}

	mach_msg_size_t fsize = flipc_msg_size_from_kmsg(kmsg);

	mnl_msg_t fmsg = mnl_msg_alloc(fsize, 0);

	if (fmsg == MNL_MSG_NULL) {
		return KERN_RESOURCE_SHORTAGE;
	}

	/* Setup flipc message header */
	fmsg->sub = MACH_NODE_SUB_FLIPC;
	fmsg->cmd = FLIPC_CMD_IPCMESSAGE;
	fmsg->node_id = localnode_id;   // Message is from us
	fmsg->qos = 0; // not used
	fmsg->size = fsize; // Payload size (does NOT include mnl_msg header)
	fmsg->object = kmsg->ikm_header->msgh_remote_port->ip_messages.data.port.fport->obj.name;

	/* Copy body of message */
	bcopy((const void*)kmsg->ikm_header, (void*)MNL_MSG_PAYLOAD(fmsg), fsize);

	// Convert port fields
	mach_msg_header_t *mmsg = (mach_msg_header_t*)MNL_MSG_PAYLOAD(fmsg);
	mmsg->msgh_remote_port = (mach_port_t)fmsg->object;
	mmsg->msgh_local_port = (mach_port_t)
	    mnl_name_from_port(mmsg->msgh_local_port);
	mmsg->msgh_voucher_port = (mach_port_name_t)MNL_NAME_NULL;

	*fmsgp = (mnl_msg_t)fmsg;

	return KERN_SUCCESS;
}


/* lifted from ipc_mig.c:mach_msg_send_from_kernel_proper() */
static mach_msg_return_t
mach_msg_send_from_remote_kernel(mach_msg_header_t      *msg,
    mach_msg_size_t        send_size,
    mach_node_t            node)
{
	ipc_kmsg_t kmsg;
	mach_msg_return_t mr;

	mr = ipc_kmsg_get_from_kernel(msg, send_size, &kmsg);
	if (mr != MACH_MSG_SUCCESS) {
		return mr;
	}

	mr = ipc_kmsg_copyin_from_kernel(kmsg);
	if (mr != MACH_MSG_SUCCESS) {
		ipc_kmsg_free(kmsg);
		return mr;
	}

	kmsg->ikm_node = node;  // node that needs to receive message ack
	mr = ipc_kmsg_send(kmsg,
	    MACH_SEND_KERNEL_DEFAULT,
	    MACH_MSG_TIMEOUT_NONE);
	if (mr != MACH_MSG_SUCCESS) {
		ipc_kmsg_destroy(kmsg);
	}

	return mr;
}


/*  Translate a flipc msg <fmsg> into a kmsg and post it to the appropriate
 *	port.  <node> is the node that originated the message, not necessarily the
 *	node we received it from.  This will block if the receiving port is full.
 */
static mach_msg_return_t
flipc_cmd_ipc(mnl_msg_t     fmsg,
    mach_node_t   node,
    uint32_t      flags   __unused)
{
	mach_msg_header_t *mmsg;

	// Convert flipc message into mach message in place to avoid alloc/copy
	mmsg = (mach_msg_header_t*)MNL_MSG_PAYLOAD(fmsg);
	mmsg->msgh_size = fmsg->size;
	mmsg->msgh_remote_port = mnl_name_to_port(fmsg->object);
	mmsg->msgh_local_port = mnl_name_to_port((mnl_name_t)mmsg->msgh_local_port);
	mmsg->msgh_voucher_port = (mach_port_name_t)MACH_PORT_NULL;
	mmsg->msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, 0);
	// unchanged: msgh_id

	return mach_msg_send_from_remote_kernel(mmsg, fmsg->size, node);
}


/*  Called when an ACKMESSAGE packet is received. <name> indicates
 *	the flipc name of the port holding the messages to be acknowledged.
 *	<msg_count> indicates the number of messages being acked for this node:port.
 */
static void
flipc_cmd_ack(flipc_ack_msg_t   fmsg,
    mach_node_t       node    __unused,
    uint32_t          flags   __unused)
{
	unsigned int msg_count = fmsg->msg_count;
	thread_t thread = current_thread();
	boolean_t kick = FALSE;

	flipc_port_t fport = (flipc_port_t)mnl_obj_lookup(fmsg->mnl.object);

	ipc_port_t lport = fport->lport;
	ip_lock(lport);

	ipc_mqueue_t lport_mq = &lport->ip_messages;
	imq_lock(lport_mq);

	assert(fport->peek_count >= msg_count); // Can't ack what we haven't peeked!

	while (msg_count--) {
		ipc_mqueue_select_on_thread(lport_mq, IMQ_NULL, 0, 0, thread);
		fport->peek_count--;
		kick |= ipc_kmsg_delayed_destroy(thread->ith_kmsg);
	}

	imq_unlock(lport_mq);
	ip_unlock(lport);

	if (kick) {
		ipc_kmsg_reap_delayed();
	}
}



/*** FLIPC Node Managment Functions (called by mach node layer) ***/


/*  flipc_node_prepare() is called by mach node layer when a remote node is
 *  registered by a link driver, or when the bootstrap port changes for the
 *  local node.  This is the flipc layer's opportunity to initialize per-node
 *  flipc state, and to convert the node's bootstrap port into a flipc port.
 *  Note that the node is not yet in the mach node table.
 *  Returns KERN_SUCCESS on success; otherwise node is not prepared.
 */
kern_return_t
flipc_node_prepare(mach_node_t node)
{
	kern_return_t kr;

	assert(MACH_NODE_VALID(node));
	ipc_port_t bs_port = node->bootstrap_port;
	assert(IP_VALID(bs_port));

	ip_lock(bs_port);

	kr = flipc_port_create(bs_port,
	    node,
	    MNL_NAME_BOOTSTRAP(node->info.node_id));
	ip_unlock(bs_port);

	return kr;
}


/*  flipc_node_retire() is called by mach node layer when a remote node is
 *  terminated by a link driver, or when the local node's bootstrap port
 *  becomes invalid.  This is the flipc layer's opportunity to free per-node
 *  flipc state, and to revert the node's bootstrap port to a local ipc port.
 *  <node> must be locked by the caller.
 *  Returns KERN_SUCCESS on success.
 */
kern_return_t
flipc_node_retire(mach_node_t node)
{
	if (!MACH_NODE_VALID(node)) {
		return KERN_NODE_DOWN;
	}

	ipc_port_t bs_port = node->bootstrap_port;
	if (IP_VALID(bs_port)) {
		ip_lock(bs_port);
		flipc_port_destroy(bs_port);
		ip_unlock(bs_port);
	}

	return KERN_SUCCESS;
}


/*** FLIPC Message Functions (called by mach node layer) ***/


/*  The node layer calls flipc_msg_to_remote_node() to fetch the next message
 *  for <node>.  This function will block until a message is available or the
 *  node is terminated, in which case it returns MNL_MSG_NULL.
 */
mnl_msg_t
flipc_msg_to_remote_node(mach_node_t  to_node,
    uint32_t     flags __unused)
{
	mach_port_seqno_t msgoff;
	ipc_kmsg_t kmsg = IKM_NULL;
	mnl_msg_t fmsg = MNL_MSG_NULL;

	assert(to_node != localnode);
	assert(get_preemption_level() == 0);

	ipc_mqueue_t portset_mq = &to_node->proxy_port_set->ips_messages;
	ipc_mqueue_t port_mq = IMQ_NULL;

	while (!to_node->dead) {
		/* Fetch next message from proxy port */
		ipc_mqueue_receive(portset_mq, MACH_PEEK_MSG, 0, 0, THREAD_ABORTSAFE);

		thread_t thread = current_thread();
		if (thread->ith_state == MACH_PEEK_READY) {
			port_mq = thread->ith_peekq;
			thread->ith_peekq = IMQ_NULL;
		} else {
			panic("Unexpected thread state %d after ipc_mqueue_receive()",
			    thread->ith_state);
		}

		assert(get_preemption_level() == 0);
		imq_lock(port_mq);

		flipc_port_t fport = port_mq->data.port.fport;

		if (FPORT_VALID(fport)) {
			msgoff = port_mq->data.port.fport->peek_count;

			ipc_mqueue_peek_locked(port_mq, &msgoff, NULL, NULL, NULL, &kmsg);
			if (kmsg != IKM_NULL) {
				port_mq->data.port.fport->peek_count++;
			}

			/* Clean up outstanding prepost on port_mq.
			 * This also unlocks port_mq.
			 */
			ipc_mqueue_release_peek_ref(port_mq);
			assert(get_preemption_level() == 0);

			/* DANGER:  The code below must be allowed to allocate so it can't
			 * run under the protection of the imq_lock, but that leaves mqueue
			 * open for business for a small window before we examine kmsg.
			 * This SHOULD be OK, since we are the only thread looking.
			 */
			if (kmsg != IKM_NULL) {
				mnl_msg_from_kmsg(kmsg, (mnl_msg_t*)&fmsg);
			}
		} else {
			/* Must be from the control_port, which is not a flipc port */
			assert(!FPORT_VALID(port_mq->data.port.fport));

			/* This is a simplified copy of ipc_mqueue_select_on_thread() */
			kmsg = ipc_kmsg_queue_first(&port_mq->imq_messages);
			assert(kmsg != IKM_NULL);
			ipc_kmsg_rmqueue(&port_mq->imq_messages, kmsg);
			ipc_mqueue_release_msgcount(port_mq, portset_mq);
			imq_unlock(port_mq);
			current_task()->messages_received++;
			ip_release(to_node->control_port); // Should derive ref from port_mq

			/* We just pass the kmsg payload as the fmsg.
			 * flipc_msg_free() will notice and free the kmsg properly.
			 */
			mach_msg_header_t *hdr = kmsg->ikm_header;
			fmsg = (mnl_msg_t)(&hdr[1]);
			/* Stash kmsg pointer just before fmsg */
			*(ipc_kmsg_t*)((vm_offset_t)fmsg - sizeof(vm_offset_t)) = kmsg;
		}

		if (MNL_MSG_VALID(fmsg)) {
			break;
		}
	}
	assert(MNL_MSG_VALID(fmsg));
	return fmsg;
}


/*  The mach node layer calls this to deliver an incoming message.  It is the
 *  responsibility of the caller to release the received message buffer after
 *  return.
 */
void
flipc_msg_from_node(mach_node_t from_node   __unused,
    mnl_msg_t   msg,
    uint32_t    flags)
{
	/*  Note that if flipc message forwarding is supported, the from_node arg
	 *  may not match fmsg->node_id.  The former is the node from which we
	 *	received the message; the latter is the node that originated the
	 *	message.  We use the originating node, which is where the ack goes.
	 */
	assert(msg->sub == MACH_NODE_SUB_FLIPC);
	mach_node_t node = mach_node_for_id_locked(msg->node_id, FALSE, FALSE);
	MACH_NODE_UNLOCK(node);

	switch (msg->cmd) {
	case FLIPC_CMD_IPCMESSAGE:
		flipc_cmd_ipc(msg, node, flags);
		break;

	case FLIPC_CMD_ACKMESSAGE:
	case FLIPC_CMD_NAKMESSAGE:
		flipc_cmd_ack((flipc_ack_msg_t)msg, node, flags);
		break;

	default:
#if DEBUG
		PE_enter_debugger("flipc_incoming(): Invalid command");
#endif
		break;
	}
}


/*  The node layer calls flipc_msg_free() to dispose of sent messages that
 *  originated in the FLIPC layer.  This allows us to repurpose the payload
 *  of an ack or nak kmsg as a flipc message to avoid a copy - we detect
 *  such messages here and free them appropriately.
 */
void
flipc_msg_free(mnl_msg_t    msg,
    uint32_t     flags)
{
	switch (msg->cmd) {
	case FLIPC_CMD_ACKMESSAGE:  // Flipc msg is a kmsg in disguise...
	case FLIPC_CMD_NAKMESSAGE:  // Convert back to kmsg for disposal
		ipc_kmsg_free(*(ipc_kmsg_t*)((vm_offset_t)msg - sizeof(vm_offset_t)));
		break;

	default:    // Flipc msg is not a kmsg in disguise; dispose of normally
		mnl_msg_free(msg, flags);
		break;
	}
}


/*** FLIPC Message Functions (called by mach ipc subsystem) ***/

/*	Ack's one message sent to <mqueue> from <node>.  A new kmsg is allocated
 *  and filled in as an ack, then posted to the node's contol port.  This will
 *  wake the link driver (if sleeping) and cause the ack to be included with
 *  normal IPC traffic.
 *
 *  This function immediately returns if <fport> or <node> is invalid, so it
 *  is safe & quick to call speculatively.
 *
 *	Called from mach ipc_mqueue.c when a flipc-originated message is consumed.
 */
void
flipc_msg_ack(mach_node_t   node,
    ipc_mqueue_t  mqueue,
    boolean_t     delivered)
{
	flipc_port_t fport = mqueue->imq_fport;

	assert(FPORT_VALID(fport));
	assert(MACH_NODE_VALID(node));

	mnl_name_t name = MNL_NAME_NULL;
	mach_node_id_t nid = HOST_LOCAL_NODE;
	ipc_port_t ack_port = IP_NULL;

	ip_lock(fport->lport);
	name = fport->obj.name;
	ip_unlock(fport->lport);

	if (!MNL_NAME_VALID(name)) {
		return;
	}

	MACH_NODE_LOCK(node);
	if (node->active) {
		nid = node->info.node_id;
		ack_port = node->control_port;
	}
	MACH_NODE_UNLOCK(node);

	if (!IP_VALID(ack_port) || !MACH_NODE_ID_VALID(nid)) {
		return;
	}

	/* We have a valid node id & obj name, and a port to send the ack to. */
	ipc_kmsg_t kmsg = ipc_kmsg_alloc(sizeof(struct flipc_ack_msg) + MAX_TRAILER_SIZE);
	assert((unsigned long long)kmsg >= 4ULL);//!= IKM_NULL);
	mach_msg_header_t *msg = kmsg->ikm_header;

	/* Fill in the mach_msg_header struct */
	msg->msgh_bits = MACH_MSGH_BITS_SET(0, 0, 0, 0);
	msg->msgh_size = sizeof(msg);
	msg->msgh_remote_port = ack_port;
	msg->msgh_local_port = MACH_PORT_NULL;
	msg->msgh_voucher_port = MACH_PORT_NULL;
	msg->msgh_id = FLIPC_CMD_ID;

	/* Fill in the flipc_ack_msg struct */
	flipc_ack_msg_t fmsg = (flipc_ack_msg_t)(&msg[1]);
	fmsg->resend_to = HOST_LOCAL_NODE;
	fmsg->msg_count = 1; // Might want to coalesce acks to a node/name pair

	/* Fill in the mnl_msg struct */
	fmsg->mnl.sub = MACH_NODE_SUB_FLIPC;
	fmsg->mnl.cmd = delivered ? FLIPC_CMD_ACKMESSAGE : FLIPC_CMD_NAKMESSAGE;
	fmsg->mnl.qos = 0;    // Doesn't do anything yet
	fmsg->mnl.flags = 0;
	fmsg->mnl.node_id = nid;
	fmsg->mnl.object = name;
	fmsg->mnl.options = 0;
	fmsg->mnl.size = sizeof(struct flipc_ack_msg) - sizeof(struct mnl_msg);

#if (0)
	mach_msg_return_t mmr;
	ipc_mqueue_t ack_mqueue;

	ip_lock(ack_port);
	ack_mqueue = &ack_port->ip_messages;
	imq_lock(ack_mqueue);
	ip_unlock(ack_port);

	/* ipc_mqueue_send() unlocks ack_mqueue */
	mmr = ipc_mqueue_send(ack_mqueue, kmsg, 0, 0);
#else
	kern_return_t kr;
	kr = ipc_kmsg_send(kmsg,
	    MACH_SEND_KERNEL_DEFAULT,
	    MACH_MSG_TIMEOUT_NONE);
#endif
}