#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/mcache.h>
#include <sys/resourcevar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syslog.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/sysctl.h>
#include <kern/zalloc.h>
#include <kern/locks.h>
#include <mach/thread_act.h>
#include <mach/sdt.h>
#include <net/if.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_var.h>
#include <netinet/mptcp_var.h>
#include <netinet/mptcp.h>
#include <netinet/mptcp_seq.h>
#include <netinet/mptcp_timer.h>
#include <libkern/crypto/sha1.h>
#if INET6
#include <netinet6/in6_pcb.h>
#include <netinet6/ip6protosw.h>
#endif
#include <dev/random/randomdev.h>
/*
 * Forward declarations of file-local routines: session teardown, the
 * per-session worker thread, key pool setup, attaching/detaching the MP
 * control block to/from a subflow socket, and the subflow socket wrappers
 * and upcalls.
 */
static void mptcp_sesdestroy(struct mptses *);
static void mptcp_thread_signal_locked(struct mptses *);
static void mptcp_thread_terminate_signal(struct mptses *);
static void mptcp_thread_dowork(struct mptses *);
static void mptcp_thread_func(void *, wait_result_t);
static void mptcp_thread_destroy(struct mptses *);
static void mptcp_key_pool_init(void);
static void mptcp_attach_to_subf(struct socket *, struct mptcb *, connid_t);
static void mptcp_detach_mptcb_from_subf(struct mptcb *, struct socket *);
static void mptcp_conn_properties(struct mptcb *);
static void mptcp_init_statevars(struct mptcb *);
static uint32_t mptcp_gc(struct mppcbinfo *);
/* Subflow socket lifecycle and I/O wrappers. */
static int mptcp_subflow_socreate(struct mptses *, struct mptsub *,
int, struct proc *, struct socket **);
static int mptcp_subflow_soclose(struct mptsub *, struct socket *);
static int mptcp_subflow_soconnectx(struct mptses *, struct mptsub *);
static int mptcp_subflow_soreceive(struct socket *, struct sockaddr **,
struct uio *, struct mbuf **, struct mbuf **, int *);
/* Read, write and event upcalls registered on each subflow socket. */
static void mptcp_subflow_rupcall(struct socket *, void *, int);
static void mptcp_subflow_input(struct mptses *, struct mptsub *);
static void mptcp_subflow_wupcall(struct socket *, void *, int);
static void mptcp_subflow_eupcall(struct socket *, void *, uint32_t);
static void mptcp_update_last_owner(struct mptsub *, struct socket *);
/*
 * Result codes returned by mptcp_subflow_events() and by the individual
 * mptcp_subflow_*_ev() handlers; mptcp_evret2str() accepts one of these
 * values and returns a string.
 * NOTE(review): the precise meaning of each code is implied only by its
 * name here -- confirm against the handlers, which are outside this view.
 */
typedef enum {
MPTS_EVRET_DELETE = 1,
MPTS_EVRET_OK = 2,
MPTS_EVRET_CONNECT_PENDING = 3,
MPTS_EVRET_DISCONNECT_FALLBACK = 4,
MPTS_EVRET_OK_UPDATE = 5,
} ev_ret_t;
/*
 * Subflow event handling: a top-level routine plus one handler per event.
 * NOTE(review): the handler names line up with the SO_FILT_HINT_* events
 * registered in mptcp_subflow_add(); the dispatch itself is outside this
 * view -- confirm.
 */
static ev_ret_t mptcp_subflow_events(struct mptses *, struct mptsub *);
static ev_ret_t mptcp_subflow_connreset_ev(struct mptses *, struct mptsub *);
static ev_ret_t mptcp_subflow_cantrcvmore_ev(struct mptses *, struct mptsub *);
static ev_ret_t mptcp_subflow_cantsendmore_ev(struct mptses *, struct mptsub *);
static ev_ret_t mptcp_subflow_timeout_ev(struct mptses *, struct mptsub *);
static ev_ret_t mptcp_subflow_nosrcaddr_ev(struct mptses *, struct mptsub *);
static ev_ret_t mptcp_subflow_failover_ev(struct mptses *, struct mptsub *);
static ev_ret_t mptcp_subflow_ifdenied_ev(struct mptses *, struct mptsub *);
static ev_ret_t mptcp_subflow_suspend_ev(struct mptses *, struct mptsub *);
static ev_ret_t mptcp_subflow_resume_ev(struct mptses *, struct mptsub *);
static ev_ret_t mptcp_subflow_connected_ev(struct mptses *, struct mptsub *);
static ev_ret_t mptcp_subflow_disconnected_ev(struct mptses *, struct mptsub *);
static ev_ret_t mptcp_subflow_mpstatus_ev(struct mptses *, struct mptsub *);
static ev_ret_t mptcp_subflow_mustrst_ev(struct mptses *, struct mptsub *);
static const char *mptcp_evret2str(ev_ret_t);
/* Key reservation and authentication helpers. */
static mptcp_key_t *mptcp_reserve_key(void);
static int mptcp_do_sha1(mptcp_key_t *, char *, int);
static int mptcp_init_authparms(struct mptcb *);
/* Decides whether a subflow may be deleted (1) or must be kept (0). */
static int mptcp_delete_ok(struct mptses *mpte, struct mptsub *mpts);
/* Zone and element size for struct mptsub; both set up in mptcp_init(). */
static unsigned int mptsub_zone_size;
static struct zone *mptsub_zone;
/* Zone and element size for struct mptopt; both set up in mptcp_init(). */
static unsigned int mptopt_zone_size;
static struct zone *mptopt_zone;
/* Zone and element size for struct mptcp_subf_auth_entry; set up in mptcp_init(). */
static unsigned int mpt_subauth_entry_size;
static struct zone *mpt_subauth_zone;
/* Global MPTCP PCB info; zeroed, populated and attached in mptcp_init(). */
struct mppcbinfo mtcbinfo;
/* NOTE(review): presumably populated by mptcp_key_pool_init() -- confirm. */
static struct mptcp_keys_pool_head mptcp_keys_pool;
/*
 * NOTE(review): not referenced in the visible part of this file; presumably
 * per-pass subflow write/read sizes in bytes -- confirm against the users.
 */
#define MPTCP_SUBFLOW_WRITELEN (8 * 1024)
#define MPTCP_SUBFLOW_READLEN (8 * 1024)
/* net.inet.mptcp sysctl subtree and its tunables/counters. */
SYSCTL_DECL(_net_inet);
SYSCTL_NODE(_net_inet, OID_AUTO, mptcp, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "MPTCP");
uint32_t mptcp_verbose = 0;
SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, verbose, CTLFLAG_RW|CTLFLAG_LOCKED,
&mptcp_verbose, 0, "MPTCP verbosity level");
SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, pcbcount, CTLFLAG_RD|CTLFLAG_LOCKED,
&mtcbinfo.mppi_count, 0, "Number of active PCBs");
uint32_t mptcp_socket_limit = MPPCB_LIMIT;
SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, sk_lim, CTLFLAG_RW|CTLFLAG_LOCKED,
&mptcp_socket_limit, 0, "MPTCP socket limit");
/*
 * Private copies of the TCP protosw and user-request tables that subflow
 * sockets are switched to in mptcp_subflow_socreate(); filled in by
 * mptcp_init() with pru_soreceive and pru_rcvoob overridden.
 */
static struct protosw mptcp_subflow_protosw;
static struct pr_usrreqs mptcp_subflow_usrreqs;
#if INET6
static struct ip6protosw mptcp_subflow_protosw6;
static struct pr_usrreqs mptcp_subflow_usrreqs6;
#endif
/*
 * One-time initialization of the MPTCP layer.
 *
 * Clones the TCP (and, with INET6, the TCP6) protosw and user-request
 * tables into private subflow copies whose pru_soreceive and pru_rcvoob
 * entries are overridden, sets up the global MPTCP PCB info (zone, locks,
 * gc and timer callbacks), creates the subflow, socket-option and
 * subflow-auth zones, and finally initializes the key pool.
 *
 * Calls after the first one return immediately.  Any zone creation
 * failure panics.
 */
void
mptcp_init(struct protosw *pp, struct domain *dp)
{
#pragma unused(dp)
	static int mptcp_initialized = 0;
	struct protosw *prp;
#if INET6
	struct ip6protosw *prp6;
#endif

	VERIFY((pp->pr_flags & (PR_INITIALIZED|PR_ATTACHED)) == PR_ATTACHED);

	/* Only the first caller does the work. */
	if (mptcp_initialized)
		return;
	mptcp_initialized = 1;

	/*
	 * IPv4 subflows: start from the stock TCP tables, then redirect the
	 * receive path to our own routine and refuse out-of-band receive.
	 */
	prp = pffindproto_locked(PF_INET, IPPROTO_TCP, SOCK_STREAM);
	VERIFY(prp != NULL);
	bcopy(prp, &mptcp_subflow_protosw, sizeof (*prp));
	bcopy(prp->pr_usrreqs, &mptcp_subflow_usrreqs,
	    sizeof (mptcp_subflow_usrreqs));
	mptcp_subflow_usrreqs.pru_soreceive = mptcp_subflow_soreceive;
	mptcp_subflow_usrreqs.pru_rcvoob = pru_rcvoob_notsupp;
	mptcp_subflow_protosw.pr_usrreqs = &mptcp_subflow_usrreqs;
	/* The copy is not linked on any protosw list. */
	mptcp_subflow_protosw.pr_entry.tqe_next = NULL;
	mptcp_subflow_protosw.pr_entry.tqe_prev = NULL;
	/*
	 * Fill the copied filter list head with a recognizable bogus value.
	 * NOTE(review): presumably so any accidental use faults -- confirm.
	 */
	mptcp_subflow_protosw.pr_filter_head.tqh_first =
	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
	mptcp_subflow_protosw.pr_filter_head.tqh_last =
	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;

#if INET6
	/* IPv6 subflows: same treatment for the TCP6 tables. */
	prp6 = (struct ip6protosw *)pffindproto_locked(PF_INET6,
	    IPPROTO_TCP, SOCK_STREAM);
	VERIFY(prp6 != NULL);
	bcopy(prp6, &mptcp_subflow_protosw6, sizeof (*prp6));
	bcopy(prp6->pr_usrreqs, &mptcp_subflow_usrreqs6,
	    sizeof (mptcp_subflow_usrreqs6));
	mptcp_subflow_usrreqs6.pru_soreceive = mptcp_subflow_soreceive;
	mptcp_subflow_usrreqs6.pru_rcvoob = pru_rcvoob_notsupp;
	mptcp_subflow_protosw6.pr_usrreqs = &mptcp_subflow_usrreqs6;
	mptcp_subflow_protosw6.pr_entry.tqe_next = NULL;
	mptcp_subflow_protosw6.pr_entry.tqe_prev = NULL;
	mptcp_subflow_protosw6.pr_filter_head.tqh_first =
	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
	mptcp_subflow_protosw6.pr_filter_head.tqh_last =
	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
#endif

	/* Global MPTCP PCB info: list head, backing zone, locks, callbacks. */
	bzero(&mtcbinfo, sizeof (mtcbinfo));
	TAILQ_INIT(&mtcbinfo.mppi_pcbs);
	mtcbinfo.mppi_size = sizeof (struct mpp_mtp);
	mtcbinfo.mppi_zone = zinit(mtcbinfo.mppi_size,
	    1024 * mtcbinfo.mppi_size, 8192, "mptcb");
	if (mtcbinfo.mppi_zone == NULL) {
		panic("%s: unable to allocate MPTCP PCB zone\n", __func__);
	}
	zone_change(mtcbinfo.mppi_zone, Z_CALLERACCT, FALSE);
	zone_change(mtcbinfo.mppi_zone, Z_EXPAND, TRUE);

	mtcbinfo.mppi_lock_grp_attr = lck_grp_attr_alloc_init();
	mtcbinfo.mppi_lock_grp = lck_grp_alloc_init("mppcb",
	    mtcbinfo.mppi_lock_grp_attr);
	mtcbinfo.mppi_lock_attr = lck_attr_alloc_init();
	lck_mtx_init(&mtcbinfo.mppi_lock, mtcbinfo.mppi_lock_grp,
	    mtcbinfo.mppi_lock_attr);

	mtcbinfo.mppi_gc = mptcp_gc;
	mtcbinfo.mppi_timer = mptcp_timer;
	mp_pcbinfo_attach(&mtcbinfo);

	/* Zone for subflow structures. */
	mptsub_zone_size = sizeof (struct mptsub);
	mptsub_zone = zinit(mptsub_zone_size, 1024 * mptsub_zone_size,
	    8192, "mptsub");
	if (mptsub_zone == NULL) {
		panic("%s: unable to allocate MPTCP subflow zone\n", __func__);
	}
	zone_change(mptsub_zone, Z_CALLERACCT, FALSE);
	zone_change(mptsub_zone, Z_EXPAND, TRUE);

	/* Zone for recorded socket options. */
	mptopt_zone_size = sizeof (struct mptopt);
	mptopt_zone = zinit(mptopt_zone_size, 128 * mptopt_zone_size,
	    1024, "mptopt");
	if (mptopt_zone == NULL) {
		panic("%s: unable to allocate MPTCP option zone\n", __func__);
	}
	zone_change(mptopt_zone, Z_CALLERACCT, FALSE);
	zone_change(mptopt_zone, Z_EXPAND, TRUE);

	/* Zone for subflow authentication entries. */
	mpt_subauth_entry_size = sizeof (struct mptcp_subf_auth_entry);
	mpt_subauth_zone = zinit(mpt_subauth_entry_size,
	    1024 * mpt_subauth_entry_size, 8192, "mptauth");
	if (mpt_subauth_zone == NULL) {
		panic("%s: unable to allocate MPTCP address auth zone \n",
		    __func__);
	}
	zone_change(mpt_subauth_zone, Z_CALLERACCT, FALSE);
	zone_change(mpt_subauth_zone, Z_EXPAND, TRUE);

	mptcp_key_pool_init();
}
/*
 * Create the MPTCP session and MP control block for a multipath PCB.
 *
 * Both objects live inside the struct mpp_mtp that contains `mpp', so
 * nothing is allocated here: the session is zeroed and linked to the PCB,
 * its option and subflow lists and thread lock are initialized, and the
 * session worker thread is started.  On success the MP socket gains one
 * use count and the control block is zeroed, given its lock and pointed
 * back at the session.
 *
 * Returns the session, or NULL if the thread could not be started (the
 * thread lock is destroyed again in that case).  The session__create
 * probe fires on both outcomes.
 */
struct mptses *
mptcp_sescreate(struct socket *mp_so, struct mppcb *mpp)
{
	struct mppcbinfo *mppi;
	struct mptses *mpte;
	struct mptcb *mp_tp;
	int error = 0;

	VERIFY(mpp != NULL);
	mppi = mpp->mpp_pcbinfo;
	VERIFY(mppi != NULL);

	mpte = &((struct mpp_mtp *)mpp)->mpp_ses;
	mp_tp = &((struct mpp_mtp *)mpp)->mtcb;

	/* Session (PCB extension) part. */
	bzero(mpte, sizeof (*mpte));
	VERIFY(mpp->mpp_pcbe == NULL);
	mpp->mpp_pcbe = mpte;
	mpte->mpte_mppcb = mpp;
	mpte->mpte_mptcb = mp_tp;

	TAILQ_INIT(&mpte->mpte_sopts);
	TAILQ_INIT(&mpte->mpte_subflows);
	mpte->mpte_associd = ASSOCID_ANY;
	mpte->mpte_connid_last = CONNID_ANY;

	lck_mtx_init(&mpte->mpte_thread_lock, mppi->mppi_lock_grp,
	    mppi->mppi_lock_attr);

	if (kernel_thread_start(mptcp_thread_func, mpte,
	    &mpte->mpte_thread) == KERN_SUCCESS) {
		/* use count taken once the worker thread is running */
		mp_so->so_usecount++;

		/* Control block part. */
		bzero(mp_tp, sizeof (*mp_tp));
		lck_mtx_init(&mp_tp->mpt_lock, mppi->mppi_lock_grp,
		    mppi->mppi_lock_attr);
		mp_tp->mpt_mpte = mpte;
	} else {
		error = ENOBUFS;
		lck_mtx_destroy(&mpte->mpte_thread_lock, mppi->mppi_lock_grp);
	}

	DTRACE_MPTCP5(session__create, struct socket *, mp_so,
	    struct sockbuf *, &mp_so->so_rcv,
	    struct sockbuf *, &mp_so->so_snd,
	    struct mppcb *, mpp, int, error);

	if (error != 0)
		return (NULL);
	return (mpte);
}
/*
 * Tear down an MPTCP session: free its recorded socket options and destroy
 * the session thread lock and the MP control block lock.
 *
 * The caller must hold the MPTE lock, and every subflow must already be
 * gone (both are asserted).  No memory is released here; the session and
 * control block are embedded in the enclosing struct mpp_mtp (see
 * mptcp_sescreate()).
 */
static void
mptcp_sesdestroy(struct mptses *mpte)
{
struct mptcb *mp_tp;
MPTE_LOCK_ASSERT_HELD(mpte);
mp_tp = mpte->mpte_mptcb;
VERIFY(mp_tp != NULL);
/* drop every option record still queued on the session */
mptcp_flush_sopts(mpte);
VERIFY(TAILQ_EMPTY(&mpte->mpte_subflows) && mpte->mpte_numflows == 0);
/* these pair with the lck_mtx_init() calls made in mptcp_sescreate() */
lck_mtx_destroy(&mpte->mpte_thread_lock,
mpte->mpte_mppcb->mpp_pcbinfo->mppi_lock_grp);
lck_mtx_destroy(&mp_tp->mpt_lock,
mpte->mpte_mppcb->mpp_pcbinfo->mppi_lock_grp);
DTRACE_MPTCP2(session__destroy, struct mptses *, mpte,
struct mptcb *, mp_tp);
}
/*
 * Allocate a zeroed socket option record from the option zone.
 *
 * `how' selects the allocator: M_WAITOK uses zalloc(), any other value
 * uses zalloc_noblock().  Returns NULL if the zone allocation fails.
 */
struct mptopt *
mptcp_sopt_alloc(int how)
{
	struct mptopt *mpo;

	if (how == M_WAITOK)
		mpo = zalloc(mptopt_zone);
	else
		mpo = zalloc_noblock(mptopt_zone);

	if (mpo == NULL)
		return (NULL);

	bzero(mpo, mptopt_zone_size);
	return (mpo);
}
/*
 * Return a socket option record to the option zone.  The record must not
 * be on a session's option list (MPOF_ATTACHED clear); remove it with
 * mptcp_sopt_remove() first.
 */
void
mptcp_sopt_free(struct mptopt *mpo)
{
VERIFY(!(mpo->mpo_flags & MPOF_ATTACHED));
zfree(mptopt_zone, mpo);
}
/*
 * Append a socket option record to the tail of the session's option list
 * and mark it MPOF_ATTACHED.  The caller must hold the MPTE lock and the
 * record must not already be attached.
 */
void
mptcp_sopt_insert(struct mptses *mpte, struct mptopt *mpo)
{
MPTE_LOCK_ASSERT_HELD(mpte);
VERIFY(!(mpo->mpo_flags & MPOF_ATTACHED));
mpo->mpo_flags |= MPOF_ATTACHED;
TAILQ_INSERT_TAIL(&mpte->mpte_sopts, mpo, mpo_entry);
}
/*
 * Unlink a socket option record from the session's option list and clear
 * MPOF_ATTACHED.  The record is not freed.  The caller must hold the MPTE
 * lock and the record must currently be attached.
 */
void
mptcp_sopt_remove(struct mptses *mpte, struct mptopt *mpo)
{
MPTE_LOCK_ASSERT_HELD(mpte);
VERIFY(mpo->mpo_flags & MPOF_ATTACHED);
mpo->mpo_flags &= ~MPOF_ATTACHED;
TAILQ_REMOVE(&mpte->mpte_sopts, mpo, mpo_entry);
}
/*
 * Look up the recorded option whose level and name match `sopt'.
 *
 * Returns the first matching record on the session's option list, or NULL
 * if there is none.  When a record is found, the request's value size is
 * asserted to be sizeof (int).  The caller must hold the MPTE lock.
 */
struct mptopt *
mptcp_sopt_find(struct mptses *mpte, struct sockopt *sopt)
{
	struct mptopt *mpo;

	MPTE_LOCK_ASSERT_HELD(mpte);

	/* mpo is left NULL by the iterator when nothing matches */
	TAILQ_FOREACH(mpo, &mpte->mpte_sopts, mpo_entry) {
		if (mpo->mpo_level != sopt->sopt_level)
			continue;
		if (mpo->mpo_name != sopt->sopt_name)
			continue;
		break;
	}

	VERIFY(mpo == NULL || sopt->sopt_valsize == sizeof (int));

	return (mpo);
}
/*
 * Remove and free every socket option record on the session's option
 * list, leaving the list empty.  The caller must hold the MPTE lock.
 */
void
mptcp_flush_sopts(struct mptses *mpte)
{
	struct mptopt *cur, *next;

	MPTE_LOCK_ASSERT_HELD(mpte);

	/* safe iteration: each record is unlinked and freed as we go */
	TAILQ_FOREACH_SAFE(cur, &mpte->mpte_sopts, mpo_entry, next) {
		mptcp_sopt_remove(mpte, cur);
		mptcp_sopt_free(cur);
	}

	VERIFY(TAILQ_EMPTY(&mpte->mpte_sopts));
}
/*
 * Allocate a zeroed subflow structure from the subflow zone and
 * initialize its lock.
 *
 * `how' selects the allocator: M_WAITOK uses zalloc(), any other value
 * uses zalloc_noblock().  Returns NULL if the zone allocation fails.
 */
struct mptsub *
mptcp_subflow_alloc(int how)
{
	struct mptsub *mpts;

	if (how == M_WAITOK)
		mpts = zalloc(mptsub_zone);
	else
		mpts = zalloc_noblock(mptsub_zone);

	if (mpts == NULL)
		return (NULL);

	bzero(mpts, mptsub_zone_size);
	lck_mtx_init(&mpts->mpts_lock, mtcbinfo.mppi_lock_grp,
	    mtcbinfo.mppi_lock_attr);

	return (mpts);
}
/*
 * Destroy a subflow structure and return it to the subflow zone.
 *
 * Called with the subflow lock held; the lock is released and destroyed
 * here.  The subflow must have no references left, must not be attached
 * to a session and must have no socket (all asserted).  Any source and
 * destination address lists still held are freed.
 */
void
mptcp_subflow_free(struct mptsub *mpts)
{
MPTS_LOCK_ASSERT_HELD(mpts);
VERIFY(mpts->mpts_refcnt == 0);
VERIFY(!(mpts->mpts_flags & MPTSF_ATTACHED));
VERIFY(mpts->mpts_mpte == NULL);
VERIFY(mpts->mpts_socket == NULL);
if (mpts->mpts_src_sl != NULL) {
sockaddrlist_free(mpts->mpts_src_sl);
mpts->mpts_src_sl = NULL;
}
if (mpts->mpts_dst_sl != NULL) {
sockaddrlist_free(mpts->mpts_dst_sl);
mpts->mpts_dst_sl = NULL;
}
/* the lock has to be dropped before it can be destroyed */
MPTS_UNLOCK(mpts);
lck_mtx_destroy(&mpts->mpts_lock, mtcbinfo.mppi_lock_grp);
zfree(mptsub_zone, mpts);
}
/*
 * Create and configure the TCP socket backing a subflow.
 *
 * The socket is created through socreate_internal() with SOCF_ASYNC and
 * SOCF_MP_SUBFLOW, its buffers are marked SB_NOCOMPRESS, and a fixed set of
 * options is applied (SO_NOSIGPIPE, SO_NOADDRERR, SO_KEEPALIVE, SO_RCVBUF
 * of MPTCP_RWIN_MAX, TCP_KEEPALIVE of mptcp_subflow_keeptime).  Options
 * recorded on the MPTCP session that are flagged MPOF_SUBFLOW_OK are then
 * replayed, and finally the socket's protosw is switched to the private
 * subflow copy, with the original saved in mpts->mpts_oprotosw.
 *
 * mpte	session; its lock must be held by the caller
 * mpts	subflow the socket is being created for
 * dom	PF_INET or PF_INET6 (anything else trips an assertion)
 * p	process passed through to socreate_internal()
 * so	out: the new socket on success, NULL on failure
 *
 * Returns 0 on success or an errno value.  On failure no socket is left
 * behind: if a mandatory option cannot be set, the freshly created socket
 * is closed here, because the caller has no other handle to it.
 */
static int
mptcp_subflow_socreate(struct mptses *mpte, struct mptsub *mpts, int dom,
    struct proc *p, struct socket **so)
{
	struct mptopt smpo, *mpo, *tmpo;
	struct socket *mp_so;
	int error;

	*so = NULL;
	MPTE_LOCK_ASSERT_HELD(mpte);
	mp_so = mpte->mpte_mppcb->mpp_socket;

	if ((error = socreate_internal(dom, so, SOCK_STREAM,
	    IPPROTO_TCP, p, SOCF_ASYNC | SOCF_MP_SUBFLOW, PROC_NULL)) != 0) {
		mptcplog((LOG_ERR, "MPTCP ERROR %s: mp_so 0x%llx unable to "
		    "create subflow socket error %d\n", __func__,
		    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), error));
		return (error);
	}

	socket_lock(*so, 0);
	VERIFY((*so)->so_flags & SOF_MP_SUBFLOW);
	VERIFY(((*so)->so_state & (SS_NBIO|SS_NOFDREF)) ==
	    (SS_NBIO|SS_NOFDREF));

	/* prevent the socket buffers from being compressed */
	(*so)->so_rcv.sb_flags |= SB_NOCOMPRESS;
	(*so)->so_snd.sb_flags |= SB_NOCOMPRESS;

	bzero(&smpo, sizeof (smpo));
	smpo.mpo_flags |= MPOF_SUBFLOW_OK;
	smpo.mpo_level = SOL_SOCKET;
	smpo.mpo_intval = 1;

	/* mandatory socket-level defaults */
	smpo.mpo_name = SO_NOSIGPIPE;
	if ((error = mptcp_subflow_sosetopt(mpte, *so, &smpo)) != 0)
		goto out;

	smpo.mpo_name = SO_NOADDRERR;
	if ((error = mptcp_subflow_sosetopt(mpte, *so, &smpo)) != 0)
		goto out;

	smpo.mpo_name = SO_KEEPALIVE;
	if ((error = mptcp_subflow_sosetopt(mpte, *so, &smpo)) != 0)
		goto out;

	/* fixed receive buffer size; send-side autosizing is turned off */
	smpo.mpo_name = SO_RCVBUF;
	smpo.mpo_intval = MPTCP_RWIN_MAX;
	if ((error = mptcp_subflow_sosetopt(mpte, *so, &smpo)) != 0)
		goto out;

	VERIFY(!((*so)->so_rcv.sb_flags & SB_AUTOSIZE));
	(*so)->so_snd.sb_flags &= ~SB_AUTOSIZE;

	/* TCP-level keepalive default */
	smpo.mpo_level = IPPROTO_TCP;
	smpo.mpo_intval = mptcp_subflow_keeptime;
	smpo.mpo_name = TCP_KEEPALIVE;
	if ((error = mptcp_subflow_sosetopt(mpte, *so, &smpo)) != 0)
		goto out;

	/* replay the options recorded on the MPTCP socket */
	TAILQ_FOREACH_SAFE(mpo, &mpte->mpte_sopts, mpo_entry, tmpo) {
		int interim;

		if (!(mpo->mpo_flags & MPOF_SUBFLOW_OK))
			continue;

		/* these three were already forced on above */
		if (mpo->mpo_level == SOL_SOCKET &&
		    (mpo->mpo_name == SO_NOSIGPIPE ||
		    mpo->mpo_name == SO_NOADDRERR ||
		    mpo->mpo_name == SO_KEEPALIVE))
			continue;

		/*
		 * Failures here are not fatal; an interim record that cannot
		 * be applied is simply dropped from the session.
		 */
		interim = (mpo->mpo_flags & MPOF_INTERIM);
		if (mptcp_subflow_sosetopt(mpte, *so, mpo) != 0 && interim) {
			char buf[32];

			mptcplog((LOG_ERR, "%s: mp_so 0x%llx sopt %s val %d "
			    "interim record removed\n", __func__,
			    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
			    mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name,
			    buf, sizeof (buf)), mpo->mpo_intval));
			mptcp_sopt_remove(mpte, mpo);
			mptcp_sopt_free(mpo);
			continue;
		}
	}

	/*
	 * Remember the original protosw so it can be restored on close or
	 * peeloff, then switch to the subflow-specific copy.
	 */
	mpts->mpts_oprotosw = (*so)->so_proto;
	switch (dom) {
	case PF_INET:
		(*so)->so_proto = &mptcp_subflow_protosw;
		break;
#if INET6
	case PF_INET6:
		(*so)->so_proto = (struct protosw *)&mptcp_subflow_protosw6;
		break;
#endif
	default:
		VERIFY(0);
	}

out:
	socket_unlock(*so, 0);

	if (error != 0) {
		/*
		 * Every failure that reaches this label happens before the
		 * protosw switch, so the socket still has its original
		 * protosw and can be closed directly.  The caller only looks
		 * at the return value and would otherwise leak the socket.
		 */
		(void) soclose(*so);
		*so = NULL;
	}

	DTRACE_MPTCP4(subflow__create, struct mptses *, mpte,
	    struct mptsub *, mpts, int, dom, int, error);

	return (error);
}
/*
 * Close a subflow socket.
 *
 * Restores the protosw saved in mpts->mpts_oprotosw (replacing the
 * subflow-specific one installed by mptcp_subflow_socreate()), clears
 * mpts->mpts_socket and then calls soclose().  The caller must hold the
 * subflow lock.  Returns the result of soclose().
 */
static int
mptcp_subflow_soclose(struct mptsub *mpts, struct socket *so)
{
MPTS_LOCK_ASSERT_HELD(mpts);
socket_lock(so, 0);
VERIFY(so->so_flags & SOF_MP_SUBFLOW);
VERIFY((so->so_state & (SS_NBIO|SS_NOFDREF)) == (SS_NBIO|SS_NOFDREF));
/* put back the protosw the socket was created with */
VERIFY(mpts->mpts_oprotosw != NULL);
so->so_proto = mpts->mpts_oprotosw;
socket_unlock(so, 0);
/* may already be NULL: some callers close before the socket is attached */
mpts->mpts_socket = NULL;
DTRACE_MPTCP5(subflow__close, struct mptsub *, mpts,
struct socket *, so,
struct sockbuf *, &so->so_rcv,
struct sockbuf *, &so->so_snd,
struct mptses *, mpts->mpts_mpte);
return (soclose(so));
}
/*
 * Start the connection on a subflow socket.
 *
 * The subflow must be marked MPTSF_CONNECTING (and not yet connected) and
 * must have a socket.  Clears MPTSF_CONNECT_PENDING, attaches the MP
 * control block to the socket and issues soconnectxlocked() with the
 * subflow's source/destination lists and saved connect request.  The
 * caller must hold both the MPTE and the subflow locks.
 *
 * Returns the result of soconnectxlocked().
 */
static int
mptcp_subflow_soconnectx(struct mptses *mpte, struct mptsub *mpts)
{
struct socket *so;
int af, error;
MPTE_LOCK_ASSERT_HELD(mpte);
MPTS_LOCK_ASSERT_HELD(mpts);
VERIFY((mpts->mpts_flags & (MPTSF_CONNECTING|MPTSF_CONNECTED)) ==
MPTSF_CONNECTING);
VERIFY(mpts->mpts_socket != NULL);
so = mpts->mpts_socket;
af = mpts->mpts_family;
/* debug logging of the destination only; no state is changed here */
if (af == AF_INET || af == AF_INET6) {
struct sockaddr_entry *dst_se;
char dbuf[MAX_IPv6_STR_LEN];
dst_se = TAILQ_FIRST(&mpts->mpts_dst_sl->sl_head);
VERIFY(dst_se != NULL);
mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx dst %s[%d] cid %d "
"[pended %s]\n", __func__,
(u_int64_t)VM_KERNEL_ADDRPERM(mpte->mpte_mppcb->mpp_socket),
inet_ntop(af, ((af == AF_INET) ?
(void *)&SIN(dst_se->se_addr)->sin_addr.s_addr :
(void *)&SIN6(dst_se->se_addr)->sin6_addr),
dbuf, sizeof (dbuf)), ((af == AF_INET) ?
ntohs(SIN(dst_se->se_addr)->sin_port) :
ntohs(SIN6(dst_se->se_addr)->sin6_port)),
mpts->mpts_connid,
((mpts->mpts_flags & MPTSF_CONNECT_PENDING) ?
"YES" : "NO")));
}
/* the connect is being issued now, so it is no longer pending */
mpts->mpts_flags &= ~MPTSF_CONNECT_PENDING;
socket_lock(so, 0);
mptcp_attach_to_subf(so, mpte->mpte_mptcb, mpts->mpts_connid);
/* connect using the request saved by mptcp_subflow_add() */
error = soconnectxlocked(so, &mpts->mpts_src_sl, &mpts->mpts_dst_sl,
mpts->mpts_mpcr.mpcr_proc, mpts->mpts_mpcr.mpcr_ifscope,
mpte->mpte_associd, NULL, TCP_CONNREQF_MPTCP,
&mpts->mpts_mpcr, sizeof (mpts->mpts_mpcr));
socket_unlock(so, 0);
DTRACE_MPTCP3(subflow__connect, struct mptses *, mpte,
struct mptsub *, mpts, int, error);
return (error);
}
/*
 * pru_soreceive replacement installed on subflow sockets by mptcp_init().
 *
 * Never blocks and never copies to a uio: everything currently queued in
 * the socket's receive buffer is unlinked and handed to the caller as a
 * single mbuf chain through *mp0.
 *
 * so		subflow socket
 * psa		if non-NULL, *psa is set to NULL
 * uio		unused
 * mp0		out: head of the returned mbuf chain; must be non-NULL
 * controlp	must be NULL
 * flagsp	in/out message flags; may be NULL
 *
 * Returns 0 on success (also when the peer can send no more and nothing
 * is queued), EINVAL for a NULL mp0 or non-NULL controlp, EOPNOTSUPP for
 * MSG_PEEK/MSG_OOB/MSG_NEEDSA/MSG_WAITALL/MSG_WAITSTREAM, ENOTCONN if the
 * socket is defunct or not connected/connecting, a pending so_error, the
 * sblock() error, or EWOULDBLOCK when the buffer is empty.
 */
static int
mptcp_subflow_soreceive(struct socket *so, struct sockaddr **psa,
struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
#pragma unused(uio)
int flags, error = 0;
struct proc *p = current_proc();
struct mbuf *m, **mp = mp0;
struct mbuf *nextrecord;
socket_lock(so, 1);
VERIFY(so->so_proto->pr_flags & PR_CONNREQUIRED);
#ifdef MORE_LOCKING_DEBUG
if (so->so_usecount == 1) {
panic("%s: so=%x no other reference on socket\n", __func__, so);
}
#endif
/* only the mbuf-chain form without control data is supported */
if (mp == NULL || controlp != NULL) {
socket_unlock(so, 1);
return (EINVAL);
}
*mp = NULL;
if (psa != NULL)
*psa = NULL;
if (flagsp != NULL)
flags = *flagsp &~ MSG_EOR;
else
flags = 0;
if (flags & (MSG_PEEK|MSG_OOB|MSG_NEEDSA|MSG_WAITALL|MSG_WAITSTREAM)) {
socket_unlock(so, 1);
return (EOPNOTSUPP);
}
/* this path is always non-blocking */
flags |= (MSG_DONTWAIT|MSG_NBIO);
if (so->so_flags & SOF_DEFUNCT) {
struct sockbuf *sb = &so->so_rcv;
error = ENOTCONN;
SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] (%d)\n",
__func__, proc_pid(p), (uint64_t)VM_KERNEL_ADDRPERM(so),
SOCK_DOM(so), SOCK_TYPE(so), error));
if (so->so_state & SS_DEFUNCT)
sb_empty_assert(sb, __func__);
socket_unlock(so, 1);
return (error);
}
/*
 * NOTE(review): the SOF_MP_SUBFLOW test makes this early return
 * unreachable for subflow sockets, which are the only users visible
 * in this file -- confirm whether other sockets can get here.
 */
if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
(SS_NOFDREF | SS_CANTRCVMORE) && !(so->so_flags & SOF_MP_SUBFLOW)) {
socket_unlock(so, 1);
return (0);
}
error = sblock(&so->so_rcv, 0);
if (error != 0) {
socket_unlock(so, 1);
return (error);
}
m = so->so_rcv.sb_mb;
if (m == NULL) {
/* nothing queued: report why, in order of precedence */
SB_MB_CHECK(&so->so_rcv);
if (so->so_error != 0) {
error = so->so_error;
so->so_error = 0;
goto release;
}
if (so->so_state & SS_CANTRCVMORE) {
goto release;
}
if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING))) {
error = ENOTCONN;
goto release;
}
error = EWOULDBLOCK;
goto release;
}
OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
SBLASTRECORDCHK(&so->so_rcv, "mptcp_subflow_soreceive 1");
SBLASTMBUFCHK(&so->so_rcv, "mptcp_subflow_soreceive 1");
/*
 * Drain the buffer: each mbuf is uncharged from the sockbuf, unlinked
 * from sb_mb and appended to the caller's chain via m_next.  When one
 * record is exhausted the walk continues with the next record.
 */
while (m != NULL) {
nextrecord = m->m_nextpkt;
sbfree(&so->so_rcv, m);
if (mp != NULL) {
*mp = m;
mp = &m->m_next;
so->so_rcv.sb_mb = m = m->m_next;
*mp = NULL;
}
if (m != NULL) {
/* the new record head inherits the link to the next record */
m->m_nextpkt = nextrecord;
if (nextrecord == NULL)
so->so_rcv.sb_lastrecord = m;
} else {
m = so->so_rcv.sb_mb = nextrecord;
SB_EMPTY_FIXUP(&so->so_rcv);
}
SBLASTRECORDCHK(&so->so_rcv, "mptcp_subflow_soreceive 2");
SBLASTMBUFCHK(&so->so_rcv, "mptcp_subflow_soreceive 2");
}
DTRACE_MPTCP3(subflow__receive, struct socket *, so,
struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd);
/* let the protocol know that buffer space was freed */
if ((so->so_proto->pr_flags & PR_WANTRCVD) && so->so_pcb != NULL)
(*so->so_proto->pr_usrreqs->pru_rcvd)(so, flags);
if (flagsp != NULL)
*flagsp |= flags;
release:
/*
 * NOTE(review): no explicit socket_unlock() on this path, unlike the
 * early returns above; presumably sbunlock() with FALSE also drops the
 * socket lock -- confirm against sbunlock().
 */
sbunlock(&so->so_rcv, FALSE);
return (error);
}
void
mptcp_subflow_sopeeloff(struct mptses *mpte, struct mptsub *mpts,
struct socket *so)
{
struct mptopt smpo;
struct socket *mp_so;
int p, c;
MPTE_LOCK_ASSERT_HELD(mpte);
mp_so = mpte->mpte_mppcb->mpp_socket;
MPTS_LOCK_ASSERT_HELD(mpts);
socket_lock(so, 0);
VERIFY(so->so_flags & SOF_MP_SUBFLOW);
VERIFY((so->so_state & (SS_NBIO|SS_NOFDREF)) == (SS_NBIO|SS_NOFDREF));
if (!(mp_so->so_state & SS_NBIO))
so->so_state &= ~SS_NBIO;
so->so_flags &= ~SOF_MP_SUBFLOW;
so->so_state &= ~SS_NOFDREF;
so->so_state &= ~SOF_MPTCP_TRUE;
so->so_rcv.sb_flags &= ~SB_NOCOMPRESS;
so->so_snd.sb_flags &= ~SB_NOCOMPRESS;
so->so_rcv.sb_flags |= SB_AUTOSIZE;
so->so_snd.sb_flags |= SB_AUTOSIZE;
VERIFY(mpts->mpts_oprotosw != NULL);
so->so_proto = mpts->mpts_oprotosw;
bzero(&smpo, sizeof (smpo));
smpo.mpo_flags |= MPOF_SUBFLOW_OK;
smpo.mpo_level = SOL_SOCKET;
p = (mp_so->so_flags & SOF_NOSIGPIPE);
c = (so->so_flags & SOF_NOSIGPIPE);
smpo.mpo_intval = ((p - c) > 0) ? 1 : 0;
smpo.mpo_name = SO_NOSIGPIPE;
if ((p - c) != 0)
(void) mptcp_subflow_sosetopt(mpte, so, &smpo);
p = (mp_so->so_flags & SOF_NOADDRAVAIL);
c = (so->so_flags & SOF_NOADDRAVAIL);
smpo.mpo_intval = ((p - c) > 0) ? 1 : 0;
smpo.mpo_name = SO_NOADDRERR;
if ((p - c) != 0)
(void) mptcp_subflow_sosetopt(mpte, so, &smpo);
p = (mp_so->so_options & SO_KEEPALIVE);
c = (so->so_options & SO_KEEPALIVE);
smpo.mpo_intval = ((p - c) > 0) ? 1 : 0;
smpo.mpo_name = SO_KEEPALIVE;
if ((p - c) != 0)
(void) mptcp_subflow_sosetopt(mpte, so, &smpo);
p = (intotcpcb(sotoinpcb(mp_so)))->t_keepidle;
c = (intotcpcb(sotoinpcb(so)))->t_keepidle;
smpo.mpo_level = IPPROTO_TCP;
smpo.mpo_intval = 0;
smpo.mpo_name = TCP_KEEPALIVE;
if ((p - c) != 0)
(void) mptcp_subflow_sosetopt(mpte, so, &smpo);
socket_unlock(so, 0);
DTRACE_MPTCP5(subflow__peeloff, struct mptses *, mpte,
struct mptsub *, mpts, struct socket *, so,
struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd);
}
/*
 * Add a subflow to an MPTCP session and, when possible, start connecting it.
 *
 * Steps: select the source/destination addresses, synthesize a wildcard
 * source list if none was given, create the subflow socket, assign a
 * connection id, optionally bind to an interface (ifscope) and to the source
 * address, link the subflow into the session, register the read/write/event
 * upcalls, and build the connect request.  The connect is issued at once
 * unless the subflow is a join that has to wait (MPTSF_CONNECT_PENDING).
 *
 * mpte		session; its lock must be held by the caller
 * mpts		fresh subflow: not connecting/connected, no session, no
 *		socket, connid CONNID_ANY, destination list set (asserted)
 * p		process on whose behalf the connection is made
 * ifscope	interface scope to bind to, or IFSCOPE_NONE
 *
 * Returns 0 on success (a CONNINFO_UPDATED event is posted on the MPTCP
 * socket) or an errno value.  Failures before the subflow is linked into
 * the session close the socket here; once it is linked, removal has to go
 * through mptcp_subflow_del().
 */
int
mptcp_subflow_add(struct mptses *mpte, struct mptsub *mpts,
    struct proc *p, uint32_t ifscope)
{
	struct sockaddr_entry *se, *src_se = NULL, *dst_se = NULL;
	struct socket *mp_so, *so = NULL;
	struct mptsub_connreq mpcr;
	struct mptcb *mp_tp;
	int af, error = 0;

	MPTE_LOCK_ASSERT_HELD(mpte);
	mp_so = mpte->mpte_mppcb->mpp_socket;
	mp_tp = mpte->mpte_mptcb;

	MPTS_LOCK(mpts);
	VERIFY(!(mpts->mpts_flags & (MPTSF_CONNECTING|MPTSF_CONNECTED)));
	VERIFY(mpts->mpts_mpte == NULL);
	VERIFY(mpts->mpts_socket == NULL);
	VERIFY(mpts->mpts_dst_sl != NULL);
	VERIFY(mpts->mpts_connid == CONNID_ANY);

	/* pick the source and destination entries to use */
	if ((error = in_selectaddrs(AF_UNSPEC, &mpts->mpts_src_sl, &src_se,
	    &mpts->mpts_dst_sl, &dst_se)) != 0)
		goto out;

	VERIFY(mpts->mpts_dst_sl != NULL && dst_se != NULL);
	VERIFY(src_se == NULL || mpts->mpts_src_sl != NULL);
	af = mpts->mpts_family = dst_se->se_addr->sa_family;
	VERIFY(src_se == NULL || src_se->se_addr->sa_family == af);
	VERIFY(af == AF_INET || af == AF_INET6);

	/*
	 * No source list given: duplicate the destination list and blank
	 * the first entry, keeping only its length and family, so that it
	 * describes an unspecified source of the same address family.
	 */
	if (mpts->mpts_src_sl == NULL) {
		mpts->mpts_src_sl =
		    sockaddrlist_dup(mpts->mpts_dst_sl, M_WAITOK);
		if (mpts->mpts_src_sl == NULL) {
			error = ENOBUFS;
			goto out;
		}
		se = TAILQ_FIRST(&mpts->mpts_src_sl->sl_head);
		VERIFY(se != NULL && se->se_addr != NULL &&
		    se->se_addr->sa_len == dst_se->se_addr->sa_len);
		bzero(se->se_addr, se->se_addr->sa_len);
		se->se_addr->sa_len = dst_se->se_addr->sa_len;
		se->se_addr->sa_family = dst_se->se_addr->sa_family;
	}

	/* create the subflow socket */
	if ((error = mptcp_subflow_socreate(mpte, mpts, af, p, &so)) != 0)
		goto out;

	/*
	 * Hand out the next connection id, skipping the reserved values.
	 * This is a loop rather than a single test so that stepping off one
	 * reserved value can never land on the other one, which would trip
	 * the assertion below.
	 */
	mpte->mpte_connid_last++;
	while (mpte->mpte_connid_last == CONNID_ALL ||
	    mpte->mpte_connid_last == CONNID_ANY)
		mpte->mpte_connid_last++;

	mpts->mpts_connid = mpte->mpte_connid_last;
	VERIFY(mpts->mpts_connid != CONNID_ANY &&
	    mpts->mpts_connid != CONNID_ALL);

	/* bind the subflow to the requested interface, if any */
	if (ifscope != IFSCOPE_NONE) {
		socket_lock(so, 0);
		error = inp_bindif(sotoinpcb(so), ifscope, &mpts->mpts_outif);
		if (error != 0) {
			socket_unlock(so, 0);
			(void) mptcp_subflow_soclose(mpts, so);
			goto out;
		}
		VERIFY(mpts->mpts_outif != NULL);
		mpts->mpts_flags |= MPTSF_BOUND_IF;

		mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx bindif %s[%d] "
		    "cid %d\n", __func__,
		    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
		    mpts->mpts_outif->if_xname,
		    ifscope, mpts->mpts_connid));
		socket_unlock(so, 0);
	}

	/* bind the subflow to the source address/port, if one was selected */
	if (src_se != NULL) {
		struct sockaddr *sa = src_se->se_addr;
		uint32_t mpts_flags = 0;
		/* initialized so the log below never reads an unset value */
		in_port_t lport = 0;

		switch (af) {
		case AF_INET:
			if (SIN(sa)->sin_addr.s_addr != INADDR_ANY)
				mpts_flags |= MPTSF_BOUND_IP;
			if ((lport = SIN(sa)->sin_port) != 0)
				mpts_flags |= MPTSF_BOUND_PORT;
			break;
#if INET6
		case AF_INET6:
			VERIFY(af == AF_INET6);
			if (!IN6_IS_ADDR_UNSPECIFIED(&SIN6(sa)->sin6_addr))
				mpts_flags |= MPTSF_BOUND_IP;
			if ((lport = SIN6(sa)->sin6_port) != 0)
				mpts_flags |= MPTSF_BOUND_PORT;
			break;
#endif
		}

		error = sobindlock(so, sa, 1);
		if (error != 0) {
			(void) mptcp_subflow_soclose(mpts, so);
			goto out;
		}
		/* record what was bound only after the bind succeeded */
		mpts->mpts_flags |= mpts_flags;

		if (af == AF_INET || af == AF_INET6) {
			char sbuf[MAX_IPv6_STR_LEN];

			mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx bindip %s[%d] "
			    "cid %d\n", __func__,
			    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
			    inet_ntop(af, ((af == AF_INET) ?
			    (void *)&SIN(sa)->sin_addr.s_addr :
			    (void *)&SIN6(sa)->sin6_addr), sbuf, sizeof (sbuf)),
			    ntohs(lport), mpts->mpts_connid));
		}
	}

	/*
	 * Link the subflow into the session and tie it to its socket.  From
	 * here on the subflow has to be removed via mptcp_subflow_del(),
	 * which undoes the two subflow references and the MP socket use
	 * count taken below.
	 */
	TAILQ_INSERT_TAIL(&mpte->mpte_subflows, mpts, mpts_entry);
	mpte->mpte_numflows++;

	atomic_bitset_32(&mpts->mpts_flags, MPTSF_ATTACHED);
	mpts->mpts_mpte = mpte;
	mpts->mpts_socket = so;
	MPTS_ADDREF_LOCKED(mpts);
	MPTS_ADDREF_LOCKED(mpts);
	mp_so->so_usecount++;

	/* register for read/write upcalls and the events we care about */
	(void) sock_setupcalls(so, mptcp_subflow_rupcall, mpts,
	    mptcp_subflow_wupcall, mpts);
	(void) sock_catchevents(so, mptcp_subflow_eupcall, mpts,
	    SO_FILT_HINT_CONNRESET | SO_FILT_HINT_CANTRCVMORE |
	    SO_FILT_HINT_CANTSENDMORE | SO_FILT_HINT_TIMEOUT |
	    SO_FILT_HINT_NOSRCADDR | SO_FILT_HINT_IFDENIED |
	    SO_FILT_HINT_SUSPEND | SO_FILT_HINT_RESUME |
	    SO_FILT_HINT_CONNECTED | SO_FILT_HINT_DISCONNECTED |
	    SO_FILT_HINT_MPFAILOVER | SO_FILT_HINT_MPSTATUS |
	    SO_FILT_HINT_MUSTRST);

	VERIFY(!(mpts->mpts_flags &
	    (MPTSF_CONNECTING|MPTSF_CONNECTED|MPTSF_CONNECT_PENDING)));

	bzero(&mpcr, sizeof (mpcr));
	mpcr.mpcr_proc = p;
	mpcr.mpcr_ifscope = ifscope;

	/*
	 * The first subflow of a not-yet-established connection enables
	 * MPTCP (reserving the local key if the connection is still closed);
	 * any other subflow is a join, which is deferred until the
	 * connection is flagged MPTCPF_JOIN_READY.
	 */
	MPT_LOCK(mp_tp);
	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED && mpte->mpte_numflows == 1) {
		if (mp_tp->mpt_state == MPTCPS_CLOSED) {
			mp_tp->mpt_localkey = mptcp_reserve_key();
			mptcp_conn_properties(mp_tp);
		}
		MPT_UNLOCK(mp_tp);
		soisconnecting(mp_so);
		mpcr.mpcr_type = MPTSUB_CONNREQ_MP_ENABLE;
	} else {
		if (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY))
			mpts->mpts_flags |= MPTSF_CONNECT_PENDING;
		MPT_UNLOCK(mp_tp);
		mpcr.mpcr_type = MPTSUB_CONNREQ_MP_ADD;
	}

	mpts->mpts_mpcr = mpcr;
	mpts->mpts_flags |= MPTSF_CONNECTING;

	if (af == AF_INET || af == AF_INET6) {
		char dbuf[MAX_IPv6_STR_LEN];

		mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx dst %s[%d] cid %d "
		    "[pending %s]\n", __func__,
		    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
		    inet_ntop(af, ((af == AF_INET) ?
		    (void *)&SIN(dst_se->se_addr)->sin_addr.s_addr :
		    (void *)&SIN6(dst_se->se_addr)->sin6_addr),
		    dbuf, sizeof (dbuf)), ((af == AF_INET) ?
		    ntohs(SIN(dst_se->se_addr)->sin_port) :
		    ntohs(SIN6(dst_se->se_addr)->sin6_port)),
		    mpts->mpts_connid,
		    ((mpts->mpts_flags & MPTSF_CONNECT_PENDING) ?
		    "YES" : "NO")));
	}

	/* connect right away unless the join has to wait */
	if (!(mpts->mpts_flags & MPTSF_CONNECT_PENDING))
		error = mptcp_subflow_soconnectx(mpte, mpts);

out:
	MPTS_UNLOCK(mpts);
	if (error == 0) {
		soevent(mp_so, SO_FILT_HINT_LOCKED |
		    SO_FILT_HINT_CONNINFO_UPDATED);
	}
	return (error);
}
/*
 * Decide whether a subflow may be deleted.
 *
 * Returns 0 (keep it) only when the subflow has no socket error, is the
 * active subflow, and the MPTCP connection state is neither
 * MPTCPS_CLOSED nor beyond MPTCPS_TIME_WAIT; returns 1 otherwise.
 *
 * The caller must hold the MPTE lock; the subflow and control block locks
 * are taken (in that order) and released here.
 */
static int
mptcp_delete_ok(struct mptses *mpte, struct mptsub *mpts)
{
	struct mptcb *mp_tp;
	int deletable;

	MPTE_LOCK_ASSERT_HELD(mpte);
	mp_tp = mpte->mpte_mptcb;
	VERIFY(mp_tp != NULL);

	MPTS_LOCK(mpts);
	MPT_LOCK(mp_tp);
	/* deletable as soon as any one of the "keep" conditions fails */
	deletable = (mpts->mpts_soerror != 0 ||
	    !(mpts->mpts_flags & MPTSF_ACTIVE) ||
	    mp_tp->mpt_state == MPTCPS_CLOSED ||
	    mp_tp->mpt_state > MPTCPS_TIME_WAIT);
	MPT_UNLOCK(mp_tp);
	MPTS_UNLOCK(mpts);

	return (deletable);
}
/*
 * Remove a subflow from its MPTCP session.
 *
 * Unlinks the subflow from the session list, cancels the socket upcalls
 * and event registration, detaches the MP control block from the subflow
 * socket, optionally closes the socket, and drops the MP socket use count
 * and the two subflow references taken in mptcp_subflow_add().  A
 * CONNINFO_UPDATED event is then posted on the MPTCP socket.
 *
 * mpte		session; its lock must be held by the caller
 * mpts		attached subflow with a socket and a valid connid (asserted)
 * close	if true, the subflow socket is closed as well
 */
void
mptcp_subflow_del(struct mptses *mpte, struct mptsub *mpts, boolean_t close)
{
struct socket *mp_so, *so;
MPTE_LOCK_ASSERT_HELD(mpte);
mp_so = mpte->mpte_mppcb->mpp_socket;
MPTS_LOCK(mpts);
so = mpts->mpts_socket;
VERIFY(so != NULL);
mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx [u=%d,r=%d] cid %d "
"[close %s] %d %x\n", __func__,
(u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
mp_so->so_usecount,
mp_so->so_retaincnt, mpts->mpts_connid,
(close ? "YES" : "NO"), mpts->mpts_soerror,
mpts->mpts_flags));
VERIFY(mpts->mpts_mpte == mpte);
VERIFY(mpts->mpts_connid != CONNID_ANY &&
mpts->mpts_connid != CONNID_ALL);
VERIFY(mpts->mpts_flags & MPTSF_ATTACHED);
/* take the subflow off the session's list */
atomic_bitclear_32(&mpts->mpts_flags, MPTSF_ATTACHED);
TAILQ_REMOVE(&mpte->mpte_subflows, mpts, mpts_entry);
VERIFY(mpte->mpte_numflows != 0);
mpte->mpte_numflows--;
/* unregister the upcalls and events set up in mptcp_subflow_add() */
(void) sock_setupcalls(so, NULL, NULL, NULL, NULL);
(void) sock_catchevents(so, NULL, NULL, 0);
mptcp_detach_mptcb_from_subf(mpte->mpte_mptcb, so);
if (close)
(void) mptcp_subflow_soclose(mpts, so);
/* undo the use count taken on the MP socket for this subflow */
VERIFY(mp_so->so_usecount != 0);
mp_so->so_usecount--;
mpts->mpts_mpte = NULL;
mpts->mpts_socket = NULL;
MPTS_UNLOCK(mpts);
/* drop both references taken in mptcp_subflow_add() */
MPTS_REMREF(mpts);
MPTS_REMREF(mpts);
soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED);
}
/*
 * Begin disconnecting a subflow.
 *
 * Does nothing if the subflow is already disconnecting or disconnected.
 * Otherwise marks it MPTSF_DISCONNECTING (plus MPTSF_DELETEOK when
 * `deleteok' is set) and, if the socket is connected and not already
 * disconnecting, shuts down both directions and disconnects it; a DFIN is
 * sent first when the MPTCP connection state is past MPTCPS_ESTABLISHED.
 * A DISCONNECTED event is always raised through the event upcall at the
 * end, whether or not the socket was actually disconnected here.
 *
 * The caller must hold both the MPTE and the subflow locks.
 */
void
mptcp_subflow_disconnect(struct mptses *mpte, struct mptsub *mpts,
boolean_t deleteok)
{
struct socket *so;
struct mptcb *mp_tp;
int send_dfin = 0;
MPTE_LOCK_ASSERT_HELD(mpte);
MPTS_LOCK_ASSERT_HELD(mpts);
VERIFY(mpts->mpts_mpte == mpte);
VERIFY(mpts->mpts_socket != NULL);
VERIFY(mpts->mpts_connid != CONNID_ANY &&
mpts->mpts_connid != CONNID_ALL);
/* a disconnect is already in progress or has completed */
if (mpts->mpts_flags & (MPTSF_DISCONNECTING|MPTSF_DISCONNECTED))
return;
mpts->mpts_flags |= MPTSF_DISCONNECTING;
if (deleteok)
mpts->mpts_flags |= MPTSF_DELETEOK;
so = mpts->mpts_socket;
mp_tp = mpte->mpte_mptcb;
/* sample the connection state under its own lock */
MPT_LOCK(mp_tp);
if (mp_tp->mpt_state > MPTCPS_ESTABLISHED)
send_dfin = 1;
MPT_UNLOCK(mp_tp);
socket_lock(so, 0);
if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
(so->so_state & SS_ISCONNECTED)) {
mptcplog((LOG_DEBUG, "%s: cid %d fin %d [linger %s]\n",
__func__, mpts->mpts_connid, send_dfin,
(deleteok ? "NO" : "YES")));
if (send_dfin)
mptcp_send_dfin(so);
(void) soshutdownlock(so, SHUT_RD);
(void) soshutdownlock(so, SHUT_WR);
(void) sodisconnectlocked(so);
}
socket_unlock(so, 0);
/*
 * NOTE(review): raised unconditionally; presumably so the event path
 * runs even if the lower layer posts no disconnect event -- confirm.
 */
mptcp_subflow_eupcall(so, mpts, SO_FILT_HINT_DISCONNECTED);
}
/*
 * Read upcall registered on subflow sockets by mptcp_subflow_add().
 *
 * `arg' is the subflow.  No data is processed here: the upcall only
 * signals the session's worker thread, under the session thread lock.
 */
static void
mptcp_subflow_rupcall(struct socket *so, void *arg, int waitf)
{
#pragma unused(so, waitf)
	struct mptsub *mpts;
	struct mptses *mpte;

	mpts = arg;
	mpte = mpts->mpts_mpte;
	VERIFY(mpte != NULL);

	lck_mtx_lock(&mpte->mpte_thread_lock);
	mptcp_thread_signal_locked(mpte);
	lck_mtx_unlock(&mpte->mpte_thread_lock);
}
/*
 * Pull received data off a subflow socket and feed it to MPTCP.
 *
 * Does nothing unless the subflow is MPTSF_CONNECTED.  Data is fetched
 * with sock_receive_internal(); on an error other than EWOULDBLOCK an
 * alternate subflow is looked up and, if there is none, the error is
 * stored in the MPTCP socket's so_error.  Data read on a degraded,
 * non-active subflow is discarded; otherwise it is handed to
 * mptcp_input().
 *
 * The caller must hold both the MPTE and the subflow locks.  The subflow
 * lock is dropped and retaken around mptcp_get_subflow() and
 * mptcp_input(), and is held again on return.
 */
static void
mptcp_subflow_input(struct mptses *mpte, struct mptsub *mpts)
{
struct mbuf *m = NULL;
struct socket *so;
int error;
struct mptsub *mpts_alt = NULL;
MPTE_LOCK_ASSERT_HELD(mpte);
MPTS_LOCK_ASSERT_HELD(mpts);
DTRACE_MPTCP2(subflow__input, struct mptses *, mpte,
struct mptsub *, mpts);
if (!(mpts->mpts_flags & MPTSF_CONNECTED))
return;
so = mpts->mpts_socket;
error = sock_receive_internal(so, NULL, &m, 0, NULL);
if (error != 0 && error != EWOULDBLOCK) {
mptcplog((LOG_ERR, "%s: cid %d error %d\n",
__func__, mpts->mpts_connid, error));
/* the subflow lock is not held across the alternate lookup */
MPTS_UNLOCK(mpts);
mpts_alt = mptcp_get_subflow(mpte, mpts);
if (mpts_alt == NULL) {
mptcplog((LOG_ERR, "%s: no alt path cid %d\n",
__func__, mpts->mpts_connid));
/* no other path: surface the error on the MPTCP socket */
mpte->mpte_mppcb->mpp_socket->so_error = error;
}
MPTS_LOCK(mpts);
} else if (error == 0) {
mptcplog3((LOG_DEBUG, "%s: cid %d \n",
__func__, mpts->mpts_connid));
}
/* degraded and not the active subflow: drop whatever was read */
if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
(!(mpts->mpts_flags & MPTSF_ACTIVE))) {
m_freem(m);
return;
}
if (m != NULL) {
/*
 * NOTE(review): the subflow lock is released around mptcp_input();
 * presumably because input processing may need to lock this or
 * another subflow -- confirm against mptcp_input().
 */
MPTS_UNLOCK(mpts);
mptcp_input(mpte, m);
MPTS_LOCK(mpts);
}
}
/*
 * Write upcall registered on subflow sockets by mptcp_subflow_add().
 *
 * `arg' is the subflow.  No data is sent here: the upcall only signals
 * the session's worker thread, under the session thread lock.
 */
static void
mptcp_subflow_wupcall(struct socket *so, void *arg, int waitf)
{
#pragma unused(so, waitf)
	struct mptsub *mpts;
	struct mptses *mpte;

	mpts = arg;
	mpte = mpts->mpts_mpte;
	VERIFY(mpte != NULL);

	lck_mtx_lock(&mpte->mpte_thread_lock);
	mptcp_thread_signal_locked(mpte);
	lck_mtx_unlock(&mpte->mpte_thread_lock);
}
int
mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts)
{
struct socket *mp_so, *so;
size_t sb_cc = 0, tot_sent = 0;
struct mbuf *sb_mb;
int error = 0;
u_int64_t mpt_dsn = 0;
struct mptcb *mp_tp = mpte->mpte_mptcb;
struct mbuf *mpt_mbuf = NULL;
unsigned int off = 0;
MPTE_LOCK_ASSERT_HELD(mpte);
MPTS_LOCK_ASSERT_HELD(mpts);
mp_so = mpte->mpte_mppcb->mpp_socket;
so = mpts->mpts_socket;
DTRACE_MPTCP2(subflow__output, struct mptses *, mpte,
struct mptsub *, mpts);
if (mpts->mpts_flags & MPTSF_SUSPENDED) {
mptcplog((LOG_ERR, "%s: mp_so 0x%llx cid %d is flow "
"controlled\n", __func__,
(u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mpts->mpts_connid));
goto out;
}
if (!(mpts->mpts_flags & MPTSF_MP_CAPABLE) &&
!(mpts->mpts_flags & MPTSF_MP_DEGRADED)) {
mptcplog((LOG_ERR, "%s: mp_so 0x%llx cid %d not "
"MPTCP capable\n", __func__,
(u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mpts->mpts_connid));
goto out;
}
if (mpte->mpte_flags & MPTE_SND_REM_ADDR) {
struct tcpcb *tp = intotcpcb(sotoinpcb(so));
tp->t_rem_aid = mpte->mpte_lost_aid;
if (mptcp_remaddr_enable)
tp->t_mpflags |= TMPF_SND_REM_ADDR;
mpte->mpte_flags &= ~MPTE_SND_REM_ADDR;
}
sb_mb = mp_so->so_snd.sb_mb;
if (sb_mb == NULL) {
goto out;
}
VERIFY(sb_mb->m_pkthdr.pkt_flags & PKTF_MPTCP);
mpt_mbuf = sb_mb;
while (mpt_mbuf && mpt_mbuf->m_pkthdr.mp_rlen == 0) {
mpt_mbuf = mpt_mbuf->m_next;
}
if (mpt_mbuf && (mpt_mbuf->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
mpt_dsn = mpt_mbuf->m_pkthdr.mp_dsn;
} else {
goto out;
}
MPT_LOCK(mp_tp);
if (MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_snduna)) {
int len = 0;
len = mp_tp->mpt_snduna - mpt_dsn;
sbdrop(&mp_so->so_snd, len);
}
mpt_dsn = mp_so->so_snd.sb_mb->m_pkthdr.mp_dsn;
if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_sndnxt)) {
int len = 0;
len = mp_tp->mpt_sndnxt - mpt_dsn;
sbdrop(&mp_so->so_snd, len);
mp_tp->mpt_snduna = mp_tp->mpt_sndnxt;
}
if (MPTCP_SEQ_LT(mpts->mpts_sndnxt, mp_tp->mpt_snduna)) {
mpts->mpts_sndnxt = mp_tp->mpt_snduna;
}
if (MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna)) {
mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;
}
sb_mb = mp_so->so_snd.sb_mb;
sb_cc = mp_so->so_snd.sb_cc;
if (sb_mb == NULL) {
MPT_UNLOCK(mp_tp);
goto out;
}
if (MPTCP_SEQ_LT(mpts->mpts_sndnxt, mp_tp->mpt_sndmax)) {
off = mpts->mpts_sndnxt - mp_tp->mpt_snduna;
sb_cc -= off;
} else {
MPT_UNLOCK(mp_tp);
goto out;
}
MPT_UNLOCK(mp_tp);
mpt_mbuf = sb_mb;
mpt_dsn = mpt_mbuf->m_pkthdr.mp_dsn;
while (mpt_mbuf && ((mpt_mbuf->m_pkthdr.mp_rlen == 0) ||
(mpt_mbuf->m_pkthdr.mp_rlen <= off))) {
off -= mpt_mbuf->m_pkthdr.mp_rlen;
mpt_mbuf = mpt_mbuf->m_next;
mpt_dsn = mpt_mbuf->m_pkthdr.mp_dsn;
}
if ((mpts->mpts_connid == 2) || (mpts->mpts_flags & MPTSF_MP_DEGRADED))
mptcplog((LOG_INFO, "%s: snduna = %llu off = %d id = %d"
" %llu \n",
__func__,
mp_tp->mpt_snduna, off, mpts->mpts_connid,
mpts->mpts_sndnxt));
VERIFY(mpt_mbuf && (mpt_mbuf->m_pkthdr.pkt_flags & PKTF_MPTCP));
while (tot_sent < sb_cc) {
struct mbuf *m;
size_t mlen, len = 0;
mlen = mpt_mbuf->m_pkthdr.mp_rlen;
mlen -= off;
if (mlen == 0)
goto out;
if (mlen > sb_cc) {
panic("%s: unexpected %lu %lu \n", __func__,
mlen, sb_cc);
}
m = m_copym_mode(mpt_mbuf, off, mlen, M_DONTWAIT,
M_COPYM_COPY_HDR);
if (m == NULL) {
error = ENOBUFS;
break;
}
mpt_dsn = mpt_mbuf->m_pkthdr.mp_dsn;
m->m_pkthdr.pkt_flags |= PKTF_MPTCP;
m->m_pkthdr.pkt_flags &= ~PKTF_MPSO;
m->m_pkthdr.mp_dsn = mpt_dsn + off;
m->m_pkthdr.mp_rseq = mpts->mpts_rel_seq;
m->m_pkthdr.mp_rlen = mlen;
mpts->mpts_rel_seq += mlen;
m->m_pkthdr.len = mlen;
if (mpts->mpts_lastmap.mptsl_dsn +
mpts->mpts_lastmap.mptsl_len == mpt_dsn) {
mpts->mpts_lastmap.mptsl_len += tot_sent;
} else if (MPTCP_SEQ_LT((mpts->mpts_lastmap.mptsl_dsn +
mpts->mpts_lastmap.mptsl_len), mpt_dsn)) {
if (m->m_pkthdr.mp_dsn == 0)
panic("%s %llu", __func__, mpt_dsn);
mpts->mpts_lastmap.mptsl_dsn = m->m_pkthdr.mp_dsn;
mpts->mpts_lastmap.mptsl_sseq = m->m_pkthdr.mp_rseq;
mpts->mpts_lastmap.mptsl_len = m->m_pkthdr.mp_rlen;
}
error = sock_sendmbuf(so, NULL, m, 0, &len);
DTRACE_MPTCP7(send, struct mbuf *, m, struct socket *, so,
struct sockbuf *, &so->so_rcv,
struct sockbuf *, &so->so_snd,
struct mptses *, mpte, struct mptsub *, mpts,
size_t, mlen);
if (error != 0) {
mptcplog((LOG_ERR, "%s: len = %zd error = %d \n",
__func__, len, error));
break;
}
mpts->mpts_sndnxt += mlen;
MPT_LOCK(mp_tp);
if (MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mpts->mpts_sndnxt)) {
if (MPTCP_DATASEQ_HIGH32(mpts->mpts_sndnxt) >
MPTCP_DATASEQ_HIGH32(mp_tp->mpt_sndnxt))
mp_tp->mpt_flags |= MPTCPF_SND_64BITDSN;
mp_tp->mpt_sndnxt = mpts->mpts_sndnxt;
}
MPT_UNLOCK(mp_tp);
if (len != mlen) {
mptcplog((LOG_ERR, "%s: cid %d wrote %d "
"(expected %d)\n", __func__,
mpts->mpts_connid, len, mlen));
}
tot_sent += mlen;
off = 0;
mpt_mbuf = mpt_mbuf->m_next;
}
if (error != 0 && error != EWOULDBLOCK) {
mptcplog((LOG_ERR, "MPTCP ERROR %s: cid %d error %d\n",
__func__, mpts->mpts_connid, error));
} if (error == 0) {
if ((mpts->mpts_connid == 2) ||
(mpts->mpts_flags & MPTSF_MP_DEGRADED))
mptcplog((LOG_DEBUG, "%s: cid %d wrote %d %d\n",
__func__, mpts->mpts_connid, tot_sent,
sb_cc));
MPT_LOCK(mp_tp);
mptcp_cancel_timer(mp_tp, MPTT_REXMT);
MPT_UNLOCK(mp_tp);
}
out:
return (error);
}
/*
 * Event upcall for a subflow socket.  'arg' is the subflow (struct
 * mptsub).  The reported event bits are OR-ed into the subflow's
 * mpts_evctl word and the session's worker thread is signalled, all
 * under the session thread lock.
 */
static void
mptcp_subflow_eupcall(struct socket *so, void *arg, uint32_t events)
{
#pragma unused(so)
	struct mptsub *subflow = arg;
	struct mptses *mpte = subflow->mpts_mpte;

	VERIFY(mpte != NULL);

	lck_mtx_lock(&mpte->mpte_thread_lock);
	atomic_bitset_32(&subflow->mpts_evctl, events);
	mptcp_thread_signal_locked(mpte);
	lck_mtx_unlock(&mpte->mpte_thread_lock);
}
static ev_ret_t
mptcp_subflow_events(struct mptses *mpte, struct mptsub *mpts)
{
uint32_t events;
ev_ret_t ret = MPTS_EVRET_OK;
MPTE_LOCK_ASSERT_HELD(mpte);
MPTS_LOCK_ASSERT_HELD(mpts);
if ((events = mpts->mpts_evctl) == 0)
return (ret);
if (events & (SO_FILT_HINT_CONNRESET|SO_FILT_HINT_MUSTRST|
SO_FILT_HINT_CANTRCVMORE|SO_FILT_HINT_CANTSENDMORE|
SO_FILT_HINT_TIMEOUT|SO_FILT_HINT_NOSRCADDR|
SO_FILT_HINT_IFDENIED|SO_FILT_HINT_SUSPEND|
SO_FILT_HINT_DISCONNECTED)) {
events |= SO_FILT_HINT_MPFAILOVER;
}
DTRACE_MPTCP3(subflow__events, struct mptses *, mpte,
struct mptsub *, mpts, uint32_t, events);
mptcplog2((LOG_DEBUG, "%s: cid %d events=%b\n", __func__,
mpts->mpts_connid, events, SO_FILT_HINT_BITS));
if ((events & SO_FILT_HINT_MPFAILOVER) && (ret >= MPTS_EVRET_OK)) {
ev_ret_t error = mptcp_subflow_failover_ev(mpte, mpts);
events &= ~SO_FILT_HINT_MPFAILOVER;
ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
}
if ((events & SO_FILT_HINT_CONNRESET) && (ret >= MPTS_EVRET_OK)) {
ev_ret_t error = mptcp_subflow_connreset_ev(mpte, mpts);
events &= ~SO_FILT_HINT_CONNRESET;
ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
}
if ((events & SO_FILT_HINT_MUSTRST) && (ret >= MPTS_EVRET_OK)) {
ev_ret_t error = mptcp_subflow_mustrst_ev(mpte, mpts);
events &= ~SO_FILT_HINT_MUSTRST;
ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
}
if ((events & SO_FILT_HINT_CANTRCVMORE) && (ret >= MPTS_EVRET_OK)) {
ev_ret_t error = mptcp_subflow_cantrcvmore_ev(mpte, mpts);
events &= ~SO_FILT_HINT_CANTRCVMORE;
ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
}
if ((events & SO_FILT_HINT_CANTSENDMORE) && (ret >= MPTS_EVRET_OK)) {
ev_ret_t error = mptcp_subflow_cantsendmore_ev(mpte, mpts);
events &= ~SO_FILT_HINT_CANTSENDMORE;
ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
}
if ((events & SO_FILT_HINT_TIMEOUT) && (ret >= MPTS_EVRET_OK)) {
ev_ret_t error = mptcp_subflow_timeout_ev(mpte, mpts);
events &= ~SO_FILT_HINT_TIMEOUT;
ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
}
if ((events & SO_FILT_HINT_NOSRCADDR) && (ret >= MPTS_EVRET_OK)) {
ev_ret_t error = mptcp_subflow_nosrcaddr_ev(mpte, mpts);
events &= ~SO_FILT_HINT_NOSRCADDR;
ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
}
if ((events & SO_FILT_HINT_IFDENIED) && (ret >= MPTS_EVRET_OK)) {
ev_ret_t error = mptcp_subflow_ifdenied_ev(mpte, mpts);
events &= ~SO_FILT_HINT_IFDENIED;
ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
}
if ((events & SO_FILT_HINT_SUSPEND) && (ret >= MPTS_EVRET_OK)) {
ev_ret_t error = mptcp_subflow_suspend_ev(mpte, mpts);
events &= ~SO_FILT_HINT_SUSPEND;
ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
}
if ((events & SO_FILT_HINT_RESUME) && (ret >= MPTS_EVRET_OK)) {
ev_ret_t error = mptcp_subflow_resume_ev(mpte, mpts);
events &= ~SO_FILT_HINT_RESUME;
ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
}
if ((events & SO_FILT_HINT_CONNECTED) && (ret >= MPTS_EVRET_OK)) {
ev_ret_t error = mptcp_subflow_connected_ev(mpte, mpts);
events &= ~SO_FILT_HINT_CONNECTED;
ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
}
if ((events & SO_FILT_HINT_MPSTATUS) && (ret >= MPTS_EVRET_OK)) {
ev_ret_t error = mptcp_subflow_mpstatus_ev(mpte, mpts);
events &= ~SO_FILT_HINT_MPSTATUS;
ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
}
if ((events & SO_FILT_HINT_DISCONNECTED) && (ret >= MPTS_EVRET_OK)) {
ev_ret_t error = mptcp_subflow_disconnected_ev(mpte, mpts);
events &= ~SO_FILT_HINT_DISCONNECTED;
ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
}
if (events != 0 || ret < MPTS_EVRET_OK) {
mptcplog((LOG_ERR, "%s%s: cid %d evret %s (%d)"
" unhandled events=%b\n",
(events != 0) ? "MPTCP_ERROR " : "",
__func__, mpts->mpts_connid,
mptcp_evret2str(ret), ret, events, SO_FILT_HINT_BITS));
}
atomic_bitclear_32(&mpts->mpts_evctl, ~events);
return (ret);
}
/*
 * Handle SO_FILT_HINT_CONNRESET on a subflow.
 *
 * Records ECONNREFUSED as the subflow error if none is set, disconnects
 * the subflow, and reports ECONNREFUSED on the MPTCP socket when the
 * MPTCP connection has not reached the established state.
 *
 * Returns MPTS_EVRET_OK when the subflow lingers, MPTS_EVRET_DELETE
 * otherwise.
 */
static ev_ret_t
mptcp_subflow_connreset_ev(struct mptses *mpte, struct mptsub *mpts)
{
	struct socket *mp_so;
	struct mptcb *mp_tp;
	boolean_t linger = TRUE;

	MPTE_LOCK_ASSERT_HELD(mpte);
	MPTS_LOCK_ASSERT_HELD(mpts);
	VERIFY(mpte->mpte_mppcb != NULL);

	mp_so = mpte->mpte_mppcb->mpp_socket;
	mp_tp = mpte->mpte_mptcb;

	/* No lingering once deletion is allowed or the PCB is clearing. */
	if ((mpts->mpts_flags & MPTSF_DELETEOK) ||
	    (mp_so->so_flags & SOF_PCBCLEARING))
		linger = FALSE;

	mptcplog((LOG_DEBUG, "%s: cid %d [linger %s]\n", __func__,
	    mpts->mpts_connid, (linger ? "YES" : "NO")));

	/* Keep an earlier error if there is one. */
	if (mpts->mpts_soerror == 0)
		mpts->mpts_soerror = ECONNREFUSED;

	mptcp_subflow_disconnect(mpte, mpts, !linger);

	MPT_LOCK(mp_tp);
	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
		mp_so->so_error = ECONNREFUSED;
	}
	MPT_UNLOCK(mp_tp);

	if (!linger)
		return (MPTS_EVRET_DELETE);
	return (MPTS_EVRET_OK);
}
/*
 * Handle SO_FILT_HINT_CANTRCVMORE on a subflow.  Nothing is done beyond
 * logging the connection id; the subflow is kept (MPTS_EVRET_OK).
 */
static ev_ret_t
mptcp_subflow_cantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts)
{
	MPTE_LOCK_ASSERT_HELD(mpte);
	MPTS_LOCK_ASSERT_HELD(mpts);

	mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid));

	return (MPTS_EVRET_OK);
}
/*
 * Handle SO_FILT_HINT_CANTSENDMORE on a subflow.  Nothing is done
 * beyond logging the connection id; the subflow is kept
 * (MPTS_EVRET_OK).
 */
static ev_ret_t
mptcp_subflow_cantsendmore_ev(struct mptses *mpte, struct mptsub *mpts)
{
	MPTE_LOCK_ASSERT_HELD(mpte);
	MPTS_LOCK_ASSERT_HELD(mpts);

	mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid));

	return (MPTS_EVRET_OK);
}
static ev_ret_t
mptcp_subflow_timeout_ev(struct mptses *mpte, struct mptsub *mpts)
{
struct socket *mp_so, *so;
struct mptcb *mp_tp;
boolean_t linger;
MPTE_LOCK_ASSERT_HELD(mpte);
MPTS_LOCK_ASSERT_HELD(mpts);
VERIFY(mpte->mpte_mppcb != NULL);
mp_so = mpte->mpte_mppcb->mpp_socket;
mp_tp = mpte->mpte_mptcb;
so = mpts->mpts_socket;
linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) &&
!(mp_so->so_flags & SOF_PCBCLEARING));
mptcplog((LOG_NOTICE, "%s: cid %d [linger %s]\n", __func__,
mpts->mpts_connid, (linger ? "YES" : "NO")));
if (mpts->mpts_soerror == 0)
mpts->mpts_soerror = ETIMEDOUT;
mptcp_subflow_disconnect(mpte, mpts, !linger);
MPT_LOCK(mp_tp);
if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
mp_so->so_error = ETIMEDOUT;
}
MPT_UNLOCK(mp_tp);
return (linger ? MPTS_EVRET_OK : MPTS_EVRET_DELETE);
}
static ev_ret_t
mptcp_subflow_nosrcaddr_ev(struct mptses *mpte, struct mptsub *mpts)
{
struct socket *mp_so, *so;
struct mptcb *mp_tp;
boolean_t linger;
struct tcpcb *tp = NULL;
MPTE_LOCK_ASSERT_HELD(mpte);
MPTS_LOCK_ASSERT_HELD(mpts);
VERIFY(mpte->mpte_mppcb != NULL);
mp_so = mpte->mpte_mppcb->mpp_socket;
mp_tp = mpte->mpte_mptcb;
so = mpts->mpts_socket;
tp = intotcpcb(sotoinpcb(so));
mpte->mpte_flags |= MPTE_SND_REM_ADDR;
mpte->mpte_lost_aid = tp->t_local_aid;
linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) &&
!(mp_so->so_flags & SOF_PCBCLEARING));
mptcplog((LOG_DEBUG, "%s: cid %d [linger %s]\n", __func__,
mpts->mpts_connid, (linger ? "YES" : "NO")));
if (mpts->mpts_soerror == 0)
mpts->mpts_soerror = EADDRNOTAVAIL;
mptcp_subflow_disconnect(mpte, mpts, !linger);
MPT_LOCK(mp_tp);
if ((mp_tp->mpt_state < MPTCPS_ESTABLISHED) &&
(mp_so->so_flags & SOF_NOADDRAVAIL)) {
mp_so->so_error = EADDRNOTAVAIL;
}
MPT_UNLOCK(mp_tp);
return (linger ? MPTS_EVRET_OK : MPTS_EVRET_DELETE);
}
/*
 * Handle SO_FILT_HINT_MPFAILOVER on a subflow: try to move the session
 * onto an alternate subflow.
 *
 * The MP session lock and the subflow lock are asserted held on entry;
 * the subflow lock is dropped while the alternate subflow is examined
 * and is held again on return.  Always returns MPTS_EVRET_OK.
 */
static ev_ret_t
mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts)
{
struct mptsub *mpts_alt = NULL;
struct socket *so = NULL;
struct socket *mp_so;
int altpath_exists = 0;
MPTE_LOCK_ASSERT_HELD(mpte);
MPTS_LOCK_ASSERT_HELD(mpts);
mp_so = mpte->mpte_mppcb->mpp_socket;
mptcplog2((LOG_NOTICE, "%s: mp_so 0x%llx\n", __func__,
(u_int64_t)VM_KERNEL_ADDRPERM(mp_so)));
/* Look for an alternate subflow with this subflow's lock dropped. */
MPTS_UNLOCK(mpts);
mpts_alt = mptcp_get_subflow(mpte, mpts);
if (mpts_alt == NULL) {
mptcplog2((LOG_WARNING, "%s: no alternate path\n", __func__));
MPTS_LOCK(mpts);
goto done;
}
MPTS_LOCK(mpts_alt);
altpath_exists = 1;
so = mpts_alt->mpts_socket;
/*
 * If the alternate is itself marked as failing over, it is usable only
 * when its send buffer is empty; in that case its failover markers are
 * cleared.  Otherwise it is treated as if no alternate existed.
 */
if (mpts_alt->mpts_flags & MPTSF_FAILINGOVER) {
socket_lock(so, 1);
if (so->so_snd.sb_cc == 0) {
so->so_flags &= ~SOF_MP_TRYFAILOVER;
mpts_alt->mpts_flags &= ~MPTSF_FAILINGOVER;
} else {
altpath_exists = 0;
}
socket_unlock(so, 1);
}
/*
 * Make the alternate the active subflow: restart its send point at the
 * connection-level snduna and wake up its writers.
 */
if (altpath_exists) {
mpts_alt->mpts_flags |= MPTSF_ACTIVE;
struct mptcb *mp_tp = mpte->mpte_mptcb;
MPT_LOCK(mp_tp);
mpts_alt->mpts_sndnxt = mp_tp->mpt_snduna;
MPT_UNLOCK(mp_tp);
mpte->mpte_active_sub = mpts_alt;
socket_lock(so, 1);
sowwakeup(so);
socket_unlock(so, 1);
}
MPTS_UNLOCK(mpts_alt);
/* Post a conninfo update on the MPTCP socket and count the switch. */
if (altpath_exists) {
soevent(mp_so,
SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED);
mptcplog((LOG_NOTICE, "%s: mp_so 0x%llx switched from "
"%d to %d\n", __func__,
(u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
mpts->mpts_connid, mpts_alt->mpts_connid));
tcpstat.tcps_mp_switches++;
}
/*
 * Back on the original subflow: either mark it as failing over and no
 * longer active, or, when no switch happened, clear the failover
 * request flag on its socket.
 */
MPTS_LOCK(mpts);
if (altpath_exists) {
mpts->mpts_flags |= MPTSF_FAILINGOVER;
mpts->mpts_flags &= ~MPTSF_ACTIVE;
} else {
so = mpts->mpts_socket;
socket_lock(so, 1);
so->so_flags &= ~SOF_MP_TRYFAILOVER;
socket_unlock(so, 1);
}
done:
MPTS_LOCK_ASSERT_HELD(mpts);
return (MPTS_EVRET_OK);
}
static ev_ret_t
mptcp_subflow_ifdenied_ev(struct mptses *mpte, struct mptsub *mpts)
{
struct socket *mp_so, *so;
struct mptcb *mp_tp;
boolean_t linger;
MPTE_LOCK_ASSERT_HELD(mpte);
MPTS_LOCK_ASSERT_HELD(mpts);
VERIFY(mpte->mpte_mppcb != NULL);
mp_so = mpte->mpte_mppcb->mpp_socket;
mp_tp = mpte->mpte_mptcb;
so = mpts->mpts_socket;
linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) &&
!(mp_so->so_flags & SOF_PCBCLEARING));
mptcplog((LOG_DEBUG, "%s: cid %d [linger %s]\n", __func__,
mpts->mpts_connid, (linger ? "YES" : "NO")));
if (mpts->mpts_soerror == 0)
mpts->mpts_soerror = EHOSTUNREACH;
mptcp_subflow_disconnect(mpte, mpts, !linger);
MPTS_UNLOCK(mpts);
soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_IFDENIED);
MPT_LOCK(mp_tp);
if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
mp_so->so_error = EHOSTUNREACH;
}
MPT_UNLOCK(mp_tp);
MPTS_LOCK(mpts);
return (linger ? MPTS_EVRET_OK : MPTS_EVRET_DELETE);
}
/*
 * Handle SO_FILT_HINT_SUSPEND on a subflow: mark it MPTSF_SUSPENDED.
 * The subflow is kept (MPTS_EVRET_OK).
 */
static ev_ret_t
mptcp_subflow_suspend_ev(struct mptses *mpte, struct mptsub *mpts)
{
	MPTE_LOCK_ASSERT_HELD(mpte);
	MPTS_LOCK_ASSERT_HELD(mpts);

	mpts->mpts_flags |= MPTSF_SUSPENDED;

	mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__,
	    mpts->mpts_connid));

	return (MPTS_EVRET_OK);
}
/*
 * Handle SO_FILT_HINT_RESUME on a subflow: clear MPTSF_SUSPENDED.
 * The subflow is kept (MPTS_EVRET_OK).
 */
static ev_ret_t
mptcp_subflow_resume_ev(struct mptses *mpte, struct mptsub *mpts)
{
	MPTE_LOCK_ASSERT_HELD(mpte);
	MPTS_LOCK_ASSERT_HELD(mpts);

	mpts->mpts_flags &= ~MPTSF_SUSPENDED;

	mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid));

	return (MPTS_EVRET_OK);
}
static ev_ret_t
mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts)
{
char buf0[MAX_IPv6_STR_LEN], buf1[MAX_IPv6_STR_LEN];
struct sockaddr_entry *src_se, *dst_se;
struct sockaddr_storage src;
struct socket *mp_so, *so;
struct mptcb *mp_tp;
struct ifnet *outifp;
int af, error = 0;
boolean_t mpok = FALSE;
MPTE_LOCK_ASSERT_HELD(mpte);
VERIFY(mpte->mpte_mppcb != NULL);
mp_so = mpte->mpte_mppcb->mpp_socket;
mp_tp = mpte->mpte_mptcb;
MPTS_LOCK_ASSERT_HELD(mpts);
so = mpts->mpts_socket;
af = mpts->mpts_family;
if (mpts->mpts_flags & MPTSF_CONNECTED)
return (MPTS_EVRET_OK);
if ((mpts->mpts_flags & MPTSF_DISCONNECTED) ||
(mpts->mpts_flags & MPTSF_DISCONNECTING)) {
return (MPTS_EVRET_OK);
}
socket_lock(so, 0);
if (so->so_state & SS_ISDISCONNECTED) {
socket_unlock(so, 0);
return (MPTS_EVRET_OK);
}
mpts->mpts_soerror = 0;
mpts->mpts_flags &= ~MPTSF_CONNECTING;
mpts->mpts_flags |= MPTSF_CONNECTED;
if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_TRUE)
mpts->mpts_flags |= MPTSF_MP_CAPABLE;
VERIFY(mpts->mpts_dst_sl != NULL);
dst_se = TAILQ_FIRST(&mpts->mpts_dst_sl->sl_head);
VERIFY(dst_se != NULL && dst_se->se_addr != NULL &&
dst_se->se_addr->sa_family == af);
VERIFY(mpts->mpts_src_sl != NULL);
src_se = TAILQ_FIRST(&mpts->mpts_src_sl->sl_head);
VERIFY(src_se != NULL && src_se->se_addr != NULL &&
src_se->se_addr->sa_family == af);
switch (af) {
case AF_INET: {
error = in_getsockaddr_s(so, &src);
if (error == 0) {
struct sockaddr_in *ms = SIN(src_se->se_addr);
struct sockaddr_in *s = SIN(&src);
VERIFY(s->sin_len == ms->sin_len);
VERIFY(ms->sin_family == AF_INET);
if ((mpts->mpts_flags & MPTSF_BOUND_IP) &&
bcmp(&ms->sin_addr, &s->sin_addr,
sizeof (ms->sin_addr)) != 0) {
mptcplog((LOG_ERR, "%s: cid %d local "
"address %s (expected %s)\n", __func__,
mpts->mpts_connid, inet_ntop(AF_INET,
(void *)&s->sin_addr.s_addr, buf0,
sizeof (buf0)), inet_ntop(AF_INET,
(void *)&ms->sin_addr.s_addr, buf1,
sizeof (buf1))));
}
bcopy(s, ms, sizeof (*s));
}
break;
}
#if INET6
case AF_INET6: {
error = in6_getsockaddr_s(so, &src);
if (error == 0) {
struct sockaddr_in6 *ms = SIN6(src_se->se_addr);
struct sockaddr_in6 *s = SIN6(&src);
VERIFY(s->sin6_len == ms->sin6_len);
VERIFY(ms->sin6_family == AF_INET6);
if ((mpts->mpts_flags & MPTSF_BOUND_IP) &&
bcmp(&ms->sin6_addr, &s->sin6_addr,
sizeof (ms->sin6_addr)) != 0) {
mptcplog((LOG_ERR, "%s: cid %d local "
"address %s (expected %s)\n", __func__,
mpts->mpts_connid, inet_ntop(AF_INET6,
(void *)&s->sin6_addr, buf0,
sizeof (buf0)), inet_ntop(AF_INET6,
(void *)&ms->sin6_addr, buf1,
sizeof (buf1))));
}
bcopy(s, ms, sizeof (*s));
}
break;
}
#endif
default:
VERIFY(0);
}
if (error != 0) {
mptcplog((LOG_ERR, "%s: cid %d getsockaddr failed (%d)\n",
__func__, mpts->mpts_connid, error));
}
outifp = sotoinpcb(so)->inp_last_outifp;
if (mpts->mpts_flags & MPTSF_BOUND_IF) {
VERIFY(mpts->mpts_outif != NULL);
if (mpts->mpts_outif != outifp) {
mptcplog((LOG_ERR, "%s: cid %d outif %s "
"(expected %s)\n", __func__, mpts->mpts_connid,
((outifp != NULL) ? outifp->if_xname : "NULL"),
mpts->mpts_outif->if_xname));
if (outifp == NULL)
outifp = mpts->mpts_outif;
}
} else {
mpts->mpts_outif = outifp;
}
socket_unlock(so, 0);
mptcplog((LOG_DEBUG, "%s: cid %d outif %s %s[%d] -> %s[%d] "
"is %s\n", __func__, mpts->mpts_connid, ((outifp != NULL) ?
outifp->if_xname : "NULL"), inet_ntop(af, (af == AF_INET) ?
(void *)&SIN(src_se->se_addr)->sin_addr.s_addr :
(void *)&SIN6(src_se->se_addr)->sin6_addr, buf0, sizeof (buf0)),
((af == AF_INET) ? ntohs(SIN(src_se->se_addr)->sin_port) :
ntohs(SIN6(src_se->se_addr)->sin6_port)),
inet_ntop(af, ((af == AF_INET) ?
(void *)&SIN(dst_se->se_addr)->sin_addr.s_addr :
(void *)&SIN6(dst_se->se_addr)->sin6_addr), buf1, sizeof (buf1)),
((af == AF_INET) ? ntohs(SIN(dst_se->se_addr)->sin_port) :
ntohs(SIN6(dst_se->se_addr)->sin6_port)),
((mpts->mpts_flags & MPTSF_MP_CAPABLE) ?
"MPTCP capable" : "a regular TCP")));
mpok = (mpts->mpts_flags & MPTSF_MP_CAPABLE);
MPTS_UNLOCK(mpts);
soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED);
MPT_LOCK(mp_tp);
if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
if (!mpok) {
mp_tp->mpt_flags |= MPTCPF_PEEL_OFF;
(void) mptcp_drop(mpte, mp_tp, EPROTO);
MPT_UNLOCK(mp_tp);
} else {
if (mptcp_init_authparms(mp_tp) != 0) {
mp_tp->mpt_flags |= MPTCPF_PEEL_OFF;
(void) mptcp_drop(mpte, mp_tp, EPROTO);
MPT_UNLOCK(mp_tp);
mpok = FALSE;
} else {
mp_tp->mpt_state = MPTCPS_ESTABLISHED;
mpte->mpte_associd = mpts->mpts_connid;
DTRACE_MPTCP2(state__change,
struct mptcb *, mp_tp,
uint32_t, 0 );
mptcp_init_statevars(mp_tp);
MPT_UNLOCK(mp_tp);
(void) mptcp_setconnorder(mpte,
mpts->mpts_connid, 1);
soisconnected(mp_so);
}
}
MPTS_LOCK(mpts);
if (mpok) {
mpts->mpts_rel_seq = 1;
mpts->mpts_flags |= MPTSF_MPCAP_CTRSET;
mpte->mpte_nummpcapflows++;
MPT_LOCK_SPIN(mp_tp);
mpts->mpts_sndnxt = mp_tp->mpt_snduna;
MPT_UNLOCK(mp_tp);
}
} else if (mpok) {
MPT_UNLOCK(mp_tp);
MPTS_LOCK(mpts);
mpts->mpts_flags |= MPTSF_MPCAP_CTRSET;
mpte->mpte_nummpcapflows++;
mpts->mpts_rel_seq = 1;
MPT_LOCK_SPIN(mp_tp);
mpts->mpts_sndnxt = mp_tp->mpt_snduna;
MPT_UNLOCK(mp_tp);
}
MPTS_LOCK_ASSERT_HELD(mpts);
return (MPTS_EVRET_OK);
}
static ev_ret_t
mptcp_subflow_disconnected_ev(struct mptses *mpte, struct mptsub *mpts)
{
struct socket *mp_so, *so;
struct mptcb *mp_tp;
boolean_t linger;
MPTE_LOCK_ASSERT_HELD(mpte);
MPTS_LOCK_ASSERT_HELD(mpts);
VERIFY(mpte->mpte_mppcb != NULL);
mp_so = mpte->mpte_mppcb->mpp_socket;
mp_tp = mpte->mpte_mptcb;
so = mpts->mpts_socket;
linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) &&
!(mp_so->so_flags & SOF_PCBCLEARING));
mptcplog2((LOG_DEBUG, "%s: cid %d [linger %s]\n", __func__,
mpts->mpts_connid, (linger ? "YES" : "NO")));
if (mpts->mpts_flags & MPTSF_DISCONNECTED)
return (linger ? MPTS_EVRET_OK : MPTS_EVRET_DELETE);
mpts->mpts_flags &= ~(MPTSF_CONNECTING|MPTSF_CONNECT_PENDING|
MPTSF_CONNECTED|MPTSF_DISCONNECTING|MPTSF_PREFERRED|
MPTSF_MP_CAPABLE|MPTSF_MP_READY|MPTSF_MP_DEGRADED|
MPTSF_SUSPENDED|MPTSF_ACTIVE);
mpts->mpts_flags |= MPTSF_DISCONNECTED;
MPTS_UNLOCK(mpts);
soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED);
if (mpts->mpts_flags & MPTSF_MPCAP_CTRSET) {
mpte->mpte_nummpcapflows--;
mpts->mpts_flags &= ~MPTSF_MPCAP_CTRSET;
}
MPT_LOCK(mp_tp);
if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
MPT_UNLOCK(mp_tp);
soisdisconnected(mp_so);
} else {
MPT_UNLOCK(mp_tp);
}
MPTS_LOCK(mpts);
return (linger ? MPTS_EVRET_OK : MPTS_EVRET_DELETE);
}
static ev_ret_t
mptcp_subflow_mpstatus_ev(struct mptses *mpte, struct mptsub *mpts)
{
struct socket *mp_so, *so;
struct mptcb *mp_tp;
ev_ret_t ret = MPTS_EVRET_OK_UPDATE;
MPTE_LOCK_ASSERT_HELD(mpte);
VERIFY(mpte->mpte_mppcb != NULL);
mp_so = mpte->mpte_mppcb->mpp_socket;
mp_tp = mpte->mpte_mptcb;
MPTS_LOCK_ASSERT_HELD(mpts);
so = mpts->mpts_socket;
socket_lock(so, 0);
MPT_LOCK(mp_tp);
if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_TRUE)
mpts->mpts_flags |= MPTSF_MP_CAPABLE;
else
mpts->mpts_flags &= ~MPTSF_MP_CAPABLE;
if (sototcpcb(so)->t_mpflags & TMPF_TCP_FALLBACK) {
if (mpts->mpts_flags & MPTSF_MP_DEGRADED)
goto done;
mpts->mpts_flags |= MPTSF_MP_DEGRADED;
}
else
mpts->mpts_flags &= ~MPTSF_MP_DEGRADED;
if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_READY)
mpts->mpts_flags |= MPTSF_MP_READY;
else
mpts->mpts_flags &= ~MPTSF_MP_READY;
if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
mp_tp->mpt_flags |= MPTCPF_FALLBACK_TO_TCP;
mp_tp->mpt_flags &= ~MPTCPF_JOIN_READY;
}
if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
VERIFY(!(mp_tp->mpt_flags & MPTCPF_JOIN_READY));
ret = MPTS_EVRET_DISCONNECT_FALLBACK;
} else if (mpts->mpts_flags & MPTSF_MP_READY) {
mp_tp->mpt_flags |= MPTCPF_JOIN_READY;
ret = MPTS_EVRET_CONNECT_PENDING;
}
mptcplog2((LOG_DEBUG, "%s: mp_so 0x%llx mpt_flags=%b cid %d "
"mptsf=%b\n", __func__,
(u_int64_t)VM_KERNEL_ADDRPERM(mpte->mpte_mppcb->mpp_socket),
mp_tp->mpt_flags, MPTCPF_BITS, mpts->mpts_connid,
mpts->mpts_flags, MPTSF_BITS));
done:
MPT_UNLOCK(mp_tp);
socket_unlock(so, 0);
return (ret);
}
static ev_ret_t
mptcp_subflow_mustrst_ev(struct mptses *mpte, struct mptsub *mpts)
{
struct socket *mp_so, *so;
struct mptcb *mp_tp;
boolean_t linger;
MPTE_LOCK_ASSERT_HELD(mpte);
MPTS_LOCK_ASSERT_HELD(mpts);
VERIFY(mpte->mpte_mppcb != NULL);
mp_so = mpte->mpte_mppcb->mpp_socket;
mp_tp = mpte->mpte_mptcb;
so = mpts->mpts_socket;
linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) &&
!(mp_so->so_flags & SOF_PCBCLEARING));
if (mpts->mpts_soerror == 0)
mpts->mpts_soerror = ECONNABORTED;
so->so_error = ECONNABORTED;
socket_lock(so, 0);
struct tcptemp *t_template;
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp = NULL;
tp = intotcpcb(inp);
t_template = tcp_maketemplate(tp);
if (t_template) {
unsigned int ifscope, nocell = 0;
if (inp->inp_flags & INP_BOUND_IF)
ifscope = inp->inp_boundifp->if_index;
else
ifscope = IFSCOPE_NONE;
if (inp->inp_flags & INP_NO_IFT_CELLULAR)
nocell = 1;
tcp_respond(tp, t_template->tt_ipgen,
&t_template->tt_t, (struct mbuf *)NULL,
tp->rcv_nxt, tp->snd_una, TH_RST, ifscope, nocell);
(void) m_free(dtom(t_template));
mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx cid %d \n",
__func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
so, mpts->mpts_connid));
}
socket_unlock(so, 0);
mptcp_subflow_disconnect(mpte, mpts, !linger);
MPTS_UNLOCK(mpts);
soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED);
MPT_LOCK(mp_tp);
if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
mp_so->so_error = ECONNABORTED;
}
MPT_UNLOCK(mp_tp);
MPTS_LOCK(mpts);
return (linger ? MPTS_EVRET_OK : MPTS_EVRET_DELETE);
}
/*
 * Map an ev_ret_t value to its symbolic name for logging.  Values not
 * listed here yield "UNKNOWN".
 */
static const char *
mptcp_evret2str(ev_ret_t ret)
{
	switch (ret) {
	case MPTS_EVRET_DELETE:
		return ("MPTS_EVRET_DELETE");
	case MPTS_EVRET_CONNECT_PENDING:
		return ("MPTS_EVRET_CONNECT_PENDING");
	case MPTS_EVRET_DISCONNECT_FALLBACK:
		return ("MPTS_EVRET_DISCONNECT_FALLBACK");
	case MPTS_EVRET_OK:
		return ("MPTS_EVRET_OK");
	case MPTS_EVRET_OK_UPDATE:
		return ("MPTS_EVRET_OK_UPDATE");
	}
	return ("UNKNOWN");
}
/*
 * Take a reference on a subflow.
 *
 * 'locked' tells whether the caller already holds the subflow lock; if
 * not, the lock is taken for the duration of the update.  Panics if the
 * reference count wraps around to zero.
 */
void
mptcp_subflow_addref(struct mptsub *mpts, int locked)
{
	if (locked) {
		MPTS_LOCK_ASSERT_HELD(mpts);
	} else {
		MPTS_LOCK(mpts);
	}

	mpts->mpts_refcnt++;
	if (mpts->mpts_refcnt == 0) {
		panic("%s: mpts %p wraparound refcnt\n", __func__, mpts);
	}

	if (!locked) {
		MPTS_UNLOCK(mpts);
	}
}
/*
 * Drop a reference on a subflow.  Panics if the count is already zero.
 * When the last reference goes away the subflow is passed to
 * mptcp_subflow_free() with the subflow lock still held.
 * NOTE(review): presumably the callee releases the lock -- confirm
 * against mptcp_subflow_free().
 */
void
mptcp_subflow_remref(struct mptsub *mpts)
{
	MPTS_LOCK(mpts);

	if (mpts->mpts_refcnt == 0) {
		panic("%s: mpts %p negative refcnt\n", __func__, mpts);
	}

	mpts->mpts_refcnt--;
	if (mpts->mpts_refcnt > 0) {
		/* Still referenced elsewhere. */
		MPTS_UNLOCK(mpts);
		return;
	}

	mptcp_subflow_free(mpts);
}
/*
 * Apply an MPTCP-level socket option record to a subflow socket.
 *
 * The option must be flagged MPOF_SUBFLOW_OK; its MPOF_INTERIM flag is
 * cleared.  The integer value stored in 'mpo' is passed to
 * sosetoptlock() on behalf of kernproc.
 *
 * Returns the result of sosetoptlock().
 */
int
mptcp_subflow_sosetopt(struct mptses *mpte, struct socket *so,
    struct mptopt *mpo)
{
	struct socket *mp_so;
	struct sockopt sopt;
	char buf[32];
	int error;

	VERIFY(mpo->mpo_flags & MPOF_SUBFLOW_OK);
	mpo->mpo_flags &= ~MPOF_INTERIM;

	MPTE_LOCK_ASSERT_HELD(mpte);
	mp_so = mpte->mpte_mppcb->mpp_socket;

	/* Build an in-kernel "set" request carrying one int. */
	bzero(&sopt, sizeof (sopt));
	sopt.sopt_p = kernproc;
	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_level = mpo->mpo_level;
	sopt.sopt_name = mpo->mpo_name;
	sopt.sopt_val = CAST_USER_ADDR_T(&mpo->mpo_intval);
	sopt.sopt_valsize = sizeof (int);

	error = sosetoptlock(so, &sopt, 0);
	if (error != 0) {
		mptcplog((LOG_ERR, "%s: mp_so 0x%llx sopt %s "
		    "val %d set error %d\n", __func__,
		    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
		    mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name,
		    buf, sizeof (buf)), mpo->mpo_intval, error));
		return (error);
	}

	mptcplog2((LOG_DEBUG, "%s: mp_so 0x%llx sopt %s "
	    "val %d set successful\n", __func__,
	    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
	    mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name,
	    buf, sizeof (buf)), mpo->mpo_intval));

	return (error);
}
/*
 * Read a socket option from a subflow socket into an MPTCP option
 * record.
 *
 * The option must be flagged MPOF_SUBFLOW_OK.  The request is issued
 * through sogetoptlock() on behalf of kernproc, with the record's
 * integer value field as the destination.
 *
 * Returns the result of sogetoptlock().
 */
int
mptcp_subflow_sogetopt(struct mptses *mpte, struct socket *so,
    struct mptopt *mpo)
{
	struct socket *mp_so;
	struct sockopt sopt;
	char buf[32];
	int error;

	VERIFY(mpo->mpo_flags & MPOF_SUBFLOW_OK);

	MPTE_LOCK_ASSERT_HELD(mpte);
	mp_so = mpte->mpte_mppcb->mpp_socket;

	/* Build an in-kernel "get" request for one int. */
	bzero(&sopt, sizeof (sopt));
	sopt.sopt_p = kernproc;
	sopt.sopt_dir = SOPT_GET;
	sopt.sopt_level = mpo->mpo_level;
	sopt.sopt_name = mpo->mpo_name;
	sopt.sopt_val = CAST_USER_ADDR_T(&mpo->mpo_intval);
	sopt.sopt_valsize = sizeof (int);

	error = sogetoptlock(so, &sopt, 0);
	if (error != 0) {
		mptcplog((LOG_ERR, "%s: mp_so 0x%llx sopt %s get error %d\n",
		    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
		    mptcp_sopt2str(mpo->mpo_level,
		    mpo->mpo_name, buf, sizeof (buf)), error));
		return (error);
	}

	mptcplog2((LOG_DEBUG, "%s: mp_so 0x%llx sopt %s "
	    "val %d get successful\n", __func__,
	    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
	    mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name,
	    buf, sizeof (buf)), mpo->mpo_intval));

	return (error);
}
/*
 * Garbage-collect MPTCP protocol control blocks.
 *
 * Called with the PCB info lock (mppi_lock) owned.  Walks every PCB on
 * the list and disposes of those that are dead and no longer in use.
 * Returns the number of PCBs that were left in place.
 */
static uint32_t
mptcp_gc(struct mppcbinfo *mppi)
{
struct mppcb *mpp, *tmpp;
uint32_t active = 0;
lck_mtx_assert(&mppi->mppi_lock, LCK_MTX_ASSERT_OWNED);
mptcplog3((LOG_DEBUG, "%s: running\n", __func__));
TAILQ_FOREACH_SAFE(mpp, &mppi->mppi_pcbs, mpp_entry, tmpp) {
struct socket *mp_so;
struct mptses *mpte;
struct mptcb *mp_tp;
VERIFY(mpp->mpp_flags & MPP_ATTACHED);
mp_so = mpp->mpp_socket;
VERIFY(mp_so != NULL);
mpte = mptompte(mpp);
VERIFY(mpte != NULL);
mp_tp = mpte->mpte_mptcb;
VERIFY(mp_tp != NULL);
mptcplog3((LOG_DEBUG, "%s: mp_so 0x%llx found "
"(u=%d,r=%d,s=%d)\n", __func__,
(u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mp_so->so_usecount,
mp_so->so_retaincnt, mpp->mpp_state));
/* Never block on a PCB lock here; a busy PCB is skipped this round. */
if (!lck_mtx_try_lock(&mpp->mpp_lock)) {
mptcplog3((LOG_DEBUG, "%s: mp_so 0x%llx skipped "
"(u=%d,r=%d)\n", __func__,
(u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
mp_so->so_usecount, mp_so->so_retaincnt));
active++;
continue;
}
/* More than one use count: the socket is still in use. */
if (mp_so->so_usecount > 1) {
boolean_t wakeup = FALSE;
struct mptsub *mpts, *tmpts;
mptcplog3((LOG_DEBUG, "%s: mp_so 0x%llx skipped "
"[u=%d,r=%d] %d %d\n", __func__,
(u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
mp_so->so_usecount, mp_so->so_retaincnt,
mp_tp->mpt_gc_ticks,
mp_tp->mpt_state));
MPT_LOCK(mp_tp);
/*
 * From FIN_WAIT_1 onwards, count down the GC ticks; when they reach
 * zero, free the local key and ask for the subflows to be torn down.
 */
if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_1) {
if (mp_tp->mpt_gc_ticks > 0)
mp_tp->mpt_gc_ticks--;
if (mp_tp->mpt_gc_ticks == 0) {
wakeup = TRUE;
if (mp_tp->mpt_localkey != NULL) {
mptcp_free_key(
mp_tp->mpt_localkey);
mp_tp->mpt_localkey = NULL;
}
}
}
MPT_UNLOCK(mp_tp);
/*
 * Mark every subflow as OK to delete, record ETIMEDOUT if it has no
 * error yet, and post a DISCONNECTED event on it.
 */
if (wakeup) {
TAILQ_FOREACH_SAFE(mpts,
&mpte->mpte_subflows, mpts_entry, tmpts) {
MPTS_LOCK(mpts);
mpts->mpts_flags |= MPTSF_DELETEOK;
if (mpts->mpts_soerror == 0)
mpts->mpts_soerror = ETIMEDOUT;
mptcp_subflow_eupcall(mpts->mpts_socket,
mpts, SO_FILT_HINT_DISCONNECTED);
MPTS_UNLOCK(mpts);
}
}
lck_mtx_unlock(&mpp->mpp_lock);
active++;
continue;
}
/* Only PCBs in the dead state are candidates for disposal. */
if (mpp->mpp_state != MPPCB_STATE_DEAD) {
mptcplog3((LOG_DEBUG, "%s: mp_so 0x%llx skipped "
"[u=%d,r=%d,s=%d]\n", __func__,
(u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
mp_so->so_usecount, mp_so->so_retaincnt,
mpp->mpp_state));
lck_mtx_unlock(&mpp->mpp_lock);
active++;
continue;
}
/*
 * Dead with exactly one use count left: signal the session's worker
 * thread to terminate and look at this PCB again on a later pass.
 */
if (mp_so->so_usecount == 1) {
mptcplog2((LOG_DEBUG, "%s: mp_so 0x%llx scheduled for "
"termination [u=%d,r=%d]\n", __func__,
(u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
mp_so->so_usecount, mp_so->so_retaincnt));
mptcp_thread_terminate_signal(mpte);
lck_mtx_unlock(&mpp->mpp_lock);
active++;
continue;
}
/*
 * Dead and unused: dispose of the PCB.  The PCB lock is still held at
 * this point and is not released here.
 * NOTE(review): presumably mp_pcbdispose() takes care of it -- confirm.
 */
mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx destroyed [u=%d,r=%d]\n",
__func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
mp_so->so_usecount, mp_so->so_retaincnt));
DTRACE_MPTCP4(dispose, struct socket *, mp_so,
struct sockbuf *, &mp_so->so_rcv,
struct sockbuf *, &mp_so->so_snd,
struct mppcb *, mpp);
mp_pcbdispose(mpp);
}
return (active);
}
/*
 * Drop an MPTCP connection: move it to the closed state, report
 * 'errno' on the MPTCP socket and close it.
 *
 * The MP session lock and the MPT lock are asserted held.  When the
 * error is ETIMEDOUT and a soft error has been recorded on the
 * connection, the soft error is reported instead.
 *
 * Returns whatever mptcp_close() returns.
 */
struct mptses *
mptcp_drop(struct mptses *mpte, struct mptcb *mp_tp, int errno)
{
	struct socket *mp_so;

	MPTE_LOCK_ASSERT_HELD(mpte);
	MPT_LOCK_ASSERT_HELD(mp_tp);
	VERIFY(mpte->mpte_mptcb == mp_tp);

	mp_so = mpte->mpte_mppcb->mpp_socket;

	mp_tp->mpt_state = MPTCPS_CLOSED;
	DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp,
	    uint32_t, 0 );

	/* Prefer a recorded soft error over a bare timeout. */
	if (errno == ETIMEDOUT && mp_tp->mpt_softerror != 0) {
		errno = mp_tp->mpt_softerror;
	}
	mp_so->so_error = errno;

	return (mptcp_close(mpte, mp_tp));
}
/*
 * Close an MPTCP connection.
 *
 * The MP session lock and the MPT lock are asserted held on entry; the
 * MPT lock is dropped and re-taken internally and is held on return.
 * The local key is freed, the MPTCP socket is marked disconnected, and,
 * unless the connection is flagged MPTCPF_PEEL_OFF, every subflow is
 * disconnected and deleted.
 *
 * Always returns NULL.
 */
struct mptses *
mptcp_close(struct mptses *mpte, struct mptcb *mp_tp)
{
	struct socket *mp_so;
	struct mptsub *mpts, *tmpts;

	MPTE_LOCK_ASSERT_HELD(mpte);
	MPT_LOCK_ASSERT_HELD(mp_tp);
	VERIFY(mpte->mpte_mptcb == mp_tp);

	mp_so = mpte->mpte_mppcb->mpp_socket;

	if (mp_tp->mpt_localkey != NULL) {
		mptcp_free_key(mp_tp->mpt_localkey);
		mp_tp->mpt_localkey = NULL;
	}

	/* soisdisconnected() is called without the MPT lock. */
	MPT_UNLOCK(mp_tp);
	soisdisconnected(mp_so);
	MPT_LOCK(mp_tp);

	if (!(mp_tp->mpt_flags & MPTCPF_PEEL_OFF)) {
		/* Tear down the subflows with the MPT lock released. */
		MPT_UNLOCK(mp_tp);

		TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry,
		    tmpts) {
			MPTS_LOCK(mpts);
			mptcp_subflow_disconnect(mpte, mpts, TRUE);
			MPTS_UNLOCK(mpts);
			mptcp_subflow_del(mpte, mpts, TRUE);
		}

		MPT_LOCK(mp_tp);
	}

	return (NULL);
}
/*
 * Post a DISCONNECTED event on 'so'.  SO_FILT_HINT_LOCKED is passed
 * along with it.
 */
void
mptcp_notify_close(struct socket *so)
{
	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_DISCONNECTED));
}
/*
 * Signal the session's worker thread.  Takes the session thread lock
 * around mptcp_thread_signal_locked().
 */
void
mptcp_thread_signal(struct mptses *mpte)
{
	lck_mtx_lock(&mpte->mpte_thread_lock);

	mptcp_thread_signal_locked(mpte);

	lck_mtx_unlock(&mpte->mpte_thread_lock);
}
/*
 * Record a new work request for the session's worker thread and wake it
 * if it is idle.  The caller must own mpte_thread_lock.
 *
 * The request counter is always bumped; the wakeup is skipped when the
 * worker is already active or when the thread handle has been cleared.
 */
static void
mptcp_thread_signal_locked(struct mptses *mpte)
{
	lck_mtx_assert(&mpte->mpte_thread_lock, LCK_MTX_ASSERT_OWNED);

	mpte->mpte_thread_reqs++;

	if (mpte->mpte_thread_active || mpte->mpte_thread == THREAD_NULL) {
		return;
	}

	wakeup_one((caddr_t)&mpte->mpte_thread);
}
/*
 * Ask the session's worker thread to terminate.
 *
 * Clearing mpte_thread to THREAD_NULL is the termination request; the
 * request counter is bumped and an idle worker is woken so it can notice.
 * Does nothing if the thread handle is already THREAD_NULL.
 */
static void
mptcp_thread_terminate_signal(struct mptses *mpte)
{
	lck_mtx_lock(&mpte->mpte_thread_lock);

	if (mpte->mpte_thread == THREAD_NULL) {
		/* Termination was already requested. */
		lck_mtx_unlock(&mpte->mpte_thread_lock);
		return;
	}

	mpte->mpte_thread = THREAD_NULL;
	mpte->mpte_thread_reqs++;
	if (!mpte->mpte_thread_active) {
		wakeup_one((caddr_t)&mpte->mpte_thread);
	}

	lck_mtx_unlock(&mpte->mpte_thread_lock);
}
/*
 * One pass of the per-session worker.
 *
 * First loop: for every subflow, refresh the subflow socket's owner from
 * the MPTCP socket, process subflow input and events, send output on a
 * subflow flagged MPTSF_ACTIVE, and act on the event result.
 * Second loop (only when some subflow reported a pending connect or a
 * fallback disconnect): either degrade/reset subflows for fallback, or
 * start the connects that were left pending.
 *
 * Takes the mpte lock on entry and releases it on every return path.
 */
static void
mptcp_thread_dowork(struct mptses *mpte)
{
struct socket *mp_so;
struct mptsub *mpts, *tmpts;
boolean_t connect_pending = FALSE, disconnect_fallback = FALSE;
boolean_t conninfo_update = FALSE;
MPTE_LOCK(mpte);
VERIFY(mpte->mpte_mppcb != NULL);
mp_so = mpte->mpte_mppcb->mpp_socket;
VERIFY(mp_so != NULL);
/* _SAFE iteration: a subflow may be deleted in the MPTS_EVRET_DELETE case. */
TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
ev_ret_t ret;
MPTS_LOCK(mpts);
/* Hold a reference for this iteration; dropped by MPTS_REMREF below. */
MPTS_ADDREF_LOCKED(mpts);
/* Propagate process ownership from the MPTCP socket to the subflow. */
mptcp_update_last_owner(mpts, mp_so);
mptcp_subflow_input(mpte, mpts);
ret = mptcp_subflow_events(mpte, mpts);
if (mpts->mpts_flags & MPTSF_ACTIVE) {
mptcplog3((LOG_INFO, "%s: cid %d \n", __func__,
mpts->mpts_connid));
(void) mptcp_subflow_output(mpte, mpts);
}
/* MPTCP socket is being torn down: start disconnecting this subflow. */
if (mp_so->so_flags & SOF_PCBCLEARING)
mptcp_subflow_disconnect(mpte, mpts, FALSE);
MPTS_UNLOCK(mpts);
/* Act on the event result with the subflow lock released. */
switch (ret) {
case MPTS_EVRET_OK_UPDATE:
conninfo_update = TRUE;
break;
case MPTS_EVRET_OK:
/* nothing to do */
break;
case MPTS_EVRET_DELETE:
if (mptcp_delete_ok(mpte, mpts)) {
mptcp_subflow_del(mpte, mpts, TRUE);
}
break;
case MPTS_EVRET_CONNECT_PENDING:
connect_pending = TRUE;
break;
case MPTS_EVRET_DISCONNECT_FALLBACK:
disconnect_fallback = TRUE;
break;
}
MPTS_REMREF(mpts);
}
/* Post a single conninfo-updated event for the whole pass. */
if (conninfo_update) {
soevent(mp_so, SO_FILT_HINT_LOCKED |
SO_FILT_HINT_CONNINFO_UPDATED);
}
/* Nothing further to do unless a connect or a fallback is outstanding. */
if (!connect_pending && !disconnect_fallback) {
MPTE_UNLOCK(mpte);
return;
}
/*
 * Second pass.  Fallback handling takes priority: when both flags are
 * set only the disconnect_fallback branch runs (if / else if).
 */
TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
MPTS_LOCK(mpts);
if (disconnect_fallback) {
struct socket *so = NULL;
struct inpcb *inp = NULL;
struct tcpcb *tp = NULL;
/* Already handled on an earlier pass. */
if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
MPTS_UNLOCK(mpts);
continue;
}
mpts->mpts_flags |= MPTSF_MP_DEGRADED;
/* Subflow is already going away; marking it degraded is enough. */
if (mpts->mpts_flags & (MPTSF_DISCONNECTING|
MPTSF_DISCONNECTED)) {
MPTS_UNLOCK(mpts);
continue;
}
so = mpts->mpts_socket;
socket_lock(so, 1);
inp = sotoinpcb(so);
tp = intotcpcb(inp);
/* Switch the subflow's TCP state to fallback mode. */
tp->t_mpflags &=
~(TMPF_MPTCP_READY|TMPF_MPTCP_TRUE);
tp->t_mpflags |= TMPF_TCP_FALLBACK;
/* The ACTIVE subflow is kept; every other subflow is reset below. */
if (mpts->mpts_flags & MPTSF_ACTIVE) {
socket_unlock(so, 1);
MPTS_UNLOCK(mpts);
continue;
}
tp->t_mpflags |= TMPF_RESET;
soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
socket_unlock(so, 1);
} else if (connect_pending) {
/* Start the connect on subflows that were left pending. */
if (mpts->mpts_flags & MPTSF_CONNECT_PENDING) {
(void) mptcp_subflow_soconnectx(mpte, mpts);
}
}
MPTS_UNLOCK(mpts);
}
MPTE_UNLOCK(mpte);
}
/*
 * Body of the per-session worker thread.
 *
 * `v` is the struct mptses the thread serves; `w` is unused.
 * The thread sleeps on &mpte->mpte_thread (under mpte_thread_lock) until
 * woken, then runs mptcp_thread_dowork() repeatedly for as long as new
 * requests arrive during a pass.  When mpte_thread has been set to
 * THREAD_NULL the thread calls mptcp_thread_destroy(), which ends with
 * thread_terminate() on the current thread.
 */
static void
mptcp_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
struct mptses *mpte = v;
/* ts stays NULL: no timeout is supplied to msleep(). */
struct timespec *ts = NULL;
VERIFY(mpte != NULL);
lck_mtx_lock_spin(&mpte->mpte_thread_lock);
for (;;) {
lck_mtx_assert(&mpte->mpte_thread_lock, LCK_MTX_ASSERT_OWNED);
/* Only sleep if termination has not already been requested. */
if (mpte->mpte_thread != THREAD_NULL) {
(void) msleep(&mpte->mpte_thread,
&mpte->mpte_thread_lock, (PZERO - 1) | PSPIN,
__func__, ts);
}
/* Termination requested: drop the lock and tear the session down. */
if (mpte->mpte_thread == THREAD_NULL) {
lck_mtx_unlock(&mpte->mpte_thread_lock);
mptcp_thread_destroy(mpte);
return;
}
/* While active is set, signalers only bump the counter (no wakeup). */
mpte->mpte_thread_active = 1;
for (;;) {
/* Snapshot the request counter before working unlocked. */
uint32_t reqs = mpte->mpte_thread_reqs;
lck_mtx_unlock(&mpte->mpte_thread_lock);
mptcp_thread_dowork(mpte);
lck_mtx_lock_spin(&mpte->mpte_thread_lock);
/* Stop when no request arrived meanwhile, or on termination. */
if (reqs == mpte->mpte_thread_reqs ||
mpte->mpte_thread == THREAD_NULL)
break;
}
mpte->mpte_thread_reqs = 0;
mpte->mpte_thread_active = 0;
}
}
/*
 * Final step of the worker thread: destroy the session, drop one use
 * count on the MPTCP socket, mark the PCB MPP_DEFUNCT and terminate the
 * calling thread.  Expects mpte_thread to have been cleared already
 * (verified below).  Runs on the worker thread itself, since it acts on
 * current_thread().
 */
static void
mptcp_thread_destroy(struct mptses *mpte)
{
struct socket *mp_so;
MPTE_LOCK(mpte);
VERIFY(mpte->mpte_thread == THREAD_NULL);
VERIFY(mpte->mpte_mppcb != NULL);
mptcp_sesdestroy(mpte);
mp_so = mpte->mpte_mppcb->mpp_socket;
VERIFY(mp_so != NULL);
VERIFY(mp_so->so_usecount != 0);
/* Drop a use count on the MPTCP socket (presumably the thread's own -- confirm). */
mp_so->so_usecount--;
mpte->mpte_mppcb->mpp_flags |= MPP_DEFUNCT;
MPTE_UNLOCK(mpte);
/* Drop a thread reference, then end the calling thread. */
thread_deallocate(current_thread());
thread_terminate(current_thread());
}
/*
 * Lock the MPTCP socket by taking its PCB mutex.
 *
 * mp_so:    MPTCP socket to lock; panics if it has no PCB.
 * refcount: when non-zero, so_usecount is incremented after locking.
 * lr:       caller address to record in the lock history; when NULL the
 *           return address of this function is recorded instead.
 *
 * Panics if so_usecount is found negative.  Always returns 0.
 */
int
mptcp_lock(struct socket *mp_so, int refcount, void *lr)
{
	struct mppcb *mpp = sotomppcb(mp_so);
	void *caller = (lr != NULL) ? lr : __builtin_return_address(0);

	if (mpp == NULL) {
		panic("%s: so=%p NO PCB! lr=%p lrh= %s\n", __func__,
		    mp_so, caller, solockhistory_nr(mp_so));
	}

	lck_mtx_lock(&mpp->mpp_lock);

	if (mp_so->so_usecount < 0) {
		panic("%s: so=%p so_pcb=%p lr=%p ref=%x lrh= %s\n", __func__,
		    mp_so, mp_so->so_pcb, caller, mp_so->so_usecount,
		    solockhistory_nr(mp_so));
	}

	if (refcount != 0) {
		mp_so->so_usecount++;
	}

	/* Record the locker in the circular lock-history buffer. */
	mp_so->lock_lr[mp_so->next_lock_lr] = caller;
	mp_so->next_lock_lr = (mp_so->next_lock_lr + 1) % SO_LCKDBG_MAX;

	return (0);
}
/*
 * Unlock the MPTCP socket by releasing its PCB mutex.
 *
 * mp_so:    MPTCP socket to unlock; panics if it has no PCB.
 * refcount: when non-zero, so_usecount is decremented before unlocking.
 * lr:       caller address to record in the unlock history; when NULL the
 *           return address of this function is recorded instead.
 *
 * The PCB mutex must be owned by the caller.  Panics if so_usecount drops
 * below zero.  Always returns 0.
 */
int
mptcp_unlock(struct socket *mp_so, int refcount, void *lr)
{
	struct mppcb *mpp = sotomppcb(mp_so);
	void *caller = (lr != NULL) ? lr : __builtin_return_address(0);

	if (mpp == NULL) {
		panic("%s: so=%p NO PCB usecount=%x lr=%p lrh= %s\n", __func__,
		    mp_so, mp_so->so_usecount, caller,
		    solockhistory_nr(mp_so));
	}

	lck_mtx_assert(&mpp->mpp_lock, LCK_MTX_ASSERT_OWNED);

	if (refcount != 0) {
		mp_so->so_usecount--;
	}

	if (mp_so->so_usecount < 0) {
		panic("%s: so=%p usecount=%x lrh= %s\n", __func__,
		    mp_so, mp_so->so_usecount, solockhistory_nr(mp_so));
	}

	/* Record the unlocker in the circular unlock-history buffer. */
	mp_so->unlock_lr[mp_so->next_unlock_lr] = caller;
	mp_so->next_unlock_lr = (mp_so->next_unlock_lr + 1) % SO_LCKDBG_MAX;

	lck_mtx_unlock(&mpp->mpp_lock);

	return (0);
}
/*
 * Return the mutex that protects the MPTCP socket `mp_so`, i.e. the
 * mpp_lock of its PCB.  `locktype` is ignored.
 * Panics if the socket has no PCB or if its use count is negative.
 */
lck_mtx_t *
mptcp_getlock(struct socket *mp_so, int locktype)
{
#pragma unused(locktype)
struct mppcb *mpp = sotomppcb(mp_so);
if (mpp == NULL) {
panic("%s: so=%p NULL so_pcb %s\n", __func__, mp_so,
solockhistory_nr(mp_so));
}
if (mp_so->so_usecount < 0) {
panic("%s: so=%p usecount=%x lrh= %s\n", __func__,
mp_so, mp_so->so_usecount, solockhistory_nr(mp_so));
}
return (&mpp->mpp_lock);
}
static void
mptcp_generate_unique_key(struct mptcp_key_entry *key_entry)
{
struct mptcp_key_entry *key_elm;
try_again:
read_random(&key_entry->mkey_value, sizeof (key_entry->mkey_value));
if (key_entry->mkey_value == 0)
goto try_again;
mptcp_do_sha1(&key_entry->mkey_value, key_entry->mkey_digest,
sizeof (key_entry->mkey_digest));
LIST_FOREACH(key_elm, &mptcp_keys_pool, mkey_next) {
if (key_elm->mkey_value == key_entry->mkey_value) {
goto try_again;
}
if (bcmp(key_elm->mkey_digest, key_entry->mkey_digest, 4) ==
0) {
goto try_again;
}
}
}
/*
 * Hand out a key from the key pool.
 *
 * The first pool entry whose flags equal MKEYF_FREE is marked MKEYF_INUSE
 * and returned.  If there is none, a new entry is allocated from the key
 * zone, given a unique key, inserted at the head of the pool and returned.
 * Returns a pointer to the entry's mkey_value field.
 */
static mptcp_key_t *
mptcp_reserve_key(void)
{
struct mptcp_key_entry *key_elm;
struct mptcp_key_entry *found_elm = NULL;
lck_mtx_lock(&mptcp_keys_pool.mkph_lock);
LIST_FOREACH(key_elm, &mptcp_keys_pool, mkey_next) {
if (key_elm->mkey_flags == MKEYF_FREE) {
key_elm->mkey_flags = MKEYF_INUSE;
found_elm = key_elm;
break;
}
}
lck_mtx_unlock(&mptcp_keys_pool.mkph_lock);
if (found_elm) {
return (&found_elm->mkey_value);
}
/* No free entry: allocate a new one with the pool lock not held. */
key_elm = (struct mptcp_key_entry *)
zalloc(mptcp_keys_pool.mkph_key_entry_zone);
key_elm->mkey_flags = MKEYF_INUSE;
/* Key generation scans the pool, so do it and the insert under the lock. */
lck_mtx_lock(&mptcp_keys_pool.mkph_lock);
mptcp_generate_unique_key(key_elm);
LIST_INSERT_HEAD(&mptcp_keys_pool, key_elm, mkey_next);
mptcp_keys_pool.mkph_count += 1;
lck_mtx_unlock(&mptcp_keys_pool.mkph_lock);
return (&key_elm->mkey_value);
}
/*
 * Return the SHA-1 digest stored alongside a pool key.
 *
 * `key` must point at the mkey_value field of a pool entry; the enclosing
 * entry is recovered with offsetof().  Panics if that entry is not marked
 * MKEYF_INUSE.  The returned pointer refers into the pool entry itself and
 * is handed back after the pool lock has been dropped.
 */
static caddr_t
mptcp_get_stored_digest(mptcp_key_t *key)
{
struct mptcp_key_entry *key_holder;
caddr_t digest = NULL;
lck_mtx_lock(&mptcp_keys_pool.mkph_lock);
/* Step back from the key field to the start of its containing entry. */
key_holder = (struct mptcp_key_entry *)(void *)((caddr_t)key -
offsetof(struct mptcp_key_entry, mkey_value));
if (key_holder->mkey_flags != MKEYF_INUSE)
panic_plain("%s", __func__);
digest = &key_holder->mkey_digest[0];
lck_mtx_unlock(&mptcp_keys_pool.mkph_lock);
return (digest);
}
/*
 * Give a key obtained from mptcp_reserve_key() back to the key pool.
 *
 * `key` must point at the mkey_value field of a pool entry that is
 * currently marked MKEYF_INUSE; anything else (e.g. a double free)
 * panics, matching the check in mptcp_get_stored_digest().
 *
 * The entry is unlinked and marked free.  Depending on the low bit of a
 * random number it is then either returned to the zone, or re-inserted
 * into the pool at a random position.
 */
void
mptcp_free_key(mptcp_key_t *key)
{
	struct mptcp_key_entry *key_holder;
	struct mptcp_key_entry *key_elm;
	/*
	 * Unsigned on purpose: with a signed value a negative random number
	 * made the modulo below non-positive, so the entry always landed
	 * right after the list head.
	 */
	u_int32_t pt = RandomULong();

	mptcplog((LOG_INFO, "%s\n", __func__));

	lck_mtx_lock(&mptcp_keys_pool.mkph_lock);

	/* Step back from the key field to the start of its containing entry. */
	key_holder = (struct mptcp_key_entry *)(void*)((caddr_t)key -
	    offsetof(struct mptcp_key_entry, mkey_value));

	/* Catch double frees before they corrupt the list or the zone. */
	if (key_holder->mkey_flags != MKEYF_INUSE)
		panic_plain("%s", __func__);
	key_holder->mkey_flags = MKEYF_FREE;

	LIST_REMOVE(key_holder, mkey_next);
	mptcp_keys_pool.mkph_count -= 1;

	if (pt & 0x01) {
		/* Odd random value: release the entry to the zone. */
		zfree(mptcp_keys_pool.mkph_key_entry_zone, key_holder);
	} else {
		/* Even random value: keep the entry, at a random position. */
		u_int32_t i = 0;

		if (mptcp_keys_pool.mkph_count > 1) {
			pt = pt % (u_int32_t)(mptcp_keys_pool.mkph_count - 1);
			LIST_FOREACH(key_elm, &mptcp_keys_pool, mkey_next) {
				if (++i >= pt) {
					LIST_INSERT_AFTER(key_elm, key_holder,
					    mkey_next);
					break;
				}
			}
			if (i < pt)
				panic("missed insertion");
		} else {
			LIST_INSERT_HEAD(&mptcp_keys_pool, key_holder,
			    mkey_next);
		}
		mptcp_keys_pool.mkph_count += 1;
	}

	lck_mtx_unlock(&mptcp_keys_pool.mkph_lock);
}
/*
 * Initialise the global key pool: create the zone that backs key entries,
 * pre-populate the pool with MPTCP_KEY_PREALLOCS_MX free entries (each
 * with a unique key), and initialise the pool mutex.
 * Panics if the zone cannot be created.
 *
 * Note that the pool mutex is initialised last, so the pre-population
 * loop runs without it (presumably safe because this runs during
 * single-threaded initialisation -- confirm against the caller).
 */
static void
mptcp_key_pool_init(void)
{
int i;
struct mptcp_key_entry *key_entry;
LIST_INIT(&mptcp_keys_pool);
mptcp_keys_pool.mkph_count = 0;
mptcp_keys_pool.mkph_key_elm_sz = (vm_size_t)
(sizeof (struct mptcp_key_entry));
mptcp_keys_pool.mkph_key_entry_zone = zinit(
mptcp_keys_pool.mkph_key_elm_sz,
MPTCP_MX_KEY_ALLOCS * mptcp_keys_pool.mkph_key_elm_sz,
MPTCP_MX_PREALLOC_ZONE_SZ, "mptkeys");
if (mptcp_keys_pool.mkph_key_entry_zone == NULL) {
panic("%s: unable to allocate MPTCP keys zone \n", __func__);
}
zone_change(mptcp_keys_pool.mkph_key_entry_zone, Z_CALLERACCT, FALSE);
zone_change(mptcp_keys_pool.mkph_key_entry_zone, Z_EXPAND, TRUE);
/* Pre-populate the pool with free entries carrying unique keys. */
for (i = 0; i < MPTCP_KEY_PREALLOCS_MX; i++) {
key_entry = (struct mptcp_key_entry *)
zalloc(mptcp_keys_pool.mkph_key_entry_zone);
key_entry->mkey_flags = MKEYF_FREE;
mptcp_generate_unique_key(key_entry);
LIST_INSERT_HEAD(&mptcp_keys_pool, key_entry, mkey_next);
mptcp_keys_pool.mkph_count += 1;
}
lck_mtx_init(&mptcp_keys_pool.mkph_lock, mtcbinfo.mppi_lock_grp,
mtcbinfo.mppi_lock_attr);
}
/*
 * Bind subflow socket `so` to the MPTCP control block `mp_tp`.
 *
 * The subflow's tcpcb is pointed at mp_tp.  While the MPTCP connection is
 * still in MPTCPS_CLOSED the subflow gets local address id 0; otherwise
 * it gets `conn_id` as its address id and is flagged as a pre-established
 * joined flow / secondary subflow.  A new authentication entry carrying
 * a non-zero local random number is then added to mp_tp's subflow
 * authentication list.
 *
 * The mptcb lock must not be held by the caller.
 */
static void
mptcp_attach_to_subf(struct socket *so, struct mptcb *mp_tp,
    connid_t conn_id)
{
	struct tcpcb *tp = sototcpcb(so);
	struct mptcp_subf_auth_entry *auth;

	MPT_LOCK_ASSERT_NOTHELD(mp_tp);

	MPT_LOCK_SPIN(mp_tp);
	tp->t_mptcb = mp_tp;
	MPT_UNLOCK(mp_tp);

	if (mp_tp->mpt_state != MPTCPS_CLOSED) {
		tp->t_local_aid = conn_id;
		tp->t_mpflags |= (TMPF_PREESTABLISHED | TMPF_JOINED_FLOW);
		so->so_flags |= SOF_MP_SEC_SUBFLOW;
	} else {
		tp->t_local_aid = 0;
	}

	auth = zalloc(mpt_subauth_zone);
	auth->msae_laddr_id = tp->t_local_aid;
	auth->msae_raddr_id = 0;
	auth->msae_raddr_rand = 0;

	/* Zero means "not set" for the random numbers, so never use it. */
	do {
		auth->msae_laddr_rand = RandomULong();
	} while (auth->msae_laddr_rand == 0);

	LIST_INSERT_HEAD(&mp_tp->mpt_subauth_list, auth, msae_next);
}
/*
 * Undo mptcp_attach_to_subf(): remove and free the authentication entry
 * whose local address id matches the subflow's, and clear the subflow's
 * back pointer to the MPTCP control block.
 *
 * Does nothing when the socket has no tcpcb.  Takes the mptcb lock for
 * the duration of the update.
 */
static void
mptcp_detach_mptcb_from_subf(struct mptcb *mp_tp, struct socket *so)
{
	struct mptcp_subf_auth_entry *auth;
	struct tcpcb *tp = sototcpcb(so);

	if (tp == NULL) {
		return;
	}

	MPT_LOCK(mp_tp);

	LIST_FOREACH(auth, &mp_tp->mpt_subauth_list, msae_next) {
		if (auth->msae_laddr_id != tp->t_local_aid) {
			continue;
		}
		/* Only the first match is removed; stop right after freeing. */
		LIST_REMOVE(auth, msae_next);
		zfree(mpt_subauth_zone, auth);
		break;
	}

	tp->t_mptcb = NULL;

	MPT_UNLOCK(mp_tp);
}
/*
 * Look up the local and remote random numbers recorded for the subflow
 * with local address id `addr_id`.
 *
 * `lrand` and `rrand` are optional outputs (either may be NULL).  They are
 * written only when a matching entry exists; otherwise they are left
 * untouched.  The mptcb lock must not be held by the caller.
 */
void
mptcp_get_rands(mptcp_addr_id addr_id, struct mptcb *mp_tp, u_int32_t *lrand,
    u_int32_t *rrand)
{
	struct mptcp_subf_auth_entry *auth;

	MPT_LOCK_ASSERT_NOTHELD(mp_tp);

	MPT_LOCK(mp_tp);
	LIST_FOREACH(auth, &mp_tp->mpt_subauth_list, msae_next) {
		if (auth->msae_laddr_id != addr_id) {
			continue;
		}
		if (lrand) {
			*lrand = auth->msae_laddr_rand;
		}
		if (rrand) {
			*rrand = auth->msae_raddr_rand;
		}
		break;
	}
	MPT_UNLOCK(mp_tp);
}
/*
 * Record the peer's address id and random number for the subflow with
 * local address id `laddr_id`.
 *
 * Only the first matching entry is considered.  A previously recorded,
 * different remote address id or remote random number is treated as an
 * error: it is logged and the entry is not updated further (a new remote
 * address id may already have been stored when the random number
 * mismatches).  The mptcb lock must not be held by the caller.
 */
void
mptcp_set_raddr_rand(mptcp_addr_id laddr_id, struct mptcb *mp_tp,
    mptcp_addr_id raddr_id, u_int32_t raddr_rand)
{
	struct mptcp_subf_auth_entry *auth;

	MPT_LOCK_ASSERT_NOTHELD(mp_tp);

	MPT_LOCK(mp_tp);
	LIST_FOREACH(auth, &mp_tp->mpt_subauth_list, msae_next) {
		if (auth->msae_laddr_id != laddr_id) {
			continue;
		}

		if ((auth->msae_raddr_id != 0) &&
		    (auth->msae_raddr_id != raddr_id)) {
			mptcplog((LOG_ERR, "MPTCP ERROR %s: mismatched"
			    " address ids %d %d \n", __func__, raddr_id,
			    auth->msae_raddr_id));
			break;
		}
		auth->msae_raddr_id = raddr_id;

		if ((auth->msae_raddr_rand != 0) &&
		    (auth->msae_raddr_rand != raddr_rand)) {
			mptcplog((LOG_ERR, "%s: dup SYN_ACK %d %d \n",
			    __func__, raddr_rand,
			    auth->msae_raddr_rand));
			break;
		}
		auth->msae_raddr_rand = raddr_rand;
		break;
	}
	MPT_UNLOCK(mp_tp);
}
/*
 * Compute the SHA-1 digest of a single MPTCP key.
 *
 * key:        key to hash (sizeof (mptcp_key_t) bytes are hashed).
 * sha_digest: output buffer for the digest.
 * digest_len: size of sha_digest; must equal SHA1_RESULTLEN.
 *
 * Returns TRUE on success, FALSE (without touching sha_digest) when
 * digest_len has the wrong value.
 */
static int
mptcp_do_sha1(mptcp_key_t *key, char *sha_digest, int digest_len)
{
	SHA1_CTX ctx;

	if (digest_len == SHA1_RESULTLEN) {
		SHA1Init(&ctx);
		SHA1Update(&ctx, (const unsigned char *)key,
		    sizeof (mptcp_key_t));
		SHA1Final(sha_digest, &ctx);
		return (TRUE);
	}

	return (FALSE);
}
/*
 * Compute a keyed SHA-1 MAC over two 32-bit random numbers.
 *
 * The MAC key is key1 followed by key2, zero-padded to eight mptcp_key_t
 * words; the message is rand1 followed by rand2.  The result is
 *     SHA1((key ^ opad) || SHA1((key ^ ipad) || message))
 * with ipad bytes 0x36 and opad bytes 0x5c, i.e. the HMAC construction
 * (assuming mptcp_key_t is 64 bits wide so that the padded key is one
 * 64-byte SHA-1 block -- confirm against the typedef).
 *
 * digest:     output buffer; SHA1_RESULTLEN bytes are written to it and
 *             read back from it for the outer hash.
 * digest_len: only used to zero the buffer up front; callers are
 *             expected to pass SHA1_RESULTLEN.
 */
void
mptcp_hmac_sha1(mptcp_key_t key1, mptcp_key_t key2,
u_int32_t rand1, u_int32_t rand2, u_char *digest, int digest_len)
{
SHA1_CTX sha1ctxt;
mptcp_key_t key_ipad[8] = {0};
mptcp_key_t key_opad[8] = {0};
u_int32_t data[2];
int i;
bzero(digest, digest_len);
/* Key material: key1 || key2, remaining words stay zero. */
key_ipad[0] = key1;
key_ipad[1] = key2;
key_opad[0] = key1;
key_opad[1] = key2;
/* Message: rand1 || rand2. */
data[0] = rand1;
data[1] = rand2;
/* XOR the padded key with the inner and outer pad constants. */
for (i = 0; i < 8; i++) {
key_ipad[i] ^= 0x3636363636363636;
key_opad[i] ^= 0x5c5c5c5c5c5c5c5c;
}
/* Inner hash: SHA1(ipad-key || message). */
SHA1Init(&sha1ctxt);
SHA1Update(&sha1ctxt, (unsigned char *)key_ipad, sizeof (key_ipad));
SHA1Update(&sha1ctxt, (unsigned char *)data, sizeof (data));
SHA1Final(digest, &sha1ctxt);
/* Outer hash: SHA1(opad-key || inner digest), written over the inner one. */
SHA1Init(&sha1ctxt);
SHA1Update(&sha1ctxt, (unsigned char *)key_opad, sizeof (key_opad));
SHA1Update(&sha1ctxt, (unsigned char *)digest, SHA1_RESULTLEN);
SHA1Final(digest, &sha1ctxt);
}
/*
 * Compute the MAC for the subflow with address id `aid`, keyed with the
 * connection's local and remote keys over the subflow's local and remote
 * random numbers.
 *
 * Writes SHA1_RESULTLEN bytes to `digest`.  Returns without writing
 * anything when digest_len != SHA1_RESULTLEN.  If no authentication entry
 * matches `aid`, both random numbers are taken as 0.
 * The mptcb lock must not be held by the caller.
 *
 * NOTE(review): mpt_localkey is dereferenced without a NULL check, while
 * mptcp_close() sets it to NULL -- confirm callers cannot reach this
 * after close.
 */
void
mptcp_get_hmac(mptcp_addr_id aid, struct mptcb *mp_tp, u_char *digest,
int digest_len)
{
uint32_t lrand, rrand;
mptcp_key_t localkey, remotekey;
MPT_LOCK_ASSERT_NOTHELD(mp_tp);
if (digest_len != SHA1_RESULTLEN)
return;
lrand = rrand = 0;
mptcp_get_rands(aid, mp_tp, &lrand, &rrand);
/* Snapshot both keys under the lock; hash with the lock dropped. */
MPT_LOCK_SPIN(mp_tp);
localkey = *mp_tp->mpt_localkey;
remotekey = mp_tp->mpt_remotekey;
MPT_UNLOCK(mp_tp);
mptcp_hmac_sha1(localkey, remotekey, lrand, rrand, digest,
digest_len);
}
/*
 * Return the first 8 bytes of the subflow MAC computed by
 * mptcp_get_hmac(), copied byte-for-byte into a u_int64_t (so the
 * numeric value depends on host byte order).
 */
u_int64_t
mptcp_get_trunced_hmac(mptcp_addr_id aid, struct mptcb *mp_tp)
{
u_char digest[SHA1_RESULTLEN];
u_int64_t trunced_digest;
mptcp_get_hmac(aid, mp_tp, &digest[0], sizeof (digest));
bcopy(digest, &trunced_digest, 8);
return (trunced_digest);
}
/*
 * Derive a 32-bit token from a SHA-1 digest: the token is the first
 * sizeof (u_int32_t) bytes of the digest, copied unchanged.
 *
 * token_len must be sizeof (u_int32_t) and sha_digest_len must be
 * SHA1_RESULTLEN (both verified).  Always returns TRUE.
 */
int
mptcp_generate_token(char *sha_digest, int sha_digest_len, caddr_t token,
int token_len)
{
VERIFY(token_len == sizeof (u_int32_t));
VERIFY(sha_digest_len == SHA1_RESULTLEN);
bcopy(sha_digest, token, sizeof (u_int32_t));
return (TRUE);
}
/*
 * Derive the 64-bit initial data sequence number from a SHA-1 digest.
 *
 * The IDSN is built from digest bytes 12..19 stored in reverse order:
 * idsn[0] receives digest byte 19 and idsn[7] receives digest byte 12.
 *
 * idsn_len must be sizeof (u_int64_t) and sha_digest_len must be
 * SHA1_RESULTLEN (both verified).  Always returns TRUE.
 */
int
mptcp_generate_idsn(char *sha_digest, int sha_digest_len, caddr_t idsn,
    int idsn_len)
{
	int i;

	VERIFY(idsn_len == sizeof (u_int64_t));
	VERIFY(sha_digest_len == SHA1_RESULTLEN);

	/* Fill from the top index down, mirroring the digest tail. */
	for (i = 7; i >= 0; i--) {
		idsn[i] = sha_digest[19 - i];
	}

	return (TRUE);
}
/*
 * Derive the connection's tokens and initial data sequence numbers from
 * the local and remote keys.
 *
 * The local values come from the digest stored with the local key in the
 * key pool; the remote values come from a SHA-1 computed here over the
 * remote key.  The mptcb lock must be held by the caller.
 *
 * Returns 0 on success, -1 when the negotiated version is not
 * MP_DRAFT_VERSION_12 or the remote digest cannot be computed.
 */
static int
mptcp_init_authparms(struct mptcb *mp_tp)
{
	caddr_t local_digest = NULL;
	char remote_digest[MPTCP_SHA1_RESULTLEN];

	MPT_LOCK_ASSERT_HELD(mp_tp);

	/* Only this one version is supported. */
	if (mp_tp->mpt_version != MP_DRAFT_VERSION_12)
		return (-1);

	/* Local side: token and IDSN from the pre-computed key digest. */
	local_digest = mptcp_get_stored_digest(mp_tp->mpt_localkey);
	mptcp_generate_token(local_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_localtoken, sizeof (mp_tp->mpt_localtoken));
	mptcp_generate_idsn(local_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_local_idsn, sizeof (u_int64_t));

	/* Remote side: hash the peer's key first. */
	if (!mptcp_do_sha1(&mp_tp->mpt_remotekey, remote_digest,
	    SHA1_RESULTLEN)) {
		mptcplog((LOG_ERR, "MPTCP ERROR %s: unexpected failure",
		    __func__));
		return (-1);
	}
	/* Size the destination by the remote field, not the local one. */
	mptcp_generate_token(remote_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_remotetoken,
	    sizeof (mp_tp->mpt_remotetoken));
	mptcp_generate_idsn(remote_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_remote_idsn, sizeof (u_int64_t));

	return (0);
}
/*
 * Initialise the MPTCP-level send and receive sequence variables from
 * the initial data sequence numbers: every send variable starts at
 * local IDSN + 1 and every receive variable at remote IDSN + 1.
 * The mptcb lock must be held by the caller.
 */
static void
mptcp_init_statevars(struct mptcb *mp_tp)
{
	MPT_LOCK_ASSERT_HELD(mp_tp);

	/* Send side. */
	mp_tp->mpt_sndmax = mp_tp->mpt_local_idsn + 1;
	mp_tp->mpt_snduna = mp_tp->mpt_sndmax;
	mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;

	/* Receive side. */
	mp_tp->mpt_rcvnxt = mp_tp->mpt_remote_idsn + 1;
	mp_tp->mpt_rcvatmark = mp_tp->mpt_rcvnxt;
}
/*
 * Set the initial connection properties on a new MPTCP control block:
 * protocol version, the checksum flag (when the mptcp_dss_csum tunable is
 * non-zero), the receive window (from the current receive-buffer space)
 * and the garbage-collection tick count.
 *
 * mptcp_sbspace() asserts that the mptcb and mpte locks are held, so the
 * caller must hold both.
 */
static void
mptcp_conn_properties(struct mptcb *mp_tp)
{
mp_tp->mpt_version = MP_DRAFT_VERSION_12;
if (mptcp_dss_csum)
mp_tp->mpt_flags |= MPTCPF_CHECKSUM;
mp_tp->mpt_rcvwnd = mptcp_sbspace(mp_tp);
mp_tp->mpt_gc_ticks = MPT_GC_TICKS;
}
/*
 * Return the local token of the MPTCP control block passed as an opaque
 * pointer.  `mptcb_arg` must point to a struct mptcb; no lock is taken.
 */
mptcp_token_t
mptcp_get_localtoken(void* mptcb_arg)
{
struct mptcb *mp_tp = (struct mptcb *)mptcb_arg;
return (mp_tp->mpt_localtoken);
}
/*
 * Return the remote token of the MPTCP control block passed as an opaque
 * pointer.  `mptcb_arg` must point to a struct mptcb; no lock is taken.
 */
mptcp_token_t
mptcp_get_remotetoken(void* mptcb_arg)
{
struct mptcb *mp_tp = (struct mptcb *)mptcb_arg;
return (mp_tp->mpt_remotetoken);
}
/*
 * Return the local key of the MPTCP control block passed as an opaque
 * pointer, or 0 when no local key is attached.
 * `mptcb_arg` must point to a struct mptcb; no lock is taken.
 */
u_int64_t
mptcp_get_localkey(void* mptcb_arg)
{
	struct mptcb *mp_tp = (struct mptcb *)mptcb_arg;

	if (mp_tp->mpt_localkey == NULL) {
		return (0);
	}

	return (*mp_tp->mpt_localkey);
}
/*
 * Return the remote key of the MPTCP control block passed as an opaque
 * pointer.  `mptcb_arg` must point to a struct mptcb; no lock is taken.
 */
u_int64_t
mptcp_get_remotekey(void* mptcb_arg)
{
struct mptcb *mp_tp = (struct mptcb *)mptcb_arg;
return (mp_tp->mpt_remotekey);
}
/*
 * Flag the subflow behind `so` so that a DATA_FIN gets sent
 * (TMPF_SEND_DFIN), unless the subflow is already marked for reset.
 * Silently does nothing when the socket has no inpcb or no tcpcb.
 */
void
mptcp_send_dfin(struct socket *so)
{
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;

	if (inp == NULL) {
		return;
	}

	tp = intotcpcb(inp);
	if (tp == NULL) {
		return;
	}

	if (tp->t_mpflags & TMPF_RESET) {
		return;
	}

	tp->t_mpflags |= TMPF_SEND_DFIN;
}
/*
 * Stamp outgoing data with MPTCP data sequence numbers.
 *
 * Walks the chain starting at `m` via m_next.  Each mbuf (which must carry
 * a packet header) is tagged PKTF_MPTCP | PKTF_MPSO, gets the current
 * mpt_sndmax as its DSN and its packet length as the mapping length, and
 * mpt_sndmax is advanced by that packet length.
 *
 * Does nothing for a NULL chain.  Panics if the connection has not yet
 * reached MPTCPS_ESTABLISHED.  Takes the mptcb lock internally.
 */
void
mptcp_insert_dsn(struct mppcb *mpp, struct mbuf *m)
{
	struct mptcb *mp_tp;

	if (m == NULL) {
		return;
	}

	mp_tp = &((struct mpp_mtp *)mpp)->mtcb;

	MPT_LOCK(mp_tp);
	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
		MPT_UNLOCK(mp_tp);
		panic("%s: data write before establishment.",
		    __func__);
		return;
	}

	for (; m != NULL; m = m->m_next) {
		VERIFY(m->m_flags & M_PKTHDR);
		m->m_pkthdr.pkt_flags |= (PKTF_MPTCP | PKTF_MPSO);
		m->m_pkthdr.mp_dsn = mp_tp->mpt_sndmax;
		m->m_pkthdr.mp_rlen = m_pktlen(m);
		mp_tp->mpt_sndmax += m_pktlen(m);
	}
	MPT_UNLOCK(mp_tp);
}
/*
 * Advance the DSN mappings stored in the mbuf chain `m` by `len` bytes
 * (going by the name, this prepares for dropping `len` bytes from the
 * front of a socket buffer -- confirm against the caller).
 *
 * For each mbuf in turn: if its mapping is shorter than the remaining
 * `len`, the mapping is consumed entirely (DSN advanced, length set to 0)
 * and the walk continues; otherwise the mapping is advanced by the
 * remaining `len`, shortened accordingly, and the function returns.
 * The subflow-relative sequence number is advanced as well, except on
 * mbufs flagged PKTF_MPSO.
 *
 * Every mbuf visited must carry a packet header and the PKTF_MPTCP flag;
 * a missing flag panics.
 */
void
mptcp_preproc_sbdrop(struct mbuf *m, unsigned int len)
{
u_int32_t sub_len = 0;
while (m) {
VERIFY(m->m_flags & M_PKTHDR);
if (m->m_pkthdr.pkt_flags & PKTF_MPTCP) {
sub_len = m->m_pkthdr.mp_rlen;
if (sub_len < len) {
/* Whole mapping is covered: consume it and carry on. */
m->m_pkthdr.mp_dsn += sub_len;
if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) {
m->m_pkthdr.mp_rseq += sub_len;
}
m->m_pkthdr.mp_rlen = 0;
len -= sub_len;
} else {
/* sub_len >= len: partial (or exact) consumption ends the walk. */
m->m_pkthdr.mp_dsn += len;
if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) {
m->m_pkthdr.mp_rseq += len;
}
/* Logged before mp_rlen is reduced, so it shows the old length. */
mptcplog3((LOG_INFO,
"%s: %llu %u %d %d\n", __func__,
m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rseq,
m->m_pkthdr.mp_rlen, len));
m->m_pkthdr.mp_rlen -= len;
return;
}
} else {
panic("%s: MPTCP tag not set", __func__);
}
m = m->m_next;
}
}
/*
 * 32-bit flavour of mptcp_output_getm_dsnmap64(): looks up the DSN
 * mapping for `datalen` bytes at offset `off` in the socket's send buffer
 * and returns both the low 32 bits of the DSN (*dsn) and the full 64-bit
 * DSN (*dsn64p).  *relseq and *data_len are filled in by the 64-bit
 * routine.
 *
 * The 64-bit routine returns without writing its outputs when the send
 * buffer is empty or `off` is negative; in that case *dsn and *dsn64p are
 * set to 0 here instead of being derived from an uninitialised local.
 */
void
mptcp_output_getm_dsnmap32(struct socket *so, int off, uint32_t datalen,
    u_int32_t *dsn, u_int32_t *relseq, u_int16_t *data_len, u_int64_t *dsn64p)
{
	u_int64_t dsn64 = 0;

	mptcp_output_getm_dsnmap64(so, off, datalen, &dsn64, relseq, data_len);
	*dsn = (u_int32_t)MPTCP_DATASEQ_LOW32(dsn64);
	*dsn64p = dsn64;
}
/*
 * Look up the DSN mapping that covers byte offset `off` of the socket's
 * send buffer and report how much of `datalen` can be sent under one
 * contiguous mapping.
 *
 * Outputs:
 *   *dsn      data sequence number of the byte at `off`
 *   *relseq   the containing mbuf's mp_rseq plus the offset into it
 *   *data_len number of bytes, starting at `off`, whose DSNs are
 *             contiguous; at most `datalen` and at most UINT16_MAX
 *
 * Returns without writing any output when the send buffer is empty or
 * `off` is negative.  Panics when `off` lies beyond the buffered
 * mappings, or when `datalen` extends past the end of the mbuf chain.
 */
void
mptcp_output_getm_dsnmap64(struct socket *so, int off, uint32_t datalen,
u_int64_t *dsn, u_int32_t *relseq, u_int16_t *data_len)
{
struct mbuf *m = so->so_snd.sb_mb;
struct mbuf *mnext = NULL;
uint32_t runlen = 0;
u_int64_t dsn64;
uint32_t contig_len = 0;
if (m == NULL)
return;
if (off < 0)
return;
/* Skip whole mappings until the one containing `off` is reached. */
while (m) {
VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);
VERIFY(m->m_flags & M_PKTHDR);
if ((unsigned int)off >= m->m_pkthdr.mp_rlen) {
off -= m->m_pkthdr.mp_rlen;
m = m->m_next;
} else {
break;
}
}
if (m == NULL) {
panic("%s: bad offset", __func__);
}
/* `off` is now relative to the start of this mbuf's mapping. */
dsn64 = m->m_pkthdr.mp_dsn + off;
*dsn = dsn64;
*relseq = m->m_pkthdr.mp_rseq + off;
/* Bytes left in this mapping from `off` onwards. */
runlen = m->m_pkthdr.mp_rlen - off;
contig_len = runlen;
/* The request fits inside this single mapping. */
if (datalen <= runlen) {
*data_len = min(datalen, UINT16_MAX);
return;
}
/* Extend across following mbufs for as long as their DSNs are contiguous. */
mnext = m->m_next;
while (datalen > runlen) {
if (mnext == NULL) {
panic("%s: bad datalen = %d, %d %d", __func__, datalen,
runlen, off);
}
VERIFY(mnext->m_flags & M_PKTHDR);
VERIFY(mnext->m_pkthdr.pkt_flags & PKTF_MPTCP);
if (mnext->m_pkthdr.mp_dsn == (dsn64 + runlen)) {
/* Next mapping starts exactly where the run ends: extend the run. */
runlen += mnext->m_pkthdr.mp_rlen;
contig_len += mnext->m_pkthdr.mp_rlen;
mptcplog3((LOG_INFO, "%s: contig \n",
__func__));
} else {
/* Gap in the DSN space: stop at the contiguous part. */
mptcplog((LOG_INFO, "%s: discontig %d %d \n",
__func__, datalen, contig_len));
break;
}
mnext = mnext->m_next;
}
/* Clamp to what fits in the 16-bit length output. */
datalen = min(datalen, UINT16_MAX);
*data_len = min(datalen, contig_len);
mptcplog3((LOG_INFO, "%s: %llu %u %d %d \n", __func__,
*dsn, *relseq, *data_len, off));
}
/*
 * Advance the MPTCP-level receive-next sequence number past the mapping
 * carried by `m`, but only when the current mpt_rcvnxt lies within that
 * mapping, i.e. in [mp_dsn, mp_dsn + mp_rlen].  Does nothing when the
 * subflow has no MPTCP control block.  Takes the mptcb lock internally.
 */
static void
mptcp_adj_rcvnxt(struct tcpcb *tp, struct mbuf *m)
{
struct mptcb *mp_tp = tptomptp(tp);
if (mp_tp == NULL)
return;
MPT_LOCK(mp_tp);
if ((MPTCP_SEQ_GEQ(mp_tp->mpt_rcvnxt, m->m_pkthdr.mp_dsn)) &&
(MPTCP_SEQ_LEQ(mp_tp->mpt_rcvnxt, (m->m_pkthdr.mp_dsn +
m->m_pkthdr.mp_rlen)))) {
mp_tp->mpt_rcvnxt = m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen;
}
MPT_UNLOCK(mp_tp);
}
/*
 * Attach the receive-side DSN mapping held in the tcpcb to mbuf `m`.
 *
 * Only acts when TMPF_EMBED_DSN is set on the subflow: the mapping
 * (DSN, subflow sequence, length) is copied from tp->t_rcv_map into the
 * packet header, the mbuf is tagged PKTF_MPTCP, TMPF_EMBED_DSN is cleared
 * and TMPF_MPTCP_ACKNOW is set.  `m` must not already be tagged
 * PKTF_MPTCP.
 */
void
mptcp_insert_rmap(struct tcpcb *tp, struct mbuf *m)
{
	VERIFY(!(m->m_pkthdr.pkt_flags & PKTF_MPTCP));

	if (!(tp->t_mpflags & TMPF_EMBED_DSN)) {
		return;
	}

	VERIFY(m->m_flags & M_PKTHDR);

	m->m_pkthdr.mp_dsn = tp->t_rcv_map.mpt_dsn;
	m->m_pkthdr.mp_rseq = tp->t_rcv_map.mpt_sseq;
	m->m_pkthdr.mp_rlen = tp->t_rcv_map.mpt_len;
	m->m_pkthdr.pkt_flags |= PKTF_MPTCP;

	tp->t_mpflags &= ~TMPF_EMBED_DSN;
	tp->t_mpflags |= TMPF_MPTCP_ACKNOW;
}
/*
 * Reconcile the DSN mapping carried by received mbuf `m` with the amount
 * of data the mbuf actually holds, then advance the MPTCP receive-next
 * sequence number (mptcp_adj_rcvnxt).
 *
 * - Empty packet: nothing to do.
 * - No PKTF_MPTCP tag: report an MPTCP failure on the subflow and return.
 * - Packet length equals the mapping length: mapping is used as is.
 * - Packet longer than the mapping: panic.
 * - Packet shorter than the mapping: the mapping is adjusted depending on
 *   where the packet starts relative to the mapping's subflow sequence
 *   number (see the branches below); a packet starting before the
 *   mapping panics.
 */
void
mptcp_adj_rmap(struct socket *so, struct mbuf *m)
{
u_int64_t dsn;
u_int32_t sseq, datalen;
struct tcpcb *tp = intotcpcb(sotoinpcb(so));
u_int32_t old_rcvnxt = 0;
if (m_pktlen(m) == 0)
return;
if (m->m_pkthdr.pkt_flags & PKTF_MPTCP) {
VERIFY(m->m_flags & M_PKTHDR);
dsn = m->m_pkthdr.mp_dsn;
/* mp_rseq is relative; adding tp->irs gives an absolute TCP sequence. */
sseq = m->m_pkthdr.mp_rseq + tp->irs;
datalen = m->m_pkthdr.mp_rlen;
} else {
mptcp_notify_mpfail(so);
return;
}
if (m->m_pkthdr.len == (int)datalen) {
mptcp_adj_rcvnxt(tp, m);
return;
}
if (m->m_pkthdr.len > (int)datalen) {
panic("%s: mbuf len = %d expected = %d", __func__,
m->m_pkthdr.len, datalen);
}
/* TCP sequence number at which this packet's data begins. */
old_rcvnxt = tp->rcv_nxt - m->m_pkthdr.len;
if (SEQ_GT(old_rcvnxt, sseq)) {
/* Data starts after the mapping start: shift the mapping forward. */
int off = old_rcvnxt - sseq;
m->m_pkthdr.mp_dsn += off;
m->m_pkthdr.mp_rseq += off;
m->m_pkthdr.mp_rlen -= off;
} else if (old_rcvnxt == sseq) {
/* Data starts at the mapping start: shorten the mapping to the data. */
m->m_pkthdr.mp_rlen = m->m_pkthdr.len;
} else {
panic("%s: partial map %u %u", __func__, old_rcvnxt, sseq);
}
mptcp_adj_rcvnxt(tp, m);
}
/*
 * React to a transmit failure on subflow socket `so` by requesting a
 * failover, once.
 *
 * The first call sets SOF_MP_TRYFAILOVER on the socket and posts an
 * SO_FILT_HINT_MPFAILOVER event; later calls find the flag already set
 * and do nothing more.  A subflow that is not in TCPS_ESTABLISHED is
 * logged.  Silently returns when the socket has no inpcb or no tcpcb.
 */
void
mptcp_act_on_txfail(struct socket *so)
{
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;

	if (inp == NULL) {
		return;
	}

	tp = intotcpcb(inp);
	if (tp == NULL) {
		return;
	}

	if (tp->t_state != TCPS_ESTABLISHED) {
		mptcplog((LOG_INFO, "%s: state = %d \n", __func__,
		    tp->t_state));
	}

	if (!(so->so_flags & SOF_MP_TRYFAILOVER)) {
		so->so_flags |= SOF_MP_TRYFAILOVER;
		soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPFAILOVER));
	}
}
/*
 * Translate an MPTCP data sequence number into the subflow-relative
 * sequence number of the same byte, using the mappings stored in the
 * socket's send buffer.
 *
 * so:       subflow socket whose send buffer is searched.
 * dsn_fail: data sequence number to look up.
 * tcp_seq:  output; written only on success.
 *
 * Returns 0 when a mapping with dsn <= dsn_fail <= dsn + length is found,
 * -1 otherwise (a miss on a non-empty buffer is logged).
 */
int
mptcp_get_map_for_dsn(struct socket *so, u_int64_t dsn_fail, u_int32_t *tcp_seq)
{
	struct mbuf *m;
	u_int64_t map_dsn;
	u_int32_t map_len;
	int off = 0;

	if (so->so_snd.sb_mb == NULL) {
		return (-1);
	}

	for (m = so->so_snd.sb_mb; m != NULL; m = m->m_next) {
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);
		VERIFY(m->m_flags & M_PKTHDR);

		map_dsn = m->m_pkthdr.mp_dsn;
		map_len = m->m_pkthdr.mp_rlen;

		if (MPTCP_SEQ_LEQ(map_dsn, dsn_fail) &&
		    (MPTCP_SEQ_GEQ(map_dsn + map_len, dsn_fail))) {
			off = dsn_fail - map_dsn;
			*tcp_seq = m->m_pkthdr.mp_rseq + off;
			return (0);
		}
	}

	mptcplog((LOG_ERR, "%s: %llu not found \n", __func__, dsn_fail));
	return (-1);
}
/*
 * Trim a proposed send length so that it does not cross a DSN mapping
 * discontinuity.
 *
 * Returns `len` unchanged when it is 0; otherwise returns the contiguous
 * data length reported by mptcp_output_getm_dsnmap64() for `len` bytes at
 * offset `off` of the send buffer.  That value stays at its initial 0
 * when the lookup returns without writing its outputs.
 */
int32_t
mptcp_adj_sendlen(struct socket *so, int32_t off, int32_t len)
{
u_int64_t mdss_dsn = 0;
u_int32_t mdss_subflow_seq = 0;
u_int16_t mdss_data_len = 0;
if (len == 0)
return (len);
mptcp_output_getm_dsnmap64(so, off, (u_int32_t)len,
&mdss_dsn, &mdss_subflow_seq, &mdss_data_len);
return (mdss_data_len);
}
/*
 * Return the free space in the MPTCP socket's receive buffer: the smaller
 * of (sb_hiwat - sb_cc) and (sb_mbmax - sb_mbcnt), with negative results
 * clamped to 0.  Both the mptcb lock and the mpte lock must be held.
 */
int32_t
mptcp_sbspace(struct mptcb *mpt)
{
struct sockbuf *sb;
uint32_t rcvbuf;
int32_t space;
MPT_LOCK_ASSERT_HELD(mpt);
MPTE_LOCK_ASSERT_HELD(mpt->mpt_mpte);
sb = &mpt->mpt_mpte->mpte_mppcb->mpp_socket->so_rcv;
rcvbuf = sb->sb_hiwat;
/* Limited by both the byte count and the mbuf-storage accounting. */
space = ((int32_t)imin((rcvbuf - sb->sb_cc),
(sb->sb_mbmax - sb->sb_mbcnt)));
if (space < 0)
space = 0;
return (space);
}
/*
 * Mark a subflow as MPTCP-ready and notify listeners.
 *
 * Fires the multipath__ready DTrace probe for any socket with a tcpcb.
 * The state change itself happens only when the subflow is flagged
 * TMPF_MPTCP_TRUE and not yet TMPF_MPTCP_READY: the fallback flag is
 * cleared, the ready flag set and an SO_FILT_HINT_MPSTATUS event posted.
 * A NULL socket or a socket without tcpcb is ignored.
 */
void
mptcp_notify_mpready(struct socket *so)
{
	struct tcpcb *tp;

	if (so == NULL) {
		return;
	}

	tp = intotcpcb(sotoinpcb(so));
	if (tp == NULL) {
		return;
	}

	DTRACE_MPTCP4(multipath__ready, struct socket *, so,
	    struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd,
	    struct tcpcb *, tp);

	if ((tp->t_mpflags & TMPF_MPTCP_TRUE) &&
	    !(tp->t_mpflags & TMPF_MPTCP_READY)) {
		tp->t_mpflags &= ~TMPF_TCP_FALLBACK;
		tp->t_mpflags |= TMPF_MPTCP_READY;
		soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPSTATUS));
	}
}
/*
 * Switch a subflow to TCP fallback and notify listeners.
 *
 * Fires the multipath__failed DTrace probe for any socket with a tcpcb.
 * Unless the subflow is already flagged TMPF_TCP_FALLBACK, the
 * MPTCP-ready/true flags are cleared, the fallback flag is set and an
 * SO_FILT_HINT_MPSTATUS event is posted.
 * A NULL socket or a socket without tcpcb is ignored.
 */
void
mptcp_notify_mpfail(struct socket *so)
{
struct tcpcb *tp = NULL;
if (so == NULL)
return;
tp = intotcpcb(sotoinpcb(so));
if (tp == NULL)
return;
DTRACE_MPTCP4(multipath__failed, struct socket *, so,
struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd,
struct tcpcb *, tp);
/* Already in fallback: nothing to change, no event. */
if (tp->t_mpflags & TMPF_TCP_FALLBACK)
return;
tp->t_mpflags &= ~(TMPF_MPTCP_READY|TMPF_MPTCP_TRUE);
tp->t_mpflags |= TMPF_TCP_FALLBACK;
soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPSTATUS));
}
/*
 * Tell whether keepalives are still appropriate for this MPTCP
 * connection: returns 0 once the connection state has reached
 * MPTCPS_CLOSE_WAIT or any later state, 1 otherwise.
 * `mp_tp` must not be NULL.  Takes the mptcb lock internally.
 */
boolean_t
mptcp_ok_to_keepalive(struct mptcb *mp_tp)
{
	boolean_t allowed;

	VERIFY(mp_tp != NULL);

	MPT_LOCK(mp_tp);
	allowed = (mp_tp->mpt_state >= MPTCPS_CLOSE_WAIT) ? 0 : 1;
	MPT_UNLOCK(mp_tp);

	return (allowed);
}
/*
 * Return the number of bytes to reserve for MPTCP options on subflow
 * `tp` (going by the name, the amount by which the caller lowers the
 * MSS -- confirm against the caller).
 *
 * The result is 0 when the subflow has no MPTCP control block or when
 * none of the three conditions below holds; otherwise it is
 * sizeof (struct mptcp_dss_ack_opt) + 2.
 */
int
mptcp_adj_mss(struct tcpcb *tp, boolean_t mtudisc)
{
int mss_lower = 0;
struct mptcb *mp_tp = tptomptp(tp);
/*
 * Sets (not accumulates) mss_lower.  Both branches of the checksum test
 * currently add the same 2 bytes.  The macro is not #undef'ed and so
 * stays defined for the rest of the file.
 */
#define MPTCP_COMPUTE_LEN { \
mss_lower = sizeof (struct mptcp_dss_ack_opt); \
MPT_LOCK(mp_tp); \
if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) \
mss_lower += 2; \
else \
\
mss_lower += 2; \
MPT_UNLOCK(mp_tp); \
}
if (mp_tp == NULL)
return (0);
/* Pre-established subflow that is not a joined flow. */
if ((tp->t_mpflags & TMPF_PREESTABLISHED) &&
(!(tp->t_mpflags & TMPF_JOINED_FLOW))) {
MPTCP_COMPUTE_LEN;
}
/* Pre-established subflow that has sent its join. */
if ((tp->t_mpflags & TMPF_PREESTABLISHED) &&
(tp->t_mpflags & TMPF_SENT_JOIN)) {
MPTCP_COMPUTE_LEN;
}
/* Caller set `mtudisc` and the subflow is flagged TMPF_MPTCP_TRUE. */
if ((mtudisc) && (tp->t_mpflags & TMPF_MPTCP_TRUE)) {
MPTCP_COMPUTE_LEN;
}
return (mss_lower);
}
/*
 * Copy the "last owner" identity (pid, unique pid and uuid) from the
 * parent MPTCP socket to the subflow socket when either pid differs,
 * then refresh the subflow socket's policy via so_update_policy().
 *
 * The subflow lock must be held by the caller; the subflow socket lock
 * is taken and released here.
 */
void
mptcp_update_last_owner(struct mptsub *mpts, struct socket *parent_mpso)
{
struct socket *subflow_so = mpts->mpts_socket;
MPTS_LOCK_ASSERT_HELD(mpts);
socket_lock(subflow_so, 0);
if ((subflow_so->last_pid != parent_mpso->last_pid) ||
(subflow_so->last_upid != parent_mpso->last_upid)) {
subflow_so->last_upid = parent_mpso->last_upid;
subflow_so->last_pid = parent_mpso->last_pid;
uuid_copy(subflow_so->last_uuid, parent_mpso->last_uuid);
}
/* Called on every invocation, whether or not the owner changed. */
so_update_policy(subflow_so);
socket_unlock(subflow_so, 0);
}
/*
 * Fill one mptcp_flow_t record for the pcblist sysctl from a subflow.
 *
 * The TCP connection info comes from tcp_getconninfo(); the source and
 * destination addresses and ports come from the subflow's inpcb (IPv6
 * form when the inpcb has INP_IPV6 set and INET6 is compiled in, IPv4
 * form otherwise); flags and connection id come from the subflow.
 */
static void
fill_mptcp_subflow(struct socket *so, mptcp_flow_t *flow, struct mptsub *mpts)
{
struct inpcb *inp;
tcp_getconninfo(so, &flow->flow_ci);
inp = sotoinpcb(so);
#if INET6
if ((inp->inp_vflag & INP_IPV6) != 0) {
flow->flow_src.ss_family = AF_INET6;
flow->flow_dst.ss_family = AF_INET6;
flow->flow_src.ss_len = sizeof(struct sockaddr_in6);
flow->flow_dst.ss_len = sizeof(struct sockaddr_in6);
SIN6(&flow->flow_src)->sin6_port = inp->in6p_lport;
SIN6(&flow->flow_dst)->sin6_port = inp->in6p_fport;
SIN6(&flow->flow_src)->sin6_addr = inp->in6p_laddr;
SIN6(&flow->flow_dst)->sin6_addr = inp->in6p_faddr;
} else
#endif
{
flow->flow_src.ss_family = AF_INET;
flow->flow_dst.ss_family = AF_INET;
flow->flow_src.ss_len = sizeof(struct sockaddr_in);
flow->flow_dst.ss_len = sizeof(struct sockaddr_in);
SIN(&flow->flow_src)->sin_port = inp->inp_lport;
SIN(&flow->flow_dst)->sin_port = inp->inp_fport;
SIN(&flow->flow_src)->sin_addr = inp->inp_laddr;
SIN(&flow->flow_dst)->sin_addr = inp->inp_faddr;
}
flow->flow_flags = mpts->mpts_flags;
flow->flow_cid = mpts->mpts_connid;
}
/*
 * Read-only sysctl handler that exports the list of MPTCP connections.
 *
 * For every PCB on the global list it writes a conninfo_mptcp_t header
 * (minus its embedded flow slot) followed by one mptcp_flow_t per
 * subflow.  An attempt to write a new value fails with EPERM.  When the
 * caller passes no output buffer, only a size estimate is returned in
 * req->oldidx.
 *
 * Returns 0 or the first error reported by SYSCTL_OUT.
 */
static int
mptcp_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
int error = 0, f;
size_t n, len;
struct mppcb *mpp;
struct mptses *mpte;
struct mptcb *mp_tp;
struct mptsub *mpts;
struct socket *so;
conninfo_mptcp_t mptcpci;
mptcp_flow_t *flows;
if (req->newptr != USER_ADDR_NULL)
return (EPERM);
lck_mtx_lock(&mtcbinfo.mppi_lock);
n = mtcbinfo.mppi_count;
/* Size probe: estimate with 1/8 slack and 4 flows per connection. */
if (req->oldptr == USER_ADDR_NULL) {
lck_mtx_unlock(&mtcbinfo.mppi_lock);
req->oldidx = (n + n/8) * sizeof(conninfo_mptcp_t) +
4 * (n + n/8) * sizeof(mptcp_flow_t);
return (0);
}
TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
bzero(&mptcpci, sizeof(mptcpci));
lck_mtx_lock(&mpp->mpp_lock);
VERIFY(mpp->mpp_flags & MPP_ATTACHED);
mpte = mptompte(mpp);
VERIFY(mpte != NULL);
mp_tp = mpte->mpte_mptcb;
VERIFY(mp_tp != NULL);
len = sizeof(*flows) * mpte->mpte_numflows;
flows = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO);
/*
 * NOTE(review): this stops the walk with `error` still 0, so the caller
 * gets a truncated list and no error.  Also confirm what _MALLOC returns
 * for len == 0 (a session with no subflows).
 */
if (flows == NULL) {
lck_mtx_unlock(&mpp->mpp_lock);
break;
}
/* mpt_state is read here without taking the mptcb lock. */
mptcpci.mptcpci_state = mp_tp->mpt_state;
mptcpci.mptcpci_nflows = mpte->mpte_numflows;
/* Record length: the header already includes room for one flow. */
mptcpci.mptcpci_len = sizeof(mptcpci) +
sizeof(*flows) * (mptcpci.mptcpci_nflows - 1);
/* Emit the header without its embedded flow slot; flows follow below. */
error = SYSCTL_OUT(req, &mptcpci,
sizeof(mptcpci) - sizeof(*flows));
if (error) {
lck_mtx_unlock(&mpp->mpp_lock);
FREE(flows, M_TEMP);
break;
}
f = 0;
TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
MPTS_LOCK(mpts);
so = mpts->mpts_socket;
socket_lock(so, 0);
fill_mptcp_subflow(so, &flows[f], mpts);
socket_unlock(so, 0);
MPTS_UNLOCK(mpts);
f++;
}
/* Copy the flows out after dropping the PCB lock. */
lck_mtx_unlock(&mpp->mpp_lock);
error = SYSCTL_OUT(req, flows, len);
FREE(flows, M_TEMP);
if (error)
break;
}
lck_mtx_unlock(&mtcbinfo.mppi_lock);
return (error);
}
/*
 * Register mptcp_pcblist() as the read-only "pcblist" node under the
 * _net_inet_mptcp sysctl parent.
 */
SYSCTL_PROC(_net_inet_mptcp, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED,
0, 0, mptcp_pcblist, "S,conninfo_mptcp_t",
"List of active MPTCP connections");