#include <kern/locks.h>
#include <kern/policy_internal.h>
#include <kern/zalloc.h>
#include <mach/sdt.h>
#include <sys/domain.h>
#include <sys/kdebug.h>
#include <sys/kern_control.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/mcache.h>
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/resourcevar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <net/content_filter.h>
#include <net/if.h>
#include <net/if_var.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_var.h>
#include <netinet/mptcp_var.h>
#include <netinet/mptcp.h>
#include <netinet/mptcp_opt.h>
#include <netinet/mptcp_seq.h>
#include <netinet/mptcp_timer.h>
#include <libkern/crypto/sha1.h>
#if INET6
#include <netinet6/in6_pcb.h>
#include <netinet6/ip6protosw.h>
#endif
#include <dev/random/randomdev.h>
/*
 * Forward declarations of file-local helpers that are referenced before
 * their definitions.
 */
static void mptcp_attach_to_subf(struct socket *, struct mptcb *, uint8_t);
static void mptcp_detach_mptcb_from_subf(struct mptcb *, struct socket *);
static uint32_t mptcp_gc(struct mppcbinfo *);
static int mptcp_subflow_soreceive(struct socket *, struct sockaddr **,
struct uio *, struct mbuf **, struct mbuf **, int *);
static int mptcp_subflow_sosend(struct socket *, struct sockaddr *,
struct uio *, struct mbuf *, struct mbuf *, int);
static void mptcp_subflow_rupcall(struct socket *, void *, int);
static void mptcp_subflow_input(struct mptses *, struct mptsub *);
static void mptcp_subflow_wupcall(struct socket *, void *, int);
static void mptcp_subflow_eupcall1(struct socket *, void *, uint32_t);
static void mptcp_update_last_owner(struct socket *so, struct socket *mp_so);
static void mptcp_drop_tfo_data(struct mptses *, struct mptsub *);
static void mptcp_subflow_abort(struct mptsub *, int);
static void mptcp_send_dfin(struct socket *so);
/*
 * Result codes returned by the subflow event handlers declared below.
 */
typedef enum {
MPTS_EVRET_DELETE = 1,
MPTS_EVRET_OK = 2,
MPTS_EVRET_CONNECT_PENDING = 3,
MPTS_EVRET_DISCONNECT_FALLBACK = 4,
} ev_ret_t;
/*
 * Subflow event handling.  Every *_ev function has the handler signature
 * stored in mptsub_ev_entry_t (session, subflow, in/out hint mask, event),
 * which is what allows them to be listed in mpsub_ev_entry_tbl.
 */
static ev_ret_t mptcp_subflow_events(struct mptses *, struct mptsub *, uint64_t *);
static ev_ret_t mptcp_subflow_propagate_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_nosrcaddr_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_failover_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_ifdenied_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_connected_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_disconnected_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_mpstatus_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_mustrst_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_mpcantrcvmore_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_adaptive_rtimo_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_adaptive_wtimo_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
/* Miscellaneous helpers defined later in the file. */
static const char *mptcp_evret2str(ev_ret_t);
static void mptcp_do_sha1(mptcp_key_t *, char *);
static void mptcp_init_local_parms(struct mptses *);
/*
 * Allocation zones and their element sizes.  All of them are set up once
 * in mptcp_init(): one zone each for subflows (struct mptsub), recorded
 * socket options (struct mptopt) and subflow authentication entries
 * (struct mptcp_subf_auth_entry).
 */
static unsigned int mptsub_zone_size;
static struct zone *mptsub_zone;
static unsigned int mptopt_zone_size;
static struct zone *mptopt_zone;
static unsigned int mpt_subauth_entry_size;
static struct zone *mpt_subauth_zone;
/* Global MPTCP PCB info: PCB list, zone, lock and gc/timer callbacks. */
struct mppcbinfo mtcbinfo;
/* NOTE(review): presumably per-call transfer limits on a subflow; users are outside this chunk. */
#define MPTCP_SUBFLOW_WRITELEN (8 * 1024)
#define MPTCP_SUBFLOW_READLEN (8 * 1024)
/* sysctl tree: net.inet.mptcp.* */
SYSCTL_DECL(_net_inet);
SYSCTL_NODE(_net_inet, OID_AUTO, mptcp, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "MPTCP");
uint32_t mptcp_dbg_area = 31;
SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, dbg_area, CTLFLAG_RW|CTLFLAG_LOCKED,
&mptcp_dbg_area, 0, "MPTCP debug area");
/*
 * NOTE(review): mptcp_dbg_level is a uint32_t but is exported with
 * SYSCTL_INT, unlike dbg_area above which uses SYSCTL_UINT -- confirm
 * whether the signed export is intentional.
 */
uint32_t mptcp_dbg_level = 1;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, dbg_level, CTLFLAG_RW | CTLFLAG_LOCKED,
&mptcp_dbg_level, 0, "MPTCP debug level");
SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, pcbcount, CTLFLAG_RD|CTLFLAG_LOCKED,
&mtcbinfo.mppi_count, 0, "Number of active PCBs");
/*
 * Private copies of the TCP protosw/usrreqs used by subflow sockets.
 * mptcp_init() fills them from the regular TCP entries and overrides
 * the send/receive/rcvoob handlers.
 */
static struct protosw mptcp_subflow_protosw;
static struct pr_usrreqs mptcp_subflow_usrreqs;
#if INET6
static struct ip6protosw mptcp_subflow_protosw6;
static struct pr_usrreqs mptcp_subflow_usrreqs6;
#endif
/*
 * Bit 0x01 is set by mptcp_sched_create_subflows() when the deferred
 * mptcp_create_subflows() call has been armed, and cleared when it runs.
 */
static uint8_t mptcp_create_subflows_scheduled;
/*
 * One row of the subflow event dispatch table: a socket-filter hint bit
 * and the handler to invoke for it.
 */
typedef struct mptcp_subflow_event_entry {
uint64_t sofilt_hint_mask;
ev_ret_t (*sofilt_hint_ev_hdlr)(
struct mptses *mpte,
struct mptsub *mpts,
uint64_t *p_mpsofilt_hint,
uint64_t event);
} mptsub_ev_entry_t;
/*
 * Cell-icon state.  mptcp_last_cellicon_set is initialised to tcp_now in
 * mptcp_init(); the toggle rate is expressed in TCP_RETRANSHZ ticks.
 */
static uint8_t mptcp_cellicon_is_set;
static uint32_t mptcp_last_cellicon_set;
#define MPTCP_CELLICON_TOGGLE_RATE (5 * TCP_RETRANSHZ)
/*
 * Subflow event dispatch table: maps each SO_FILT_HINT_* bit to the
 * handler responsible for it.  The row order is kept exactly as it was;
 * do not reorder without checking how the table is walked.
 */
static mptsub_ev_entry_t mpsub_ev_entry_tbl [] = {
	{ .sofilt_hint_mask = SO_FILT_HINT_MPCANTRCVMORE, .sofilt_hint_ev_hdlr = mptcp_subflow_mpcantrcvmore_ev },
	{ .sofilt_hint_mask = SO_FILT_HINT_MPFAILOVER, .sofilt_hint_ev_hdlr = mptcp_subflow_failover_ev },
	{ .sofilt_hint_mask = SO_FILT_HINT_CONNRESET, .sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev },
	{ .sofilt_hint_mask = SO_FILT_HINT_MUSTRST, .sofilt_hint_ev_hdlr = mptcp_subflow_mustrst_ev },
	{ .sofilt_hint_mask = SO_FILT_HINT_CANTRCVMORE, .sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev },
	{ .sofilt_hint_mask = SO_FILT_HINT_TIMEOUT, .sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev },
	{ .sofilt_hint_mask = SO_FILT_HINT_NOSRCADDR, .sofilt_hint_ev_hdlr = mptcp_subflow_nosrcaddr_ev },
	{ .sofilt_hint_mask = SO_FILT_HINT_IFDENIED, .sofilt_hint_ev_hdlr = mptcp_subflow_ifdenied_ev },
	{ .sofilt_hint_mask = SO_FILT_HINT_CONNECTED, .sofilt_hint_ev_hdlr = mptcp_subflow_connected_ev },
	{ .sofilt_hint_mask = SO_FILT_HINT_MPSTATUS, .sofilt_hint_ev_hdlr = mptcp_subflow_mpstatus_ev },
	{ .sofilt_hint_mask = SO_FILT_HINT_DISCONNECTED, .sofilt_hint_ev_hdlr = mptcp_subflow_disconnected_ev },
	{ .sofilt_hint_mask = SO_FILT_HINT_ADAPTIVE_RTIMO, .sofilt_hint_ev_hdlr = mptcp_subflow_adaptive_rtimo_ev },
	{ .sofilt_hint_mask = SO_FILT_HINT_ADAPTIVE_WTIMO, .sofilt_hint_ev_hdlr = mptcp_subflow_adaptive_wtimo_ev },
};
/*
 * mptcp_init
 *
 * One-time protocol initialisation.  Clones the TCP protosw/usrreqs
 * (IPv4 and, when INET6 is set, IPv6) into the private subflow copies,
 * initialises the global PCB info (zone, lock, gc/timer callbacks) and
 * creates the zones for subflows, socket options and auth entries.
 * Panics if any zone cannot be created.
 *
 * pp - protosw being initialised; must be attached and not yet
 *      marked initialised.
 * dp - unused.
 */
void
mptcp_init(struct protosw *pp, struct domain *dp)
{
#pragma unused(dp)
/* Guards against running the body more than once. */
static int mptcp_initialized = 0;
struct protosw *prp;
#if INET6
struct ip6protosw *prp6;
#endif
VERIFY((pp->pr_flags & (PR_INITIALIZED|PR_ATTACHED)) == PR_ATTACHED);
if (mptcp_initialized)
return;
mptcp_initialized = 1;
/*
 * Start from a byte copy of the IPv4 TCP protosw and its usrreqs,
 * then unlink the copy from any list and point it at the private
 * usrreqs with MPTCP-specific send/receive routines.
 */
prp = pffindproto_locked(PF_INET, IPPROTO_TCP, SOCK_STREAM);
VERIFY(prp != NULL);
bcopy(prp, &mptcp_subflow_protosw, sizeof (*prp));
bcopy(prp->pr_usrreqs, &mptcp_subflow_usrreqs,
sizeof (mptcp_subflow_usrreqs));
mptcp_subflow_protosw.pr_entry.tqe_next = NULL;
mptcp_subflow_protosw.pr_entry.tqe_prev = NULL;
mptcp_subflow_protosw.pr_usrreqs = &mptcp_subflow_usrreqs;
mptcp_subflow_usrreqs.pru_soreceive = mptcp_subflow_soreceive;
mptcp_subflow_usrreqs.pru_sosend = mptcp_subflow_sosend;
mptcp_subflow_usrreqs.pru_rcvoob = pru_rcvoob_notsupp;
/*
 * The filter list head is filled with a poison pattern rather than
 * being initialised -- presumably so that any use of socket filters
 * through the cloned protosw faults immediately (TODO confirm).
 */
mptcp_subflow_protosw.pr_filter_head.tqh_first =
(struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
mptcp_subflow_protosw.pr_filter_head.tqh_last =
(struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
#if INET6
/* Same cloning procedure for the IPv6 TCP protosw. */
prp6 = (struct ip6protosw *)pffindproto_locked(PF_INET6,
IPPROTO_TCP, SOCK_STREAM);
VERIFY(prp6 != NULL);
bcopy(prp6, &mptcp_subflow_protosw6, sizeof (*prp6));
bcopy(prp6->pr_usrreqs, &mptcp_subflow_usrreqs6,
sizeof (mptcp_subflow_usrreqs6));
mptcp_subflow_protosw6.pr_entry.tqe_next = NULL;
mptcp_subflow_protosw6.pr_entry.tqe_prev = NULL;
mptcp_subflow_protosw6.pr_usrreqs = &mptcp_subflow_usrreqs6;
mptcp_subflow_usrreqs6.pru_soreceive = mptcp_subflow_soreceive;
mptcp_subflow_usrreqs6.pru_sosend = mptcp_subflow_sosend;
mptcp_subflow_usrreqs6.pru_rcvoob = pru_rcvoob_notsupp;
mptcp_subflow_protosw6.pr_filter_head.tqh_first =
(struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
mptcp_subflow_protosw6.pr_filter_head.tqh_last =
(struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
#endif
/* Global PCB info: list head, element zone, lock, callbacks. */
bzero(&mtcbinfo, sizeof (mtcbinfo));
TAILQ_INIT(&mtcbinfo.mppi_pcbs);
mtcbinfo.mppi_size = sizeof (struct mpp_mtp);
if ((mtcbinfo.mppi_zone = zinit(mtcbinfo.mppi_size,
1024 * mtcbinfo.mppi_size, 8192, "mptcb")) == NULL) {
panic("%s: unable to allocate MPTCP PCB zone\n", __func__);
}
zone_change(mtcbinfo.mppi_zone, Z_CALLERACCT, FALSE);
zone_change(mtcbinfo.mppi_zone, Z_EXPAND, TRUE);
mtcbinfo.mppi_lock_grp_attr = lck_grp_attr_alloc_init();
mtcbinfo.mppi_lock_grp = lck_grp_alloc_init("mppcb",
mtcbinfo.mppi_lock_grp_attr);
mtcbinfo.mppi_lock_attr = lck_attr_alloc_init();
lck_mtx_init(&mtcbinfo.mppi_lock, mtcbinfo.mppi_lock_grp,
mtcbinfo.mppi_lock_attr);
mtcbinfo.mppi_gc = mptcp_gc;
mtcbinfo.mppi_timer = mptcp_timer;
mp_pcbinfo_attach(&mtcbinfo);
/* Zone for struct mptsub (subflows). */
mptsub_zone_size = sizeof (struct mptsub);
if ((mptsub_zone = zinit(mptsub_zone_size, 1024 * mptsub_zone_size,
8192, "mptsub")) == NULL) {
panic("%s: unable to allocate MPTCP subflow zone\n", __func__);
}
zone_change(mptsub_zone, Z_CALLERACCT, FALSE);
zone_change(mptsub_zone, Z_EXPAND, TRUE);
/* Zone for struct mptopt (recorded socket options). */
mptopt_zone_size = sizeof (struct mptopt);
if ((mptopt_zone = zinit(mptopt_zone_size, 128 * mptopt_zone_size,
1024, "mptopt")) == NULL) {
panic("%s: unable to allocate MPTCP option zone\n", __func__);
}
zone_change(mptopt_zone, Z_CALLERACCT, FALSE);
zone_change(mptopt_zone, Z_EXPAND, TRUE);
/* Zone for struct mptcp_subf_auth_entry. */
mpt_subauth_entry_size = sizeof (struct mptcp_subf_auth_entry);
if ((mpt_subauth_zone = zinit(mpt_subauth_entry_size,
1024 * mpt_subauth_entry_size, 8192, "mptauth")) == NULL) {
panic("%s: unable to allocate MPTCP address auth zone \n",
__func__);
}
zone_change(mpt_subauth_zone, Z_CALLERACCT, FALSE);
zone_change(mpt_subauth_zone, Z_EXPAND, TRUE);
/* Seed the cell-icon timestamp with the current TCP clock. */
mptcp_last_cellicon_set = tcp_now;
}
/*
 * mptcp_get_statsindex
 *
 * Locate the slot in stats[] that belongs to the interface the subflow
 * last sent on.  If no slot carries that interface index yet, the first
 * unused slot (ifindex == IFSCOPE_NONE) is claimed for it; its
 * is_expensive flag is filled in from IFNET_IS_CELLULAR() unless it is
 * already non-zero.
 *
 * Returns the slot index, or -1 when the subflow has no outgoing
 * interface or the table has neither a match nor a free slot.
 */
int
mptcp_get_statsindex(struct mptcp_itf_stats *stats, const struct mptsub *mpts)
{
	const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
	int free_slot = -1;
	int slot;

	if (ifp == NULL) {
		mptcplog((LOG_ERR, "%s: no ifp on subflow\n", __func__),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
		return (-1);
	}

	for (slot = 0; slot < MPTCP_ITFSTATS_SIZE; slot++) {
		if (stats[slot].ifindex == IFSCOPE_NONE) {
			/* Remember only the first unused slot. */
			if (free_slot == -1)
				free_slot = slot;
		} else if (stats[slot].ifindex == ifp->if_index) {
			/* Interface already has a slot. */
			return (slot);
		}
	}

	if (free_slot == -1)
		return (-1);

	stats[free_slot].ifindex = ifp->if_index;
	if (stats[free_slot].is_expensive == 0)
		stats[free_slot].is_expensive = IFNET_IS_CELLULAR(ifp);

	return (free_slot);
}
/*
 * mptcpstats_inc_switch
 *
 * Account for one subflow switch: bumps the global and per-session
 * counters, plus the per-interface counter when a stats slot can be
 * found (or claimed) for the subflow's interface.
 */
void
mptcpstats_inc_switch(struct mptses *mpte, const struct mptsub *mpts)
{
	int slot;

	tcpstat.tcps_mp_switches++;
	mpte->mpte_subflow_switches++;

	slot = mptcp_get_statsindex(mpte->mpte_itfstats, mpts);
	if (slot == -1)
		return;

	mpte->mpte_itfstats[slot].switches++;
}
static void
mptcp_flush_sopts(struct mptses *mpte)
{
struct mptopt *mpo, *tmpo;
TAILQ_FOREACH_SAFE(mpo, &mpte->mpte_sopts, mpo_entry, tmpo) {
mptcp_sopt_remove(mpte, mpo);
mptcp_sopt_free(mpo);
}
VERIFY(TAILQ_EMPTY(&mpte->mpte_sopts));
}
/*
 * mptcp_sescreate
 *
 * Initialise the MPTCP session (mptses) and control block (mptcb) that
 * are embedded alongside the given PCB in its struct mpp_mtp, and link
 * the three together.  Always returns 0.
 */
int
mptcp_sescreate(struct mppcb *mpp)
{
	struct mpp_mtp *container;
	struct mptses *mpte;
	struct mptcb *mp_tp;

	VERIFY(mpp != NULL);
	VERIFY(mpp->mpp_pcbinfo != NULL);

	__IGNORE_WCASTALIGN(container = (struct mpp_mtp *)mpp);
	mpte = &container->mpp_ses;
	mp_tp = &container->mtcb;

	/* Session: start from zero, then wire up back-pointers and lists. */
	bzero(mpte, sizeof (*mpte));
	VERIFY(mpp->mpp_pcbe == NULL);
	mpp->mpp_pcbe = mpte;
	mpte->mpte_mppcb = mpp;
	mpte->mpte_mptcb = mp_tp;
	TAILQ_INIT(&mpte->mpte_sopts);
	TAILQ_INIT(&mpte->mpte_subflows);
	mpte->mpte_associd = SAE_ASSOCID_ANY;
	mpte->mpte_connid_last = SAE_CONNID_ANY;
	/* Interface info initially lives in the inline array. */
	mpte->mpte_itfinfo = &mpte->_mpte_itfinfo[0];
	mpte->mpte_itfinfo_size = MPTE_ITFINFO_SIZE;

	/* Control block: zeroed, closed, pointing back at the session. */
	bzero(mp_tp, sizeof (*mp_tp));
	mp_tp->mpt_mpte = mpte;
	mp_tp->mpt_state = MPTCPS_CLOSED;

	DTRACE_MPTCP1(session__create, struct mppcb *, mpp);

	return (0);
}
/*
 * mptcpstats_get_bytes
 *
 * Sum the per-interface tx/rx byte counters of a session.
 *
 * mpte         - session whose mpte_itfstats[] is summed.
 * initial_cell - when true, the bytes recorded for the initial subflow
 *                (mpte_init_txbytes + mpte_init_rxbytes) are subtracted
 *                from the "expensive" total.
 * cellbytes    - out: bytes carried on interfaces flagged is_expensive.
 * allbytes     - out: bytes carried on all interfaces.
 *
 * If the adjusted expensive total is negative, an error is logged and
 * both outputs are set to 0.
 *
 * Fixes relative to the previous version:
 *  - the initial-subflow adjustment subtracted mpte_init_txbytes twice
 *    instead of tx + rx;
 *  - the 64-bit total was logged with "%d".
 */
static void
mptcpstats_get_bytes(struct mptses *mpte, boolean_t initial_cell,
    uint64_t *cellbytes, uint64_t *allbytes)
{
	int64_t mycellbytes = 0;
	uint64_t myallbytes = 0;
	int i;

	for (i = 0; i < MPTCP_ITFSTATS_SIZE; i++) {
		if (mpte->mpte_itfstats[i].is_expensive) {
			mycellbytes += mpte->mpte_itfstats[i].mpis_txbytes;
			mycellbytes += mpte->mpte_itfstats[i].mpis_rxbytes;
		}

		myallbytes += mpte->mpte_itfstats[i].mpis_txbytes;
		myallbytes += mpte->mpte_itfstats[i].mpis_rxbytes;
	}

	if (initial_cell) {
		mycellbytes -= mpte->mpte_init_txbytes;
		mycellbytes -= mpte->mpte_init_rxbytes;
	}

	if (mycellbytes < 0) {
		mptcplog((LOG_ERR, "%s cellbytes is %lld\n", __func__,
		    (long long)mycellbytes),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
		*cellbytes = 0;
		*allbytes = 0;
	} else {
		*cellbytes = mycellbytes;
		*allbytes = myallbytes;
	}
}
/*
 * mptcpstats_session_wrapup
 *
 * Fold the outcome of a session into the global tcpstat counters.
 * Counters are selected by service type (handover / interactive /
 * aggregate) and by whether the session is flagged MPTE_FIRSTPARTY.
 * Byte totals are only added for sessions whose handshake succeeded.
 */
static void
mptcpstats_session_wrapup(struct mptses *mpte)
{
/* Whether the session was recorded as starting on cellular. */
boolean_t cell = mpte->mpte_initial_cell;
switch (mpte->mpte_svctype) {
case MPTCP_SVCTYPE_HANDOVER:
if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
tcpstat.tcps_mptcp_fp_handover_attempt++;
if (cell && mpte->mpte_handshake_success) {
tcpstat.tcps_mptcp_fp_handover_success_cell++;
/* Note: the wifi_from_cell/cell_from_wifi counters are shared with the non-first-party branch. */
if (mpte->mpte_used_wifi)
tcpstat.tcps_mptcp_handover_wifi_from_cell++;
} else if (mpte->mpte_handshake_success) {
tcpstat.tcps_mptcp_fp_handover_success_wifi++;
if (mpte->mpte_used_cell)
tcpstat.tcps_mptcp_handover_cell_from_wifi++;
}
} else {
tcpstat.tcps_mptcp_handover_attempt++;
if (cell && mpte->mpte_handshake_success) {
tcpstat.tcps_mptcp_handover_success_cell++;
if (mpte->mpte_used_wifi)
tcpstat.tcps_mptcp_handover_wifi_from_cell++;
} else if (mpte->mpte_handshake_success) {
tcpstat.tcps_mptcp_handover_success_wifi++;
if (mpte->mpte_used_cell)
tcpstat.tcps_mptcp_handover_cell_from_wifi++;
}
}
/* Byte accounting for successful handover sessions. */
if (mpte->mpte_handshake_success) {
uint64_t cellbytes;
uint64_t allbytes;
mptcpstats_get_bytes(mpte, cell, &cellbytes, &allbytes);
tcpstat.tcps_mptcp_handover_cell_bytes += cellbytes;
tcpstat.tcps_mptcp_handover_all_bytes += allbytes;
}
break;
case MPTCP_SVCTYPE_INTERACTIVE:
if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
tcpstat.tcps_mptcp_fp_interactive_attempt++;
if (mpte->mpte_handshake_success) {
tcpstat.tcps_mptcp_fp_interactive_success++;
if (!cell && mpte->mpte_used_cell)
tcpstat.tcps_mptcp_interactive_cell_from_wifi++;
}
} else {
tcpstat.tcps_mptcp_interactive_attempt++;
if (mpte->mpte_handshake_success) {
tcpstat.tcps_mptcp_interactive_success++;
if (!cell && mpte->mpte_used_cell)
tcpstat.tcps_mptcp_interactive_cell_from_wifi++;
}
}
/* Byte accounting for successful interactive sessions. */
if (mpte->mpte_handshake_success) {
uint64_t cellbytes;
uint64_t allbytes;
mptcpstats_get_bytes(mpte, cell, &cellbytes, &allbytes);
tcpstat.tcps_mptcp_interactive_cell_bytes += cellbytes;
tcpstat.tcps_mptcp_interactive_all_bytes += allbytes;
}
break;
case MPTCP_SVCTYPE_AGGREGATE:
if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
tcpstat.tcps_mptcp_fp_aggregate_attempt++;
if (mpte->mpte_handshake_success)
tcpstat.tcps_mptcp_fp_aggregate_success++;
} else {
tcpstat.tcps_mptcp_aggregate_attempt++;
if (mpte->mpte_handshake_success) {
tcpstat.tcps_mptcp_aggregate_success++;
}
}
/* Byte accounting for successful aggregate sessions. */
if (mpte->mpte_handshake_success) {
uint64_t cellbytes;
uint64_t allbytes;
mptcpstats_get_bytes(mpte, cell, &cellbytes, &allbytes);
tcpstat.tcps_mptcp_aggregate_cell_bytes += cellbytes;
tcpstat.tcps_mptcp_aggregate_all_bytes += allbytes;
}
break;
}
/* Independent of service type: started on cell, later used Wi-Fi. */
if (cell && mpte->mpte_handshake_success && mpte->mpte_used_wifi)
tcpstat.tcps_mptcp_back_to_wifi++;
}
/*
 * mptcp_session_destroy
 *
 * Tear down a session: record its statistics, clear the cell icon,
 * drop the queued socket options, release a separately allocated
 * interface-info array and free the reinject queue.  The session lock
 * must be held and all subflows must already be gone.
 */
static void
mptcp_session_destroy(struct mptses *mpte)
{
	struct mptcb *mp_tp;

	mpte_lock_assert_held(mpte);

	mp_tp = mpte->mpte_mptcb;
	VERIFY(mp_tp != NULL);

	mptcpstats_session_wrapup(mpte);
	mptcp_unset_cellicon();
	mptcp_flush_sopts(mpte);

	VERIFY(TAILQ_EMPTY(&mpte->mpte_subflows) && mpte->mpte_numflows == 0);

	/*
	 * Only an array larger than the inline default is freed here; the
	 * default-sized one points into the session itself (see
	 * mptcp_sescreate()).
	 */
	if (mpte->mpte_itfinfo_size > MPTE_ITFINFO_SIZE) {
		_FREE(mpte->mpte_itfinfo, M_TEMP);
	}
	mpte->mpte_itfinfo = NULL;

	m_freem_list(mpte->mpte_reinjectq);

	DTRACE_MPTCP2(session__destroy, struct mptses *, mpte,
	    struct mptcb *, mp_tp);
}
static boolean_t
mptcp_ok_to_create_subflows(struct mptcb *mp_tp)
{
return (mp_tp->mpt_state >= MPTCPS_ESTABLISHED &&
mp_tp->mpt_state < MPTCPS_TIME_WAIT &&
!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP));
}
/*
 * mptcp_synthesize_nat64
 *
 * Embed an IPv4 address into a NAT64 prefix, in place.
 *
 * addr   - in/out: the NAT64 prefix; receives the synthesized address.
 * len    - prefix length, one of the NAT64_PREFIX_LEN_* values; any
 *          other value panics.
 * addrv4 - IPv4 address to embed.
 *
 * The IPv4 bytes are placed directly after the prefix; byte 8 of the
 * IPv6 address is never written, so for prefixes shorter than 64 bits
 * the IPv4 bytes are split around it.
 *
 * Returns 0 on success, -1 if the IPv4 address is of a kind that is
 * refused (zeronet, loopback, link-local, DS-Lite, 6to4 relay anycast,
 * multicast, broadcast, or -- with the well-known 64:ff9b::/96 prefix --
 * private/shared address space).
 */
static int
mptcp_synthesize_nat64(struct in6_addr *addr, uint32_t len, struct in_addr *addrv4)
{
	static const struct in6_addr well_known_prefix = {
		.__u6_addr.__u6_addr8 = {
			0x00, 0x64, 0xff, 0x9b, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
		},
	};
	char addrstr[MAX_IPv6_STR_LEN];
	char *src4 = (char *)addrv4;
	char *dst6 = (char *)addr;

	/* Addresses that are never translated. */
	if (IN_ZERONET(addrv4->s_addr) ||
	    IN_LOOPBACK(addrv4->s_addr) ||
	    IN_LINKLOCAL(addrv4->s_addr) ||
	    IN_DS_LITE(addrv4->s_addr) ||
	    IN_6TO4_RELAY_ANYCAST(addrv4->s_addr) ||
	    IN_MULTICAST(addrv4->s_addr) ||
	    INADDR_BROADCAST == addrv4->s_addr) {
		return (-1);
	}

	/* Non-global addresses are refused with the well-known prefix. */
	if (len == NAT64_PREFIX_LEN_96 &&
	    IN6_ARE_ADDR_EQUAL(addr, &well_known_prefix) &&
	    (IN_PRIVATE(addrv4->s_addr) ||
	    IN_SHARED_ADDRESS_SPACE(addrv4->s_addr))) {
		return (-1);
	}

	switch (len) {
	case NAT64_PREFIX_LEN_96:
		memcpy(dst6 + 12, src4, 4);
		break;
	case NAT64_PREFIX_LEN_64:
		memcpy(dst6 + 9, src4, 4);
		break;
	case NAT64_PREFIX_LEN_56:
		memcpy(dst6 + 7, src4, 1);
		memcpy(dst6 + 9, src4 + 1, 3);
		break;
	case NAT64_PREFIX_LEN_48:
		memcpy(dst6 + 6, src4, 2);
		memcpy(dst6 + 9, src4 + 2, 2);
		break;
	case NAT64_PREFIX_LEN_40:
		memcpy(dst6 + 5, src4, 3);
		memcpy(dst6 + 9, src4 + 3, 1);
		break;
	case NAT64_PREFIX_LEN_32:
		memcpy(dst6 + 4, src4, 4);
		break;
	default:
		panic("NAT64-prefix len is wrong: %u\n", len);
	}

	mptcplog((LOG_DEBUG, "%s: nat64prefix-len %u synthesized %s\n", __func__,
	    len, inet_ntop(AF_INET6, (void *)addr, addrstr, sizeof(addrstr))),
	    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

	return (0);
}
/*
 * mptcp_check_subflows_and_add
 *
 * Walk the session's interface table and start a subflow on every
 * usable interface that does not have one yet.
 *
 * An interface is skipped when it is flagged no_mptcp_support, has no
 * index, or when an existing subflow already covers it:
 *   - a subflow whose last outgoing interface matches and whose socket
 *     is not disconnected, or
 *   - in handover mode, a non-cellular subflow that is still considered
 *     healthy (see the condition below).
 *
 * Sessions that are neither first-party nor access-granted (and with
 * developer mode off) do not create the subflow themselves; they ask
 * symptoms instead and the function returns.
 *
 * When the destination is IPv4 but the interface only has IPv6
 * connectivity, the destination is synthesized from the interface's
 * NAT64 prefix.
 *
 * Fix relative to the previous version: the ifindex2ifnet[] lookup is
 * now checked for NULL before it is handed to ifnet_get_nat64prefix()
 * and dereferenced in the error log.
 */
void
mptcp_check_subflows_and_add(struct mptses *mpte)
{
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	uint32_t i;

	if (!mptcp_ok_to_create_subflows(mp_tp))
		return;

	for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
		struct mpt_itf_info *info;
		struct mptsub *mpts;
		uint32_t ifindex;
		int found = 0;

		info = &mpte->mpte_itfinfo[i];

		if (info->no_mptcp_support)
			continue;

		ifindex = info->ifindex;
		if (ifindex == IFSCOPE_NONE)
			continue;

		TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
			const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;

			if (ifp == NULL)
				continue;

			/* A live subflow already uses this interface. */
			if (ifp->if_index == ifindex &&
			    !(mpts->mpts_socket->so_state & SS_ISDISCONNECTED)) {
				found = 1;
				break;
			}

			/*
			 * Handover mode: a non-cellular subflow that is not
			 * going away counts as coverage as long as Wi-Fi is
			 * usable, or -- if it is not -- the subflow is below
			 * the retransmit threshold with data still queued.
			 */
			if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER &&
			    !IFNET_IS_CELLULAR(ifp) &&
			    !(mpts->mpts_flags & (MPTSF_DISCONNECTING | MPTSF_DISCONNECTED | MPTSF_CLOSE_REQD)) &&
			    (!mptcp_is_wifi_unusable() ||
			    (sototcpcb(mpts->mpts_socket)->t_rxtshift < mptcp_fail_thresh &&
			    mptetoso(mpte)->so_snd.sb_cc))) {
				mptcplog((LOG_DEBUG, "%s handover, wifi state %u rxt %u ifindex %u this %u\n",
				    __func__, mptcp_is_wifi_unusable(), sototcpcb(mpts->mpts_socket)->t_rxtshift, ifindex,
				    ifp->if_index),
				    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
				found = 1;
				break;
			}
		}

		if (!found && !(mpte->mpte_flags & MPTE_FIRSTPARTY) &&
		    !(mpte->mpte_flags & MPTE_ACCESS_GRANTED) &&
		    mptcp_developer_mode == 0) {
			mptcp_ask_symptoms(mpte);
			return;
		}

		if (!found) {
			struct sockaddr *dst = &mpte->mpte_dst;
			struct sockaddr_in6 nat64pre;

			if (mpte->mpte_dst.sa_family == AF_INET &&
			    !info->has_v4_conn && info->has_v6_conn) {
				struct ipv6_prefix nat64prefixes[NAT64_MAX_NUM_PREFIXES];
				struct ifnet *ifp;
				int error, j;

				bzero(&nat64pre, sizeof(struct sockaddr_in6));

				ifnet_head_lock_shared();
				ifp = ifindex2ifnet[ifindex];
				ifnet_head_done();

				/* The table entry may be empty; nothing to do then. */
				if (ifp == NULL) {
					mptcplog((LOG_ERR, "%s: no ifnet for ifindex %u\n",
					    __func__, ifindex),
					    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
					continue;
				}

				error = ifnet_get_nat64prefix(ifp, nat64prefixes);
				if (error) {
					mptcplog((LOG_ERR, "%s: no NAT64-prefix on itf %s, error %d\n",
					    __func__, ifp->if_name, error),
					    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
					continue;
				}

				/* Use the first prefix with a non-zero length. */
				for (j = 0; j < NAT64_MAX_NUM_PREFIXES; j++) {
					if (nat64prefixes[j].prefix_len != 0)
						break;
				}

				VERIFY(j < NAT64_MAX_NUM_PREFIXES);

				error = mptcp_synthesize_nat64(&nat64prefixes[j].ipv6_prefix,
				    nat64prefixes[j].prefix_len,
				    &mpte->__mpte_dst_v4.sin_addr);
				if (error != 0) {
					mptcplog((LOG_INFO, "%s: cannot synthesize this addr\n", __func__),
					    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
					continue;
				}

				/* Build the IPv6 destination from the synthesized address. */
				memcpy(&nat64pre.sin6_addr,
				    &nat64prefixes[j].ipv6_prefix,
				    sizeof(nat64pre.sin6_addr));
				nat64pre.sin6_len = sizeof(struct sockaddr_in6);
				nat64pre.sin6_family = AF_INET6;
				nat64pre.sin6_port = mpte->__mpte_dst_v6.sin6_port;
				nat64pre.sin6_flowinfo = 0;
				nat64pre.sin6_scope_id = 0;

				dst = (struct sockaddr *)&nat64pre;
			}

			mptcp_subflow_add(mpte, NULL, dst, ifindex, NULL);
		}
	}
}
/*
 * mptcp_check_subflows_and_remove
 *
 * Handover mode only: if at least one established, connected
 * non-cellular subflow is considered working, post
 * SO_FILT_HINT_MUSTRST on every cellular subflow and, if any was hit,
 * clear the cell icon.
 *
 * A non-cellular subflow counts as working when Wi-Fi is not reported
 * unusable, or when it has no retransmit backoff while the MPTCP send
 * buffer holds data.
 */
static void
mptcp_check_subflows_and_remove(struct mptses *mpte)
{
	struct mptsub *mpts, *next;
	int have_working = 0;
	int reset_any = 0;
	int wifi_unusable = mptcp_is_wifi_unusable();

	if (mpte->mpte_svctype != MPTCP_SVCTYPE_HANDOVER)
		return;

	/* Pass 1: look for a working non-cellular subflow. */
	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
		struct tcpcb *tp;

		if (ifp == NULL || IFNET_IS_CELLULAR(ifp))
			continue;

		tp = sototcpcb(mpts->mpts_socket);

		if ((mpts->mpts_flags & MPTSF_CONNECTED) &&
		    tp->t_state == TCPS_ESTABLISHED) {
			if (!wifi_unusable ||
			    (tp->t_rxtshift == 0 && mptetoso(mpte)->so_snd.sb_cc))
				have_working = 1;
		}
	}

	if (!have_working)
		return;

	/* Pass 2: reset the cellular subflows. */
	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, next) {
		const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;

		if (ifp != NULL && IFNET_IS_CELLULAR(ifp)) {
			soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
			reset_any = 1;
		}
	}

	if (reset_any)
		mptcp_unset_cellicon();
}
/*
 * mptcp_remove_subflows
 *
 * For every subflow flagged MPTSF_CLOSE_REQD: clear the flag and post
 * SO_FILT_HINT_NOSRCADDR on its socket.
 */
static void
mptcp_remove_subflows(struct mptses *mpte)
{
	struct mptsub *mpts, *next;

	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, next) {
		if (!(mpts->mpts_flags & MPTSF_CLOSE_REQD))
			continue;

		mpts->mpts_flags &= ~MPTSF_CLOSE_REQD;
		soevent(mpts->mpts_socket,
		    SO_FILT_HINT_LOCKED | SO_FILT_HINT_NOSRCADDR);
	}
}
/*
 * mptcp_create_subflows
 *
 * Deferred worker armed by mptcp_sched_create_subflows().  Clears the
 * "scheduled" bit, then, for every PCB flagged MPP_CREATE_SUBFLOWS,
 * adds missing subflows, removes the ones marked for closing and drops
 * the socket use count taken when the work was scheduled.
 */
static void
mptcp_create_subflows(__unused void *arg)
{
	struct mppcb *mpp;

	/* OSTestAndClear() reports whether the bit was already clear. */
	if (OSTestAndClear(0x01, &mptcp_create_subflows_scheduled)) {
		mptcplog((LOG_ERR, "%s: bit was already cleared!\n", __func__),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
	}

	lck_mtx_lock(&mtcbinfo.mppi_lock);

	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		struct mptses *mpte;
		struct socket *mp_so;

		if (mpp->mpp_flags & MPP_CREATE_SUBFLOWS) {
			mpp_lock(mpp);
			mpp->mpp_flags &= ~MPP_CREATE_SUBFLOWS;

			mpte = mpp->mpp_pcbe;
			mp_so = mpp->mpp_socket;

			VERIFY(mp_so->so_usecount > 0);

			mptcp_check_subflows_and_add(mpte);
			mptcp_remove_subflows(mpte);

			/* Matches the so_usecount++ done when scheduling. */
			mp_so->so_usecount--;
			mpp_unlock(mpp);
		}
	}

	lck_mtx_unlock(&mtcbinfo.mppi_lock);
}
/*
 * mptcp_sched_create_subflows
 *
 * Request deferred subflow creation for a session.  Does nothing when
 * the connection is not in a state that allows new subflows.  Otherwise
 * flags the PCB (taking a socket use count the first time) and arms the
 * mptcp_create_subflows() timeout unless it is already armed.
 */
void
mptcp_sched_create_subflows(struct mptses *mpte)
{
	struct mppcb *mpp = mpte->mpte_mppcb;
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	struct socket *mp_so = mpp->mpp_socket;

	if (!mptcp_ok_to_create_subflows(mp_tp)) {
		mptcplog((LOG_DEBUG, "%s: not a good time for subflows, state %u flags %#x",
		    __func__, mp_tp->mpt_state, mp_tp->mpt_flags),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
		return;
	}

	/* The use count is released by the worker once it has run. */
	if ((mpp->mpp_flags & MPP_CREATE_SUBFLOWS) == 0) {
		mp_so->so_usecount++;
		mpp->mpp_flags |= MPP_CREATE_SUBFLOWS;
	}

	/* Arm the timeout only if the bit was not set before. */
	if (!OSTestAndSet(0x01, &mptcp_create_subflows_scheduled)) {
		timeout(mptcp_create_subflows, NULL, hz/10);
	}
}
/*
 * mptcp_sopt_alloc
 *
 * Allocate a zeroed socket option record.  With how == M_WAITOK the
 * blocking allocator is used, otherwise the non-blocking one.
 * Returns NULL if the allocation fails.
 */
struct mptopt *
mptcp_sopt_alloc(int how)
{
	struct mptopt *mpo;

	if (how == M_WAITOK) {
		mpo = zalloc(mptopt_zone);
	} else {
		mpo = zalloc_noblock(mptopt_zone);
	}

	if (mpo == NULL)
		return (NULL);

	bzero(mpo, mptopt_zone_size);
	return (mpo);
}
/*
 * mptcp_sopt_free
 *
 * Return a socket option record to its zone.  The record must have
 * been detached from its session first.
 */
void
mptcp_sopt_free(struct mptopt *mpo)
{
	VERIFY((mpo->mpo_flags & MPOF_ATTACHED) == 0);

	zfree(mptopt_zone, mpo);
}
/*
 * mptcp_sopt_insert
 *
 * Append a detached socket option record to the session's option list
 * and mark it attached.  The session lock must be held.
 */
void
mptcp_sopt_insert(struct mptses *mpte, struct mptopt *mpo)
{
	mpte_lock_assert_held(mpte);
	VERIFY((mpo->mpo_flags & MPOF_ATTACHED) == 0);

	mpo->mpo_flags |= MPOF_ATTACHED;
	TAILQ_INSERT_TAIL(&mpte->mpte_sopts, mpo, mpo_entry);
}
/*
 * mptcp_sopt_remove
 *
 * Unlink an attached socket option record from the session's option
 * list and clear its attached mark.  The session lock must be held.
 * The record itself is not freed.
 */
void
mptcp_sopt_remove(struct mptses *mpte, struct mptopt *mpo)
{
	mpte_lock_assert_held(mpte);
	VERIFY((mpo->mpo_flags & MPOF_ATTACHED) != 0);

	mpo->mpo_flags &= ~MPOF_ATTACHED;
	TAILQ_REMOVE(&mpte->mpte_sopts, mpo, mpo_entry);
}
struct mptopt *
mptcp_sopt_find(struct mptses *mpte, struct sockopt *sopt)
{
struct mptopt *mpo;
mpte_lock_assert_held(mpte);
TAILQ_FOREACH(mpo, &mpte->mpte_sopts, mpo_entry) {
if (mpo->mpo_level == sopt->sopt_level &&
mpo->mpo_name == sopt->sopt_name)
break;
}
VERIFY(mpo == NULL || sopt->sopt_valsize == sizeof (int));
return (mpo);
}
/*
 * mptcp_subflow_alloc
 *
 * Allocate a zeroed subflow structure from the subflow zone.
 * Returns NULL if the allocation fails.
 */
static struct mptsub *
mptcp_subflow_alloc(void)
{
	struct mptsub *mpts;

	mpts = zalloc(mptsub_zone);
	if (mpts != NULL) {
		bzero(mpts, mptsub_zone_size);
	}

	return (mpts);
}
/*
 * mptcp_subflow_free
 *
 * Release a subflow structure, including its source address if one was
 * set.  The subflow must be unreferenced, detached, and no longer tied
 * to a session or a socket.
 */
static void
mptcp_subflow_free(struct mptsub *mpts)
{
	VERIFY(mpts->mpts_refcnt == 0);
	VERIFY((mpts->mpts_flags & MPTSF_ATTACHED) == 0);
	VERIFY(mpts->mpts_mpte == NULL);
	VERIFY(mpts->mpts_socket == NULL);

	if (mpts->mpts_src != NULL) {
		FREE(mpts->mpts_src, M_SONAME);
		mpts->mpts_src = NULL;
	}

	zfree(mptsub_zone, mpts);
}
/*
 * mptcp_subflow_addref
 *
 * Take a reference on a subflow.  Panics if the counter wraps to 0.
 */
static void
mptcp_subflow_addref(struct mptsub *mpts)
{
	mpts->mpts_refcnt++;
	if (mpts->mpts_refcnt == 0) {
		panic("%s: mpts %p wraparound refcnt\n", __func__, mpts);
	}
}
/*
 * mptcp_subflow_remref
 *
 * Drop a reference on a subflow and free it when no positive count
 * remains.  Panics if the counter is already 0.
 */
static void
mptcp_subflow_remref(struct mptsub *mpts)
{
	if (mpts->mpts_refcnt == 0) {
		panic("%s: mpts %p negative refcnt\n", __func__, mpts);
	}

	mpts->mpts_refcnt--;
	if (mpts->mpts_refcnt > 0) {
		/* Still referenced elsewhere. */
		return;
	}

	mptcp_subflow_free(mpts);
}
/*
 * mptcp_subflow_attach
 *
 * Bind a freshly created subflow socket to its session:
 *  - the TCP control block is pointed at the MPTCP control block and
 *    at the subflow, and the socket is flagged SOF_MP_SUBFLOW;
 *  - the MPTCP socket gains a use count;
 *  - the subflow is queued on the session, marked attached and given
 *    its session/socket back-pointers;
 *  - two references are taken on the subflow.
 */
static void
mptcp_subflow_attach(struct mptses *mpte, struct mptsub *mpts, struct socket *so)
{
	struct socket *mp_so = mpte->mpte_mppcb->mpp_socket;
	struct tcpcb *tp = sototcpcb(so);

	/* Socket / TCP side. */
	tp->t_mptcb = mpte->mpte_mptcb;
	so->so_flags |= SOF_MP_SUBFLOW;

	/* Session side. */
	mp_so->so_usecount++;
	TAILQ_INSERT_TAIL(&mpte->mpte_subflows, mpts, mpts_entry);
	mpte->mpte_numflows++;

	/* Subflow side. */
	atomic_bitset_32(&mpts->mpts_flags, MPTSF_ATTACHED);
	mpts->mpts_mpte = mpte;
	mpts->mpts_socket = so;
	tp->t_mpsub = mpts;

	/* Two references are taken here, as in the original sequence. */
	mptcp_subflow_addref(mpts);
	mptcp_subflow_addref(mpts);
}
/*
 * mptcp_subflow_necp_cb
 *
 * NECP client callback installed on subflow PCBs.  Only the
 * NECP_CLIENT_CBACTION_NONVIABLE action is handled: the subflow is
 * flagged MPTSF_CLOSE_REQD and subflow (re)creation is scheduled.  In
 * handover mode the flow is additionally marked viable again.
 *
 * handle - the subflow's struct inpcb.
 *
 * NOTE(review): action and flow carry __unused in the signature even
 * though both are used below.
 */
static void
mptcp_subflow_necp_cb(void *handle, __unused int action,
    __unused struct necp_client_flow *flow)
{
	struct inpcb *inp = (struct inpcb *)handle;
	struct socket *so = inp->inp_socket;
	struct mptsub *mpts;
	struct mptses *mpte;

	if (action != NECP_CLIENT_CBACTION_NONVIABLE)
		return;

	/* Unlocked early-out; re-checked once the lock is held. */
	if (so->so_usecount == 0)
		return;

	socket_lock(so, 1);

	if (so->so_usecount != 0) {
		mpte = tptomptp(sototcpcb(so))->mpt_mpte;
		mpts = sototcpcb(so)->t_mpsub;

		mptcplog((LOG_DEBUG, "%s: Subflow became non-viable", __func__),
		    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_VERBOSE);

		mpts->mpts_flags |= MPTSF_CLOSE_REQD;

		mptcp_sched_create_subflows(mpte);

		if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER)
			flow->viable = 1;
	}

	socket_unlock(so, 1);
}
static int
mptcp_subflow_socreate(struct mptses *mpte, struct mptsub *mpts, int dom,
struct socket **so)
{
lck_mtx_t *subflow_mtx;
struct mptopt smpo, *mpo, *tmpo;
struct proc *p;
struct socket *mp_so;
int error;
*so = NULL;
mpte_lock_assert_held(mpte);
mp_so = mptetoso(mpte);
p = proc_find(mp_so->last_pid);
if (p == PROC_NULL) {
mptcplog((LOG_ERR, "%s: Couldn't find proc for pid %u\n", __func__, mp_so->last_pid),
MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
return (ESRCH);
}
mpte_unlock(mpte);
error = socreate_internal(dom, so, SOCK_STREAM, IPPROTO_TCP, p,
SOCF_ASYNC, PROC_NULL);
mpte_lock(mpte);
if (error) {
mptcplog((LOG_ERR, "%s: subflow socreate mp_so 0x%llx unable to create subflow socket error %d\n",
(u_int64_t)VM_KERNEL_ADDRPERM(mp_so), error),
MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
proc_rele(p);
mptcp_subflow_free(mpts);
return (error);
}
subflow_mtx = ((*so)->so_proto->pr_getlock)(*so, 0);
lck_mtx_lock(subflow_mtx);
mptcp_subflow_attach(mpte, mpts, *so);
(*so)->so_state |= SS_NOFDREF;
lck_mtx_unlock(subflow_mtx);
(*so)->so_rcv.sb_flags |= SB_NOCOMPRESS;
(*so)->so_snd.sb_flags |= SB_NOCOMPRESS;
if (mp_so->so_flags1 & SOF1_PRECONNECT_DATA)
(*so)->so_flags1 |= SOF1_PRECONNECT_DATA;
if (mp_so->so_flags1 & SOF1_DATA_IDEMPOTENT)
(*so)->so_flags1 |= SOF1_DATA_IDEMPOTENT;
if (!uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) {
struct mptcb *mp_tp = mpte->mpte_mptcb;
sotoinpcb(*so)->necp_cb = mptcp_subflow_necp_cb;
mpte_unlock(mpte);
error = necp_client_register_socket_flow(mp_so->last_pid,
mpsotomppcb(mp_so)->necp_client_uuid, sotoinpcb(*so));
mpte_lock(mpte);
if (error)
goto out_err;
if (mp_tp->mpt_state >= MPTCPS_TIME_WAIT ||
(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP))
goto out_err;
uuid_copy(sotoinpcb(*so)->necp_client_uuid, mpsotomppcb(mp_so)->necp_client_uuid);
} else {
mptcplog((LOG_NOTICE, "%s: uuid is not set!\n"),
MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
}
bzero(&smpo, sizeof (smpo));
smpo.mpo_flags |= MPOF_SUBFLOW_OK;
smpo.mpo_level = SOL_SOCKET;
smpo.mpo_intval = 1;
smpo.mpo_name = SO_NOSIGPIPE;
if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0)
goto out_err;
smpo.mpo_name = SO_NOADDRERR;
if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0)
goto out_err;
smpo.mpo_name = SO_KEEPALIVE;
if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0)
goto out_err;
smpo.mpo_level = IPPROTO_TCP;
smpo.mpo_intval = mptcp_subflow_keeptime;
smpo.mpo_name = TCP_KEEPALIVE;
if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0)
goto out_err;
if (mpte->mpte_mptcb->mpt_state >= MPTCPS_ESTABLISHED) {
smpo.mpo_level = SOL_SOCKET;
smpo.mpo_name = SO_MARK_CELLFALLBACK;
smpo.mpo_intval = 1;
if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0)
goto out_err;
}
TAILQ_FOREACH_SAFE(mpo, &mpte->mpte_sopts, mpo_entry, tmpo) {
int interim;
if (!(mpo->mpo_flags & MPOF_SUBFLOW_OK))
continue;
if (mpo->mpo_level == SOL_SOCKET &&
(mpo->mpo_name == SO_NOSIGPIPE ||
mpo->mpo_name == SO_NOADDRERR ||
mpo->mpo_name == SO_KEEPALIVE))
continue;
interim = (mpo->mpo_flags & MPOF_INTERIM);
if (mptcp_subflow_sosetopt(mpte, mpts, mpo) != 0 && interim) {
mptcplog((LOG_ERR, "%s: subflow socreate mp_so 0x%llx"
" sopt %s val %d interim record removed\n", __func__,
(u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name),
mpo->mpo_intval),
MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
mptcp_sopt_remove(mpte, mpo);
mptcp_sopt_free(mpo);
continue;
}
}
switch (dom) {
case PF_INET:
(*so)->so_proto = &mptcp_subflow_protosw;
break;
#if INET6
case PF_INET6:
(*so)->so_proto = (struct protosw *)&mptcp_subflow_protosw6;
break;
#endif
default:
VERIFY(0);
}
proc_rele(p);
DTRACE_MPTCP3(subflow__create, struct mptses *, mpte,
int, dom, int, error);
return (0);
out_err:
mptcp_subflow_abort(mpts, error);
proc_rele(p);
mptcplog((LOG_ERR, "%s: subflow socreate failed with error %d\n",
__func__, error), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
return (error);
}
/*
 * mptcp_subflow_soclose
 *
 * Close the socket of a subflow once.  A subflow already flagged
 * MPTSF_CLOSED is left alone.  If nothing retains the socket it is
 * closed; otherwise only its use count is dropped.
 */
static void
mptcp_subflow_soclose(struct mptsub *mpts)
{
	struct socket *so = mpts->mpts_socket;

	if (mpts->mpts_flags & MPTSF_CLOSED)
		return;

	VERIFY(so != NULL);
	VERIFY(so->so_flags & SOF_MP_SUBFLOW);
	VERIFY((so->so_state & (SS_NBIO|SS_NOFDREF)) == (SS_NBIO|SS_NOFDREF));

	DTRACE_MPTCP5(subflow__close, struct mptsub *, mpts,
	    struct socket *, so,
	    struct sockbuf *, &so->so_rcv,
	    struct sockbuf *, &so->so_snd,
	    struct mptses *, mpts->mpts_mpte);

	mpts->mpts_flags |= MPTSF_CLOSED;

	if (so->so_retaincnt != 0) {
		/* Still retained: just give up our use count. */
		VERIFY(so->so_usecount > 0);
		so->so_usecount--;
		return;
	}

	soclose_locked(so);
}
/*
 * mptcp_subflow_soconnectx
 *
 * Start the connect on a subflow socket towards mpts_dst (optionally
 * from mpts_src, scoped to mpts_ifscope), on behalf of the process that
 * last used the MPTCP socket.  The session lock must be held.
 *
 * The subflow must be flagged MPTSF_CONNECTING and not MPTSF_CONNECTED.
 * Returns ESRCH if the owning process cannot be found, otherwise the
 * result of soconnectxlocked().
 */
static int
mptcp_subflow_soconnectx(struct mptses *mpte, struct mptsub *mpts)
{
char dbuf[MAX_IPv6_STR_LEN];
struct socket *mp_so, *so;
struct mptcb *mp_tp;
struct sockaddr *dst;
struct proc *p;
int af, error;
mpte_lock_assert_held(mpte);
mp_so = mptetoso(mpte);
mp_tp = mpte->mpte_mptcb;
/* The connect is issued in the context of the MPTCP socket's last user. */
p = proc_find(mp_so->last_pid);
if (p == PROC_NULL) {
mptcplog((LOG_ERR, "%s: Couldn't find proc for pid %u\n", __func__, mp_so->last_pid),
MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
return (ESRCH);
}
so = mpts->mpts_socket;
af = mpts->mpts_dst.sa_family;
VERIFY((mpts->mpts_flags & (MPTSF_CONNECTING|MPTSF_CONNECTED)) == MPTSF_CONNECTING);
VERIFY(mpts->mpts_socket != NULL);
VERIFY(af == AF_INET || af == AF_INET6);
dst = &mpts->mpts_dst;
mptcplog((LOG_DEBUG, "%s: connectx mp_so 0x%llx dst %s[%d] cid %d [pended %s]\n",
__func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
inet_ntop(af, ((af == AF_INET) ? (void *)&SIN(dst)->sin_addr.s_addr :
(void *)&SIN6(dst)->sin6_addr),
dbuf, sizeof (dbuf)),
((af == AF_INET) ? ntohs(SIN(dst)->sin_port) : ntohs(SIN6(dst)->sin6_port)),
mpts->mpts_connid,
((mpts->mpts_flags & MPTSF_CONNECT_PENDING) ? "YES" : "NO")),
MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
/* The connect is being issued now, so it is no longer pending. */
mpts->mpts_flags &= ~MPTSF_CONNECT_PENDING;
/* Hand the MPTCP control block and the current address id to the subflow. */
mptcp_attach_to_subf(so, mpte->mpte_mptcb, mpte->mpte_addrid_last);
error = soconnectxlocked(so, mpts->mpts_src, &mpts->mpts_dst,
p, mpts->mpts_ifscope,
mpte->mpte_associd, NULL, 0, NULL, 0, NULL, NULL);
/* Recorded regardless of the connect result. */
mpts->mpts_iss = sototcpcb(so)->iss;
/* Before establishment with preconnect data: take over the subflow's send window. */
if (mp_tp->mpt_state < MPTCPS_ESTABLISHED &&
(mp_so->so_flags1 & SOF1_PRECONNECT_DATA)) {
mp_tp->mpt_sndwnd = sototcpcb(so)->snd_wnd;
}
/* Advance the address id for the next subflow; 0 is skipped on wrap-around. */
mpte->mpte_addrid_last++;
if (mpte->mpte_addrid_last == 0)
mpte->mpte_addrid_last++;
proc_rele(p);
DTRACE_MPTCP3(subflow__connect, struct mptses *, mpte,
struct mptsub *, mpts, int, error);
if (error)
mptcplog((LOG_ERR, "%s: connectx failed with error %d ifscope %u\n",
__func__, error, mpts->mpts_ifscope),
MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
return (error);
}
/*
 * Receive routine for an MPTCP subflow socket.
 *
 * Detaches mbufs from so->so_rcv and returns them as a chain through *mp0.
 * For mbufs carrying PKTF_MPTCP metadata, a mapping is only handed out once
 * all of its bytes are present in the receive buffer.  The routine never
 * blocks: MSG_DONTWAIT|MSG_NBIO are forced into the flags.
 *
 * uio is unused; mp0 must be non-NULL and controlp must be NULL.
 *
 * Returns 0 on success, EINVAL for bad arguments, EOPNOTSUPP for unsupported
 * flags, ENOTCONN for a defunct or unconnected socket, EWOULDBLOCK when no
 * (complete) data is available, EIO on a mapping or checksum failure, or the
 * error reported by sblock() / stored in so_error.
 */
static int
mptcp_subflow_soreceive(struct socket *so, struct sockaddr **psa,
struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
#pragma unused(uio)
struct socket *mp_so = mptetoso(tptomptp(sototcpcb(so))->mpt_mpte);
int flags, error = 0;
struct proc *p = current_proc();
struct mbuf *m, **mp = mp0;
boolean_t proc_held = FALSE;
mpte_lock_assert_held(tptomptp(sototcpcb(so))->mpt_mpte);
VERIFY(so->so_proto->pr_flags & PR_CONNREQUIRED);
#ifdef MORE_LOCKING_DEBUG
if (so->so_usecount == 1) {
panic("%s: so=%x no other reference on socket\n", __func__, so);
}
#endif
if (mp == NULL || controlp != NULL)
return (EINVAL);
*mp = NULL;
if (psa != NULL)
*psa = NULL;
/* MSG_EOR is stripped from the caller's flags. */
if (flagsp != NULL)
flags = *flagsp &~ MSG_EOR;
else
flags = 0;
if (flags & (MSG_PEEK|MSG_OOB|MSG_NEEDSA|MSG_WAITALL|MSG_WAITSTREAM))
return (EOPNOTSUPP);
flags |= (MSG_DONTWAIT|MSG_NBIO);
/* A defunct socket is rejected; if fully defunct its receive buffer must be empty. */
if (so->so_flags & SOF_DEFUNCT) {
struct sockbuf *sb = &so->so_rcv;
error = ENOTCONN;
if (so->so_state & SS_DEFUNCT)
sb_empty_assert(sb, __func__);
return (error);
}
/*
 * A socket with both SS_NOFDREF and SS_CANTRCVMORE set returns 0 immediately,
 * unless it is still flagged as an MPTCP subflow.
 */
if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
(SS_NOFDREF | SS_CANTRCVMORE) && !(so->so_flags & SOF_MP_SUBFLOW))
return (0);
error = sblock(&so->so_rcv, 0);
if (error != 0)
return (error);
m = so->so_rcv.sb_mb;
/* Empty receive buffer: report the pending error, EOF (0), ENOTCONN or EWOULDBLOCK. */
if (m == NULL) {
SB_MB_CHECK(&so->so_rcv);
if (so->so_error != 0) {
error = so->so_error;
so->so_error = 0;
goto release;
}
if (so->so_state & SS_CANTRCVMORE) {
goto release;
}
if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING))) {
error = ENOTCONN;
goto release;
}
error = EWOULDBLOCK;
goto release;
}
mptcp_update_last_owner(so, mp_so);
/*
 * Charge the receive to the MPTCP socket's last owner when that is not the
 * current process; fall back to the current process if the lookup fails.
 */
if (mp_so->last_pid != proc_pid(p)) {
p = proc_find(mp_so->last_pid);
if (p == PROC_NULL) {
p = current_proc();
} else {
proc_held = TRUE;
}
}
OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
SBLASTRECORDCHK(&so->so_rcv, "mptcp_subflow_soreceive 1");
SBLASTMBUFCHK(&so->so_rcv, "mptcp_subflow_soreceive 1");
/* Each iteration consumes either one mbuf without MPTCP metadata or one full mapping. */
while (m != NULL) {
int dlen = 0, dfin = 0, error_out = 0;
struct mbuf *start = m;
uint64_t dsn;
uint32_t sseq;
uint16_t orig_dlen;
uint16_t csum;
VERIFY(m->m_nextpkt == NULL);
if ((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
orig_dlen = dlen = m->m_pkthdr.mp_rlen;
dsn = m->m_pkthdr.mp_dsn;
sseq = m->m_pkthdr.mp_rseq;
csum = m->m_pkthdr.mp_csum;
} else {
/* No MPTCP metadata on this mbuf: move just this one mbuf to the output chain. */
mptcp_adj_rmap(so, m, 0, 0, 0, 0);
sbfree(&so->so_rcv, m);
if (mp != NULL) {
*mp = m;
mp = &m->m_next;
so->so_rcv.sb_mb = m = m->m_next;
*mp = NULL;
}
if (m != NULL) {
so->so_rcv.sb_lastrecord = m;
} else {
SB_EMPTY_FIXUP(&so->so_rcv);
}
continue;
}
if (m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN)
dfin = 1;
/*
 * Stop if the buffer does not yet hold the whole mapping (the DFIN flag
 * reduces the required byte count by one).  EWOULDBLOCK is reported only
 * when nothing has been handed out so far.
 */
if ((int)so->so_rcv.sb_cc < dlen - dfin) {
mptcplog((LOG_INFO, "%s not enough data (%u) need %u\n",
__func__, so->so_rcv.sb_cc, dlen),
MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_LOG);
if (*mp0 == NULL)
error = EWOULDBLOCK;
goto release;
}
/* Unlink the mbufs of this mapping one at a time and append them to the output chain. */
while (dlen > 0) {
/* A mapping adjustment failure yields EIO and raises a MUSTRST event on the subflow. */
if (mptcp_adj_rmap(so, m, orig_dlen - dlen, dsn, sseq, orig_dlen)) {
error_out = 1;
error = EIO;
dlen = 0;
soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
break;
}
dlen -= m->m_len;
sbfree(&so->so_rcv, m);
if (mp != NULL) {
*mp = m;
mp = &m->m_next;
so->so_rcv.sb_mb = m = m->m_next;
*mp = NULL;
}
/* When only the DFIN's single sequence unit remains, the mapping is complete. */
if (dlen - dfin == 0)
dlen = 0;
VERIFY(dlen <= 0 || m);
}
VERIFY(dlen == 0);
if (m != NULL) {
so->so_rcv.sb_lastrecord = m;
} else {
SB_EMPTY_FIXUP(&so->so_rcv);
}
if (error_out)
goto release;
/* On checksum failure the caller's chain pointer is cleared and EIO returned. */
if (mptcp_validate_csum(sototcpcb(so), start, dsn, sseq, orig_dlen, csum, dfin)) {
error = EIO;
*mp0 = NULL;
goto release;
}
SBLASTRECORDCHK(&so->so_rcv, "mptcp_subflow_soreceive 2");
SBLASTMBUFCHK(&so->so_rcv, "mptcp_subflow_soreceive 2");
}
DTRACE_MPTCP3(subflow__receive, struct socket *, so,
struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd);
if (flagsp != NULL)
*flagsp |= flags;
release:
/* Common exit: unlock the receive buffer and drop the process reference if one was taken. */
sbunlock(&so->so_rcv, TRUE);
if (proc_held)
proc_rele(p);
return (error);
}
/*
 * Send routine for an MPTCP subflow socket.
 *
 * Only the mbuf-chain form is accepted: addr, uio and control must be NULL
 * and flags must be 0.  `top` must carry PKTF_MPTCP and a packet length in
 * the range 1..UINT16_MAX.  After sosendcheck() succeeds the chain is passed
 * to the protocol's pru_send; if sosendcheck() fails the chain is freed here.
 *
 * Returns the error from sosendcheck() or pru_send.
 */
static int
mptcp_subflow_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
struct mbuf *top, struct mbuf *control, int flags)
{
struct socket *mp_so = mptetoso(tptomptp(sototcpcb(so))->mpt_mpte);
struct proc *p = current_proc();
boolean_t en_tracing = FALSE, proc_held = FALSE;
int en_tracing_val;
int sblocked = 1;
int error;
VERIFY(control == NULL);
VERIFY(addr == NULL);
VERIFY(uio == NULL);
VERIFY(flags == 0);
VERIFY((so->so_flags & SOF_CONTENT_FILTER) == 0);
VERIFY(top->m_pkthdr.len > 0 && top->m_pkthdr.len <= UINT16_MAX);
VERIFY(top->m_pkthdr.pkt_flags & PKTF_MPTCP);
/*
 * Energy tracing is started only for AF_INET/AF_INET6 sockets whose last
 * output interface is known and is not a loopback interface.
 */
if (ENTR_SHOULDTRACE &&
(SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
struct inpcb *inp = sotoinpcb(so);
if (inp->inp_last_outifp != NULL &&
!(inp->inp_last_outifp->if_flags & IFF_LOOPBACK)) {
en_tracing = TRUE;
/* en_tracing_val is only read when en_tracing is TRUE. */
en_tracing_val = top->m_pkthdr.len;
KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_START,
VM_KERNEL_ADDRPERM(so),
((so->so_state & SS_NBIO) ? kEnTrFlagNonBlocking : 0),
(int64_t)en_tracing_val);
}
}
mptcp_update_last_owner(so, mp_so);
/*
 * Charge the send to the MPTCP socket's last owner when that is not the
 * current process; fall back to the current process if the lookup fails.
 */
if (mp_so->last_pid != proc_pid(p)) {
p = proc_find(mp_so->last_pid);
if (p == PROC_NULL) {
p = current_proc();
} else {
proc_held = TRUE;
}
}
#if NECP
inp_update_necp_policy(sotoinpcb(so), NULL, NULL, 0);
#endif
OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);
error = sosendcheck(so, NULL, top->m_pkthdr.len, 0, 1, 0, &sblocked, NULL);
if (error)
goto out;
error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, top, NULL, NULL, p);
/* The chain was handed to pru_send; clear the pointer so it is not freed below. */
top = NULL;
out:
if (top != NULL)
m_freem(top);
if (proc_held)
proc_rele(p);
soclearfastopen(so);
if (en_tracing) {
KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_END,
VM_KERNEL_ADDRPERM(so),
((error == EWOULDBLOCK) ? kEnTrFlagNoWork : 0),
(int64_t)en_tracing_val);
}
return (error);
}
/*
 * Create a new subflow for the MPTCP session and, unless the join has to be
 * deferred, start connecting it.
 *
 * src may be NULL; when given it is copied into a fresh allocation.  dst is
 * copied into the subflow.  On success the new connection id is stored in
 * *pcid when pcid is non-NULL.
 *
 * Returns 0 on success, ENOTCONN when the MPTCP state is CLOSE_WAIT or
 * later, ENOMEM on allocation failure, or the error from subflow socket
 * creation / connect.
 */
int
mptcp_subflow_add(struct mptses *mpte, struct sockaddr *src,
struct sockaddr *dst, uint32_t ifscope, sae_connid_t *pcid)
{
struct socket *mp_so, *so = NULL;
struct mptcb *mp_tp;
struct mptsub *mpts = NULL;
int af, error = 0;
mpte_lock_assert_held(mpte);
mp_so = mptetoso(mpte);
mp_tp = mpte->mpte_mptcb;
/* No new subflows once the MPTCP state has reached CLOSE_WAIT or beyond. */
if (mp_tp->mpt_state >= MPTCPS_CLOSE_WAIT) {
mptcplog((LOG_ERR, "%s state %u\n", __func__, mp_tp->mpt_state),
MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
error = ENOTCONN;
goto out_err;
}
mpts = mptcp_subflow_alloc();
if (mpts == NULL) {
mptcplog((LOG_ERR, "%s malloc subflow failed\n", __func__),
MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
error = ENOMEM;
goto out_err;
}
if (src != NULL) {
int len = src->sa_len;
MALLOC(mpts->mpts_src, struct sockaddr *, len, M_SONAME,
M_WAITOK | M_ZERO);
if (mpts->mpts_src == NULL) {
mptcplog((LOG_ERR, "%s malloc mpts_src failed", __func__),
MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
error = ENOMEM;
goto out_err;
}
bcopy(src, mpts->mpts_src, len);
}
/* NOTE(review): dst->sa_len is trusted here; confirm callers bound it to the storage behind mpts_dst. */
memcpy(&mpts->mpts_dst, dst, dst->sa_len);
af = mpts->mpts_dst.sa_family;
mpts->mpts_ifscope = ifscope;
/*
 * NOTE(review): on failure this returns without freeing mpts; presumably
 * mptcp_subflow_socreate() takes ownership of it -- confirm.
 */
if ((error = mptcp_subflow_socreate(mpte, mpts, af, &so)) != 0)
return (error);
mptcp_update_last_owner(mpts->mpts_socket, mp_so);
/* Pick the next connection id, skipping the reserved ALL/ANY values. */
mpte->mpte_connid_last++;
if (mpte->mpte_connid_last == SAE_CONNID_ALL ||
mpte->mpte_connid_last == SAE_CONNID_ANY)
mpte->mpte_connid_last++;
mpts->mpts_connid = mpte->mpte_connid_last;
mpts->mpts_rel_seq = 1;
/* Advance the address-id counter, skipping 0 when it wraps. */
mpte->mpte_addrid_last++;
if (mpte->mpte_addrid_last == 0)
mpte->mpte_addrid_last++;
/* Install the read/write upcalls and the event upcall, all with mpts as argument. */
sock_setupcalls_locked(so, mptcp_subflow_rupcall, mpts, mptcp_subflow_wupcall, mpts, 1);
sock_catchevents_locked(so, mptcp_subflow_eupcall1, mpts,
SO_FILT_HINT_CONNRESET | SO_FILT_HINT_CANTRCVMORE |
SO_FILT_HINT_TIMEOUT | SO_FILT_HINT_NOSRCADDR |
SO_FILT_HINT_IFDENIED | SO_FILT_HINT_CONNECTED |
SO_FILT_HINT_DISCONNECTED | SO_FILT_HINT_MPFAILOVER |
SO_FILT_HINT_MPSTATUS | SO_FILT_HINT_MUSTRST |
SO_FILT_HINT_MPCANTRCVMORE | SO_FILT_HINT_ADAPTIVE_RTIMO |
SO_FILT_HINT_ADAPTIVE_WTIMO);
VERIFY(!(mpts->mpts_flags &
(MPTSF_CONNECTING|MPTSF_CONNECTED|MPTSF_CONNECT_PENDING)));
/*
 * The only subflow of a not-yet-established session is the initial subflow.
 * Any other subflow is held pending until the session has
 * MPTCPF_JOIN_READY set.
 */
if (mp_tp->mpt_state < MPTCPS_ESTABLISHED && mpte->mpte_numflows == 1) {
mpts->mpts_flags |= MPTSF_INITIAL_SUB;
if (mp_tp->mpt_state == MPTCPS_CLOSED) {
mptcp_init_local_parms(mpte);
}
soisconnecting(mp_so);
if (so->so_flags1 & SOF1_PRECONNECT_DATA)
mpts->mpts_flags |= MPTSF_TFO_REQD;
} else {
if (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY))
mpts->mpts_flags |= MPTSF_CONNECT_PENDING;
}
mpts->mpts_flags |= MPTSF_CONNECTING;
if (af == AF_INET || af == AF_INET6) {
char dbuf[MAX_IPv6_STR_LEN];
mptcplog((LOG_DEBUG, "MPTCP Socket: %s "
"mp_so 0x%llx dst %s[%d] cid %d "
"[pending %s]\n", __func__,
(u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
inet_ntop(af, ((af == AF_INET) ?
(void *)&SIN(&mpts->mpts_dst)->sin_addr.s_addr :
(void *)&SIN6(&mpts->mpts_dst)->sin6_addr),
dbuf, sizeof (dbuf)), ((af == AF_INET) ?
ntohs(SIN(&mpts->mpts_dst)->sin_port) :
ntohs(SIN6(&mpts->mpts_dst)->sin6_port)),
mpts->mpts_connid,
((mpts->mpts_flags & MPTSF_CONNECT_PENDING) ?
"YES" : "NO")),
MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
}
/* Connect immediately unless the subflow was marked pending above. */
if (!(mpts->mpts_flags & MPTSF_CONNECT_PENDING))
error = mptcp_subflow_soconnectx(mpte, mpts);
if (error)
goto out_err_close;
if (pcid)
*pcid = mpts->mpts_connid;
return (0);
out_err_close:
/* The subflow socket exists at this point, so it is aborted rather than just freed. */
mptcp_subflow_abort(mpts, error);
return (error);
out_err:
/* Failure before the subflow socket was created: only the mptsub (if any) is released. */
if (mpts)
mptcp_subflow_free(mpts);
return (error);
}
/*
 * Add the tx/rx byte counters of the subflow's inpcb to the statistics
 * entry selected by mptcp_get_statsindex().  Nothing is updated when that
 * lookup returns -1.
 */
void
mptcpstats_update(struct mptcp_itf_stats *stats, struct mptsub *mpts)
{
	struct mptcp_itf_stats *slot;
	struct inpcb *inp;
	int index;

	index = mptcp_get_statsindex(stats, mpts);
	if (index == -1)
		return;

	inp = sotoinpcb(mpts->mpts_socket);
	slot = &stats[index];

	slot->mpis_txbytes += inp->inp_stat->txbytes;
	slot->mpis_rxbytes += inp->inp_stat->rxbytes;
}
/*
 * Detach a subflow from its MPTCP session.
 *
 * The subflow must be attached to `mpte`.  Its byte counters are folded into
 * the per-interface statistics, it is removed from the session's subflow
 * list, its socket upcalls/event handler are cleared, and the links between
 * the subflow, its socket, its tcpcb and the session are severed.  Two
 * subflow references and one use count on the MPTCP socket are dropped.
 */
void
mptcp_subflow_del(struct mptses *mpte, struct mptsub *mpts)
{
struct socket *mp_so = mptetoso(mpte);
/* so and tp are captured up front because mpts->mpts_socket is cleared below. */
struct socket *so = mpts->mpts_socket;
struct tcpcb *tp = sototcpcb(so);
mpte_lock_assert_held(mpte);
VERIFY(mpts->mpts_mpte == mpte);
VERIFY(mpts->mpts_flags & MPTSF_ATTACHED);
VERIFY(mpte->mpte_numflows != 0);
VERIFY(mp_so->so_usecount > 0);
mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx [u=%d,r=%d] cid %d %x error %d\n",
__func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
mp_so->so_usecount, mp_so->so_retaincnt, mpts->mpts_connid,
mpts->mpts_flags, mp_so->so_error),
MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
/* Preserve this subflow's byte counts before it goes away. */
mptcpstats_update(mpte->mpte_itfstats, mpts);
mpte->mpte_init_rxbytes = sotoinpcb(so)->inp_stat->rxbytes;
mpte->mpte_init_txbytes = sotoinpcb(so)->inp_stat->txbytes;
/* Unlink from the session's subflow list. */
atomic_bitclear_32(&mpts->mpts_flags, MPTSF_ATTACHED);
TAILQ_REMOVE(&mpte->mpte_subflows, mpts, mpts_entry);
mpte->mpte_numflows--;
if (mpte->mpte_active_sub == mpts)
mpte->mpte_active_sub = NULL;
/* Clear the upcalls and the event handler registered on the subflow socket. */
sock_setupcalls_locked(so, NULL, NULL, NULL, NULL, 0);
sock_catchevents_locked(so, NULL, NULL, 0);
mptcp_detach_mptcb_from_subf(mpte->mpte_mptcb, so);
mp_so->so_usecount--;
mpts->mpts_mpte = NULL;
mpts->mpts_socket = NULL;
/*
 * Two references are dropped; mpts must not be touched after this point
 * (presumably one was held by the subflow list and one by the socket --
 * confirm against where the references are taken).
 */
mptcp_subflow_remref(mpts);
mptcp_subflow_remref(mpts);
so->so_flags &= ~SOF_MP_SUBFLOW;
tp->t_mptcb = NULL;
tp->t_mpsub = NULL;
}
/*
 * Shut down the write side of a subflow socket.
 *
 * Acts only on a socket that is connected and not already disconnecting or
 * disconnected.  A DATA_FIN is sent first when the MPTCP state is past
 * CLOSE_WAIT.
 */
void
mptcp_subflow_shutdown(struct mptses *mpte, struct mptsub *mpts)
{
	struct socket *so = mpts->mpts_socket;
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	int send_dfin = (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) ? 1 : 0;

	/* Nothing to do if a disconnect is already under way or complete. */
	if (so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED))
		return;

	/* Nothing to do on a socket that is not connected. */
	if (!(so->so_state & SS_ISCONNECTED))
		return;

	mptcplog((LOG_DEBUG, "MPTCP subflow shutdown %s: cid %d fin %d\n",
	    __func__, mpts->mpts_connid, send_dfin),
	    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

	if (send_dfin)
		mptcp_send_dfin(so);

	soshutdownlock(so, SHUT_WR);
}
/*
 * Abort a subflow: drop its TCP connection with `error` (unless the TCP
 * state is already CLOSED) and raise a DISCONNECTED event for it.  A subflow
 * already flagged MPTSF_DISCONNECTED is left alone.
 */
static void
mptcp_subflow_abort(struct mptsub *mpts, int error)
{
	struct socket *so = mpts->mpts_socket;
	struct tcpcb *tp = sototcpcb(so);

	if (!(mpts->mpts_flags & MPTSF_DISCONNECTED)) {
		mptcplog((LOG_DEBUG, "%s aborting connection state %u\n", __func__, tp->t_state),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

		if (tp->t_state != TCPS_CLOSED)
			tcp_drop(tp, error);

		mptcp_subflow_eupcall1(so, mpts, SO_FILT_HINT_DISCONNECTED);
	}
}
/*
 * Disconnect a subflow of the session.
 *
 * Does nothing when the subflow is already disconnecting or disconnected.
 * Otherwise the subflow is flagged MPTSF_DISCONNECTING; if its socket is
 * connected (and not already on its way down) a DATA_FIN is optionally sent
 * and the socket is shut down in both directions and disconnected.  A
 * DISCONNECTED event is always raised for the subflow at the end.
 */
void
mptcp_subflow_disconnect(struct mptses *mpte, struct mptsub *mpts)
{
	struct socket *so;
	int send_dfin;

	mpte_lock_assert_held(mpte);

	VERIFY(mpts->mpts_mpte == mpte);
	VERIFY(mpts->mpts_socket != NULL);

	if (mpts->mpts_flags & (MPTSF_DISCONNECTING|MPTSF_DISCONNECTED))
		return;

	mpts->mpts_flags |= MPTSF_DISCONNECTING;

	so = mpts->mpts_socket;

	/* A DATA_FIN is only sent once the MPTCP state is past CLOSE_WAIT. */
	send_dfin = (mpte->mpte_mptcb->mpt_state > MPTCPS_CLOSE_WAIT) ? 1 : 0;

	if ((so->so_state & SS_ISCONNECTED) &&
	    !(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED))) {
		mptcplog((LOG_DEBUG, "MPTCP Socket %s: cid %d fin %d\n",
		    __func__, mpts->mpts_connid, send_dfin),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

		if (send_dfin)
			mptcp_send_dfin(so);

		(void) soshutdownlock(so, SHUT_RD);
		(void) soshutdownlock(so, SHUT_WR);
		(void) sodisconnectlocked(so);
	}

	/* Raised unconditionally, whether or not the socket was shut down above. */
	mptcp_subflow_eupcall1(so, mpts, SO_FILT_HINT_DISCONNECTED);
}
/*
 * Read upcall installed on subflow sockets; `arg` is the subflow.
 *
 * When upcalls must be deferred, a read wakeup is requested (unless a read
 * upcall is already in progress) and the function returns.  Otherwise every
 * subflow of the session whose socket still has a use count is run through
 * mptcp_subflow_input(), holding a subflow reference and a socket use count
 * across the call.
 */
static void
mptcp_subflow_rupcall(struct socket *so, void *arg, int waitf)
{
#pragma unused(so, waitf)
	struct mptsub *cur = arg, *next;
	struct mptses *mpte = cur->mpts_mpte;

	VERIFY(mpte != NULL);

	if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
		if ((mpte->mpte_mppcb->mpp_flags & MPP_RUPCALL) == 0)
			mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_RWAKEUP;
		return;
	}

	mpte->mpte_mppcb->mpp_flags |= MPP_RUPCALL;

	TAILQ_FOREACH_SAFE(cur, &mpte->mpte_subflows, mpts_entry, next) {
		/* Sockets without any use count are skipped. */
		if (cur->mpts_socket->so_usecount != 0) {
			mptcp_subflow_addref(cur);
			cur->mpts_socket->so_usecount++;

			mptcp_subflow_input(mpte, cur);

			mptcp_subflow_remref(cur);

			VERIFY(cur->mpts_socket->so_usecount != 0);
			cur->mpts_socket->so_usecount--;
		}
	}

	mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_RUPCALL);
}
/*
 * Pull received data out of one subflow socket and feed it to mptcp_input().
 *
 * MPP_INSIDE_INPUT is set for the duration of the call and handed to
 * mptcp_handle_deferred_upcalls() on exit.  Subflows that are not
 * MPTSF_CONNECTED are skipped.
 */
static void
mptcp_subflow_input(struct mptses *mpte, struct mptsub *mpts)
{
struct socket *mp_so = mptetoso(mpte);
struct mbuf *m = NULL;
struct socket *so;
int error, wakeup = 0;
VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_INSIDE_INPUT));
mpte->mpte_mppcb->mpp_flags |= MPP_INSIDE_INPUT;
DTRACE_MPTCP2(subflow__input, struct mptses *, mpte,
struct mptsub *, mpts);
if (!(mpts->mpts_flags & MPTSF_CONNECTED))
goto out;
so = mpts->mpts_socket;
error = sock_receive_internal(so, NULL, &m, 0, NULL);
/* EWOULDBLOCK is not treated as an error; other errors are logged. */
if (error != 0 && error != EWOULDBLOCK) {
mptcplog((LOG_ERR, "%s: cid %d error %d\n",
__func__, mpts->mpts_connid, error),
MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_ERR);
/* ENODATA is propagated to the MPTCP socket and triggers a read wakeup. */
if (error == ENODATA) {
mp_so->so_error = ENODATA;
wakeup = 1;
goto out;
}
} else if (error == 0) {
mptcplog((LOG_DEBUG, "%s: cid %d \n", __func__, mpts->mpts_connid),
MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
}
/* Data received on a degraded subflow that is not the active one is discarded. */
if (m && (mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
!(mpts->mpts_flags & MPTSF_ACTIVE)) {
mptcplog((LOG_DEBUG, "%s: degraded and got data on non-active flow\n",
__func__), MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
m_freem(m);
goto out;
}
if (m != NULL) {
/* Record whether the data arrived over a cellular interface or not. */
if (IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp)) {
mpte->mpte_mppcb->mpp_flags |= MPP_SET_CELLICON;
mpte->mpte_used_cell = 1;
} else {
mpte->mpte_mppcb->mpp_flags |= MPP_UNSET_CELLICON;
mpte->mpte_used_wifi = 1;
}
mptcp_input(mpte, m);
}
/* After a successful read that produced data, call pru_rcvd if the protocol wants it. */
if (error == 0 && m != NULL &&
(so->so_proto->pr_flags & PR_WANTRCVD) && so->so_pcb != NULL)
(*so->so_proto->pr_usrreqs->pru_rcvd)(so, 0);
out:
if (wakeup)
mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_RWAKEUP;
mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_INSIDE_INPUT);
}
/*
 * Write upcall installed on subflow sockets; `arg` is the subflow.
 *
 * Runs mptcp_output() for the owning session.  When upcalls must be
 * deferred, a write wakeup is requested instead (unless a write upcall is
 * already in progress).
 */
static void
mptcp_subflow_wupcall(struct socket *so, void *arg, int waitf)
{
#pragma unused(so, waitf)
	struct mptsub *mpts = arg;
	struct mptses *mpte = mpts->mpts_mpte;

	VERIFY(mpte != NULL);

	if (!mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
		mptcp_output(mpte);
		return;
	}

	if ((mpte->mpte_mppcb->mpp_flags & MPP_WUPCALL) == 0)
		mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WWAKEUP;
}
/*
 * Send data from the MPTCP session down one subflow.
 *
 * Data is taken from the session's reinject queue when that is non-empty,
 * otherwise from the MPTCP socket's send buffer.  The selected bytes are
 * copied into a fresh mbuf chain, every mbuf of which is stamped with the
 * same data-sequence mapping, and the chain is passed to sock_sendmbuf() on
 * the subflow socket.
 *
 * MPP_INSIDE_OUTPUT is set for the duration of the call and handed to
 * mptcp_handle_deferred_upcalls() on exit.  `flags` may contain
 * MPTCP_SUBOUT_PROBING.
 *
 * Returns 0 on success (including the paths that find nothing to send), or
 * the send error.
 */
int
mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts, int flags)
{
struct mptcb *mp_tp = mpte->mpte_mptcb;
struct mbuf *sb_mb, *m, *mpt_mbuf = NULL, *head, *tail;
struct socket *mp_so, *so;
struct tcpcb *tp;
uint64_t mpt_dsn = 0, off = 0;
int sb_cc = 0, error = 0, wakeup = 0;
uint32_t dss_csum;
uint16_t tot_sent = 0;
boolean_t reinjected = FALSE;
mpte_lock_assert_held(mpte);
mp_so = mptetoso(mpte);
so = mpts->mpts_socket;
tp = sototcpcb(so);
VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_INSIDE_OUTPUT));
mpte->mpte_mppcb->mpp_flags |= MPP_INSIDE_OUTPUT;
/* Preconditions: no pending interface feedback, a usable subflow, and congestion-window space. */
VERIFY(!INP_WAIT_FOR_IF_FEEDBACK(sotoinpcb(so)));
VERIFY((mpts->mpts_flags & MPTSF_MP_CAPABLE) ||
(mpts->mpts_flags & MPTSF_MP_DEGRADED) ||
(mpts->mpts_flags & MPTSF_TFO_REQD));
VERIFY(mptcp_subflow_cwnd_space(mpts->mpts_socket) > 0);
mptcplog((LOG_DEBUG, "%s mpts_flags %#x, mpte_flags %#x cwnd_space %u\n",
__func__, mpts->mpts_flags, mpte->mpte_flags,
mptcp_subflow_cwnd_space(so)),
MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
DTRACE_MPTCP2(subflow__output, struct mptses *, mpte,
struct mptsub *, mpts);
/* Hand a pending remove-address request over to this subflow's tcpcb. */
if (mpte->mpte_flags & MPTE_SND_REM_ADDR) {
tp->t_rem_aid = mpte->mpte_lost_aid;
tp->t_mpflags |= TMPF_SND_REM_ADDR;
mpte->mpte_flags &= ~MPTE_SND_REM_ADDR;
}
/* The reinject queue takes precedence over the MPTCP send buffer. */
if (mpte->mpte_reinjectq)
sb_mb = mpte->mpte_reinjectq;
else
sb_mb = mp_so->so_snd.sb_mb;
if (sb_mb == NULL) {
mptcplog((LOG_ERR, "%s: No data in MPTCP-sendbuffer! smax %u snxt %u suna %u\n",
__func__, (uint32_t)mp_tp->mpt_sndmax, (uint32_t)mp_tp->mpt_sndnxt, (uint32_t)mp_tp->mpt_snduna),
MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
goto out;
}
VERIFY(sb_mb->m_pkthdr.pkt_flags & PKTF_MPTCP);
/* Zero-length mapping on a not-yet-connected socket with preconnect data: TFO zero-length write. */
if (sb_mb->m_pkthdr.mp_rlen == 0 &&
!(so->so_state & SS_ISCONNECTED) &&
(so->so_flags1 & SOF1_PRECONNECT_DATA)) {
tp->t_mpflags |= TMPF_TFO_REQUEST;
goto zero_len_write;
}
mpt_dsn = sb_mb->m_pkthdr.mp_dsn;
/* Discard data whose DSN lies below snduna, from whichever queue is in use. */
if (MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_snduna)) {
mptcplog((LOG_ERR, "%s: dropping data, should have been done earlier "
"dsn %u suna %u reinject? %u\n",
__func__, (uint32_t)mpt_dsn,
(uint32_t)mp_tp->mpt_snduna, !!mpte->mpte_reinjectq),
MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
if (mpte->mpte_reinjectq) {
mptcp_clean_reinjectq(mpte);
} else {
uint64_t len = 0;
len = mp_tp->mpt_snduna - mpt_dsn;
sbdrop(&mp_so->so_snd, (int)len);
wakeup = 1;
}
}
/* The drop above may have emptied both queues. */
if (mp_so->so_snd.sb_mb == NULL && mpte->mpte_reinjectq == NULL) {
mptcplog((LOG_ERR, "%s send-buffer is empty\n", __func__),
MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
goto out;
}
/* Degraded subflow after the fallback sync: again drop send-buffer data below snduna. */
if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
(mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC) &&
mp_so->so_snd.sb_mb) {
mpt_dsn = mp_so->so_snd.sb_mb->m_pkthdr.mp_dsn;
if (MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_snduna)) {
uint64_t len = 0;
len = mp_tp->mpt_snduna - mpt_dsn;
sbdrop(&mp_so->so_snd, (int)len);
wakeup = 1;
mptcplog((LOG_ERR, "%s: dropping data in degraded mode, should have been done earlier dsn %u sndnxt %u suna %u\n",
__func__, (uint32_t)mpt_dsn, (uint32_t)mp_tp->mpt_sndnxt, (uint32_t)mp_tp->mpt_snduna),
MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
}
}
/* First output on a degraded subflow: mark the fallback sync on both levels. */
if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
!(mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC)) {
mp_tp->mpt_flags |= MPTCPF_POST_FALLBACK_SYNC;
so->so_flags1 |= SOF1_POST_FALLBACK_SYNC;
}
/* sndnxt must never lag behind snduna. */
if (MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna))
mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;
/* Re-select the source queue; the drops above may have changed its head. */
if (mpte->mpte_reinjectq)
sb_mb = mpte->mpte_reinjectq;
else
sb_mb = mp_so->so_snd.sb_mb;
if (sb_mb == NULL) {
mptcplog((LOG_ERR, "%s send-buffer is still empty\n", __func__),
MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
goto out;
}
/*
 * Decide how many bytes (sb_cc) to send and at which offset (off):
 *  - reinjection or probing: one mapping's worth from the head mbuf;
 *  - otherwise: the smaller of the send buffer's byte count and the MPTCP
 *    send window, minus what lies between snduna and sndnxt.
 */
if (mpte->mpte_reinjectq) {
sb_cc = sb_mb->m_pkthdr.mp_rlen;
} else if (flags & MPTCP_SUBOUT_PROBING) {
sb_cc = sb_mb->m_pkthdr.mp_rlen;
off = 0;
} else {
sb_cc = min(mp_so->so_snd.sb_cc, mp_tp->mpt_sndwnd);
if ((mp_so->so_flags1 & SOF1_PRECONNECT_DATA) ||
MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_sndmax)) {
off = mp_tp->mpt_sndnxt - mp_tp->mpt_snduna;
sb_cc -= off;
} else {
mptcplog((LOG_ERR, "%s this should not happen: sndnxt %u sndmax %u\n",
__func__, (uint32_t)mp_tp->mpt_sndnxt,
(uint32_t)mp_tp->mpt_sndmax),
MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
goto out;
}
}
sb_cc = min(sb_cc, mptcp_subflow_cwnd_space(so));
/* Only logged: with sb_cc <= 0 the copy loop below does not execute. */
if (sb_cc <= 0) {
mptcplog((LOG_ERR, "%s sb_cc is %d, mp_so->sb_cc %u, sndwnd %u,sndnxt %u sndmax %u cwnd %u\n",
__func__, sb_cc, mp_so->so_snd.sb_cc, mp_tp->mpt_sndwnd,
(uint32_t)mp_tp->mpt_sndnxt, (uint32_t)mp_tp->mpt_sndmax,
mptcp_subflow_cwnd_space(so)),
MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
}
/* tot_sent is a uint16_t, so a single call never sends more than UINT16_MAX bytes. */
sb_cc = min(sb_cc, UINT16_MAX);
/* The DSN of the data about to be sent. */
if (mpte->mpte_reinjectq)
mpt_dsn = sb_mb->m_pkthdr.mp_dsn;
else
mpt_dsn = mp_tp->mpt_snduna + off;
mpt_mbuf = sb_mb;
/* When sending from the send buffer, skip mbufs lying entirely before the offset. */
while (mpt_mbuf && mpte->mpte_reinjectq == NULL &&
(mpt_mbuf->m_pkthdr.mp_rlen == 0 ||
mpt_mbuf->m_pkthdr.mp_rlen <= (uint32_t)off)) {
off -= mpt_mbuf->m_pkthdr.mp_rlen;
mpt_mbuf = mpt_mbuf->m_next;
}
if (mpts->mpts_flags & MPTSF_MP_DEGRADED)
mptcplog((LOG_DEBUG, "%s: %u snduna = %u sndnxt = %u probe %d\n",
__func__, mpts->mpts_connid, (uint32_t)mp_tp->mpt_snduna, (uint32_t)mp_tp->mpt_sndnxt,
mpts->mpts_probecnt),
MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
VERIFY((mpt_mbuf == NULL) || (mpt_mbuf->m_pkthdr.pkt_flags & PKTF_MPTCP));
head = tail = NULL;
/*
 * Copy up to sb_cc bytes, one source mbuf at a time, into the head/tail chain.
 * NOTE(review): mpt_mbuf is dereferenced without a NULL check; presumably
 * sb_cc never exceeds the bytes remaining in the chain -- confirm.
 */
while (tot_sent < sb_cc) {
ssize_t mlen;
mlen = mpt_mbuf->m_len;
mlen -= off;
mlen = min(mlen, sb_cc - tot_sent);
/* error is still 0 on this path, so the function returns 0. */
if (mlen < 0) {
mptcplog((LOG_ERR, "%s mlen %d mp_rlen %u off %u sb_cc %u tot_sent %u\n",
__func__, (int)mlen, mpt_mbuf->m_pkthdr.mp_rlen,
(uint32_t)off, sb_cc, tot_sent),
MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
goto out;
}
if (mlen == 0)
goto next;
m = m_copym_mode(mpt_mbuf, (int)off, mlen, M_DONTWAIT,
M_COPYM_MUST_COPY_HDR);
/* On copy failure, stop copying; whatever was already chained is still sent below. */
if (m == NULL) {
mptcplog((LOG_ERR, "%s m_copym_mode failed\n", __func__),
MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
error = ENOBUFS;
break;
}
VERIFY(m->m_flags & M_PKTHDR);
VERIFY(m->m_next == NULL);
/* Stamp the copy with the mapping: same DSN and relative sequence for every mbuf. */
m->m_pkthdr.pkt_flags |= PKTF_MPTCP;
m->m_pkthdr.pkt_flags &= ~PKTF_MPSO;
m->m_pkthdr.mp_dsn = mpt_dsn;
m->m_pkthdr.mp_rseq = mpts->mpts_rel_seq;
m->m_pkthdr.len = mlen;
if (head == NULL) {
head = tail = m;
} else {
tail->m_next = m;
tail = m;
}
tot_sent += mlen;
/* The offset only applies to the first mbuf that contributes data. */
off = 0;
next:
mpt_mbuf = mpt_mbuf->m_next;
}
/*
 * Reinjection bookkeeping: either trim the sent prefix off the head packet
 * (advancing the DSN and shrinking the length on each of its mbufs), or
 * dequeue and free it when it was sent entirely.
 */
if (mpte->mpte_reinjectq) {
reinjected = TRUE;
if (sb_cc < sb_mb->m_pkthdr.mp_rlen) {
struct mbuf *n = sb_mb;
while (n) {
n->m_pkthdr.mp_dsn += sb_cc;
n->m_pkthdr.mp_rlen -= sb_cc;
n = n->m_next;
}
m_adj(sb_mb, sb_cc);
} else {
mpte->mpte_reinjectq = sb_mb->m_nextpkt;
m_freem(sb_mb);
}
}
mptcplog((LOG_DEBUG, "%s: Queued dsn %u ssn %u len %u on sub %u\n",
__func__, (uint32_t)mpt_dsn, mpts->mpts_rel_seq,
tot_sent, mpts->mpts_connid), MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
/* dss_csum is only assigned here and only read below under the same checksum flag. */
if (head && (mp_tp->mpt_flags & MPTCPF_CHECKSUM)) {
dss_csum = mptcp_output_csum(head, mpt_dsn, mpts->mpts_rel_seq,
tot_sent);
}
mpts->mpts_rel_seq += tot_sent;
/* Every mbuf of the outgoing chain gets the total mapping length (and checksum). */
m = head;
while (m) {
if (mp_tp->mpt_flags & MPTCPF_CHECKSUM)
m->m_pkthdr.mp_csum = dss_csum;
m->m_pkthdr.mp_rlen = tot_sent;
m = m->m_next;
}
if (head != NULL) {
if ((mpts->mpts_flags & MPTSF_TFO_REQD) &&
(tp->t_tfo_stats == 0))
tp->t_mpflags |= TMPF_TFO_REQUEST;
error = sock_sendmbuf(so, NULL, head, 0, NULL);
DTRACE_MPTCP7(send, struct mbuf *, m, struct socket *, so,
struct sockbuf *, &so->so_rcv,
struct sockbuf *, &so->so_snd,
struct mptses *, mpte, struct mptsub *, mpts,
size_t, tot_sent);
}
done_sending:
/* EWOULDBLOCK together with a TFO request counts as success. */
if (error == 0 ||
(error == EWOULDBLOCK && (tp->t_mpflags & TMPF_TFO_REQUEST))) {
uint64_t new_sndnxt = mp_tp->mpt_sndnxt + tot_sent;
/* Probe accounting: count at least one probe, otherwise one per full segment sent. */
if (mpts->mpts_probesoon && mpts->mpts_maxseg && tot_sent) {
tcpstat.tcps_mp_num_probes++;
if ((uint32_t)tot_sent < mpts->mpts_maxseg)
mpts->mpts_probecnt += 1;
else
mpts->mpts_probecnt +=
tot_sent/mpts->mpts_maxseg;
}
/* sndnxt only advances for new data, not for reinjections or probes. */
if (!reinjected && !(flags & MPTCP_SUBOUT_PROBING)) {
if (MPTCP_DATASEQ_HIGH32(new_sndnxt) >
MPTCP_DATASEQ_HIGH32(mp_tp->mpt_sndnxt))
mp_tp->mpt_flags |= MPTCPF_SND_64BITDSN;
mp_tp->mpt_sndnxt = new_sndnxt;
}
mptcp_cancel_timer(mp_tp, MPTT_REXMT);
soclearfastopen(mp_so);
if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) ||
(mpts->mpts_probesoon != 0))
mptcplog((LOG_DEBUG, "%s %u degraded %u wrote %d %d probe %d probedelta %d\n",
__func__, mpts->mpts_connid,
!!(mpts->mpts_flags & MPTSF_MP_DEGRADED),
tot_sent, (int) sb_cc, mpts->mpts_probecnt,
(tcp_now - mpts->mpts_probesoon)),
MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
/* Record whether the data left over a cellular interface or not. */
if (IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp)) {
mpte->mpte_mppcb->mpp_flags |= MPP_SET_CELLICON;
mpte->mpte_used_cell = 1;
} else {
mpte->mpte_mppcb->mpp_flags |= MPP_UNSET_CELLICON;
mpte->mpte_used_wifi = 1;
}
error = 0;
} else {
mptcplog((LOG_ERR, "%s: %u error %d len %d subflags %#x sostate %#x soerror %u hiwat %u lowat %u\n",
__func__, mpts->mpts_connid, error, tot_sent, so->so_flags, so->so_state, so->so_error, so->so_snd.sb_hiwat, so->so_snd.sb_lowat),
MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
}
out:
/* Common exit: request a write wakeup if send-buffer data was dropped above. */
if (wakeup)
mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WWAKEUP;
mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_INSIDE_OUTPUT);
return (error);
zero_len_write:
/* Call pru_send with no mbuf, then share the normal post-send handling. */
error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, NULL, NULL,
NULL, current_proc());
goto done_sending;
}
static void
mptcp_add_reinjectq(struct mptses *mpte, struct mbuf *m)
{
struct mbuf *n, *prev = NULL;
mptcplog((LOG_DEBUG, "%s reinjecting dsn %u dlen %u rseq %u\n",
__func__, (uint32_t)m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rlen,
m->m_pkthdr.mp_rseq),
MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
n = mpte->mpte_reinjectq;
while (n) {
if (MPTCP_SEQ_GEQ(n->m_pkthdr.mp_dsn, m->m_pkthdr.mp_dsn))
break;
prev = n;
n = n->m_nextpkt;
}
if (n) {
if (n->m_pkthdr.mp_dsn == m->m_pkthdr.mp_dsn &&
n->m_pkthdr.mp_rlen >= m->m_pkthdr.mp_rlen) {
mptcplog((LOG_DEBUG, "%s fully covered with len %u\n",
__func__, n->m_pkthdr.mp_rlen),
MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
goto dont_queue;
}
if (m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen >= n->m_pkthdr.mp_dsn + n->m_pkthdr.mp_rlen) {
struct mbuf *tmp = n->m_nextpkt;
mptcplog((LOG_DEBUG, "%s m is covering that guy dsn %u len %u dsn %u len %u\n",
__func__, m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rlen,
n->m_pkthdr.mp_dsn, n->m_pkthdr.mp_rlen),
MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
m->m_nextpkt = NULL;
if (prev == NULL)
mpte->mpte_reinjectq = tmp;
else
prev->m_nextpkt = tmp;
m_freem(n);
n = tmp;
}
}
if (prev) {
if (prev->m_pkthdr.mp_dsn + prev->m_pkthdr.mp_rlen >= m->m_pkthdr.mp_dsn + m->m_pkthdr.len) {
mptcplog((LOG_DEBUG, "%s prev covers us from %u with len %u\n",
__func__, prev->m_pkthdr.mp_dsn, prev->m_pkthdr.mp_rlen),
MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
goto dont_queue;
}
}
if (prev == NULL)
mpte->mpte_reinjectq = m;
else
prev->m_nextpkt = m;
m->m_nextpkt = n;
return;
dont_queue:
m_freem(m);
return;
}
/*
 * Find the mbuf in the MPTCP socket's send buffer whose mapping
 * [mp_dsn, mp_dsn + mp_rlen) contains `dsn`.
 *
 * The walk follows m_next and stops early, returning NULL, as soon as an
 * mbuf whose DSN lies beyond `dsn` is met.  NULL is also returned when the
 * end of the chain is reached without a match.
 */
static struct mbuf *
mptcp_lookup_dsn(struct mptses *mpte, uint64_t dsn)
{
	struct socket *mp_so = mptetoso(mpte);
	struct mbuf *cur;

	for (cur = mp_so->so_snd.sb_mb; cur != NULL; cur = cur->m_next) {
		/* This mapping covers the requested DSN. */
		if (MPTCP_SEQ_LEQ(cur->m_pkthdr.mp_dsn, dsn) &&
		    MPTCP_SEQ_GT(cur->m_pkthdr.mp_dsn + cur->m_pkthdr.mp_rlen, dsn))
			return cur;

		/* Already past the requested DSN: it is not in the buffer. */
		if (MPTCP_SEQ_GT(cur->m_pkthdr.mp_dsn, dsn))
			return NULL;
	}

	return NULL;
}
/*
 * Copy `len` bytes worth of mbufs, starting at `m` and following m_next,
 * into a new chain.
 *
 * Every source mbuf must be a packet-header mbuf flagged PKTF_MPTCP; each is
 * copied whole together with its header, and every copy is asserted to carry
 * the DSN, mapping length and relative sequence of the first mbuf.  The
 * copies are additionally flagged PKTF_MPSO.
 *
 * Returns the head of the new chain, or NULL if a copy fails (any partial
 * chain is freed).
 */
static struct mbuf *
mptcp_copy_mbuf_list(struct mbuf *m, int len)
{
	struct mbuf *top = NULL;
	struct mbuf **link = &top;	/* where the next copy gets attached */
	uint64_t dsn = m->m_pkthdr.mp_dsn;
	uint32_t dlen = m->m_pkthdr.mp_rlen;
	uint32_t rseq = m->m_pkthdr.mp_rseq;

	for (; len > 0; len -= m->m_len, m = m->m_next) {
		struct mbuf *copy;

		VERIFY((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP));

		copy = m_copym_mode(m, 0, m->m_len, M_DONTWAIT, M_COPYM_MUST_COPY_HDR);
		if (copy == NULL) {
			mptcplog((LOG_ERR, "%s m_copym_mode returned NULL\n", __func__),
			    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);

			/* Release whatever was copied so far. */
			if (top)
				m_freem(top);
			return NULL;
		}

		VERIFY(copy->m_flags & M_PKTHDR);
		VERIFY(copy->m_next == NULL);
		VERIFY(copy->m_pkthdr.mp_dsn == dsn);
		VERIFY(copy->m_pkthdr.mp_rlen == dlen);
		VERIFY(copy->m_pkthdr.mp_rseq == rseq);
		VERIFY(copy->m_len == m->m_len);

		copy->m_pkthdr.pkt_flags |= (PKTF_MPSO | PKTF_MPTCP);

		/* Append: the first copy becomes `top`, later ones hang off m_next. */
		*link = copy;
		link = &copy->m_next;
	}

	return top;
}
/*
 * Queue the unacknowledged data of a subflow socket for reinjection.
 *
 * Walks the subflow's send buffer mapping by mapping.  A mapping is skipped
 * when it was already marked for reinjection or when it is fully
 * acknowledged at the data level.  Otherwise it is copied and handed to
 * mptcp_add_reinjectq(), and its mbufs are marked PKTF_MPTCP_REINJ.
 * The walk stops early if a copy fails.
 */
static void
mptcp_reinject_mbufs(struct socket *so)
{
struct tcpcb *tp = sototcpcb(so);
struct mptsub *mpts = tp->t_mpsub;
struct mptcb *mp_tp = tptomptp(tp);
struct mptses *mpte = mp_tp->mpt_mpte;;
struct sockbuf *sb = &so->so_snd;
struct mbuf *m;
m = sb->sb_mb;
while (m) {
/* n scans ahead over this mapping; orig remembers the mapping's first mbuf. */
struct mbuf *n = m->m_next, *orig = m;
mptcplog((LOG_DEBUG, "%s working on suna %u relseq %u iss %u len %u pktflags %#x\n",
__func__, tp->snd_una, m->m_pkthdr.mp_rseq, mpts->mpts_iss,
m->m_pkthdr.mp_rlen, m->m_pkthdr.pkt_flags),
MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
VERIFY((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP));
/* Already queued for reinjection earlier. */
if (m->m_pkthdr.pkt_flags & PKTF_MPTCP_REINJ)
goto next;
/* The whole mapping lies at or below snduna: nothing left to reinject. */
if (MPTCP_SEQ_GEQ(mp_tp->mpt_snduna, m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen))
goto next;
/*
 * The subflow-level acknowledgement has moved past the start of this
 * mapping: take the data from the MPTCP socket's send buffer instead.
 */
if (SEQ_GT(tp->snd_una - mpts->mpts_iss, m->m_pkthdr.mp_rseq)) {
m = mptcp_lookup_dsn(mpte, m->m_pkthdr.mp_dsn);
if (m == NULL)
goto next;
}
m = mptcp_copy_mbuf_list(m, m->m_pkthdr.mp_rlen);
/* Copy failed: give up on the remaining mappings. */
if (m == NULL)
break;
VERIFY(m->m_nextpkt == NULL);
mptcp_add_reinjectq(mpte, m);
orig->m_pkthdr.pkt_flags |= PKTF_MPTCP_REINJ;
next:
/* Advance past the remaining mbufs that carry the same DSN as orig, marking each. */
while (n) {
VERIFY((n->m_flags & M_PKTHDR) && (n->m_pkthdr.pkt_flags & PKTF_MPTCP));
if (n->m_pkthdr.mp_dsn != orig->m_pkthdr.mp_dsn)
break;
n->m_pkthdr.pkt_flags |= PKTF_MPTCP_REINJ;
n = n->m_next;
}
m = n;
}
}
/*
 * Purge fully acknowledged packets from the head of the reinject queue.
 *
 * A packet covers the data sequence range [mp_dsn, mp_dsn + mp_rlen).  It is
 * fully acknowledged once mpt_snduna has reached the end of that range, i.e.
 * when mp_dsn + mp_rlen <= mpt_snduna.  Purging stops at the first packet
 * that still has unacknowledged bytes.
 *
 * The end-of-range test is strict (MPTCP_SEQ_GT): a packet that ends exactly
 * at mpt_snduna has no unacknowledged byte left and must be removed.  This
 * matches mptcp_reinject_mbufs(), which treats
 * "snduna >= mp_dsn + mp_rlen" as fully acknowledged.
 */
void
mptcp_clean_reinjectq(struct mptses *mpte)
{
	struct mptcb *mp_tp = mpte->mpte_mptcb;

	mpte_lock_assert_held(mpte);

	while (mpte->mpte_reinjectq) {
		struct mbuf *m = mpte->mpte_reinjectq;

		/* Keep the packet if any of its bytes is still unacknowledged. */
		if (MPTCP_SEQ_GEQ(m->m_pkthdr.mp_dsn, mp_tp->mpt_snduna) ||
		    MPTCP_SEQ_GT(m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen, mp_tp->mpt_snduna))
			break;

		/* Unlink before freeing so m_freem() only releases this packet. */
		mpte->mpte_reinjectq = m->m_nextpkt;
		m->m_nextpkt = NULL;
		m_freem(m);
	}
}
/*
 * Event upcall installed on subflow sockets; `arg` is the subflow.
 *
 * Records `events` in the subflow's pending-event mask.  If every requested
 * event was already pending nothing else happens.  Otherwise the subflow
 * workloop is run, or merely requested when upcalls have to be deferred.
 */
static void
mptcp_subflow_eupcall1(struct socket *so, void *arg, uint32_t events)
{
#pragma unused(so)
	struct mptsub *mpts = arg;
	struct mptses *mpte = mpts->mpts_mpte;

	VERIFY(mpte != NULL);
	mpte_lock_assert_held(mpte);

	/* No requested bit outside the already-pending set: nothing new. */
	if ((events & ~mpts->mpts_evctl) == 0)
		return;

	mpts->mpts_evctl |= events;

	if (mptcp_should_defer_upcall(mpte->mpte_mppcb))
		mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WORKLOOP;
	else
		mptcp_subflow_workloop(mpte);
}
/*
 * Dispatch the pending events of one subflow to their handlers.
 *
 * Handlers are taken from mpsub_ev_entry_tbl in table order.  Each handled
 * event bit is cleared from mpts_evctl before its handler runs.  Handlers may
 * add bits to *p_mpsofilt_hint.
 *
 * Returns MPTS_EVRET_OK when nothing was pending; otherwise the largest
 * non-negative handler result, or the most recent negative handler result.
 */
static ev_ret_t
mptcp_subflow_events(struct mptses *mpte, struct mptsub *mpts,
uint64_t *p_mpsofilt_hint)
{
ev_ret_t ret = MPTS_EVRET_OK;
int i, mpsub_ev_entry_count = sizeof(mpsub_ev_entry_tbl) /
sizeof(mpsub_ev_entry_tbl[0]);
mpte_lock_assert_held(mpte);
if (!mpts->mpts_evctl)
return (ret);
/* Any of these events additionally schedules the failover event. */
if (mpts->mpts_evctl & (SO_FILT_HINT_CONNRESET|SO_FILT_HINT_MUSTRST|
SO_FILT_HINT_CANTSENDMORE|SO_FILT_HINT_TIMEOUT|
SO_FILT_HINT_NOSRCADDR|SO_FILT_HINT_IFDENIED|
SO_FILT_HINT_DISCONNECTED)) {
mpts->mpts_evctl |= SO_FILT_HINT_MPFAILOVER;
}
DTRACE_MPTCP3(subflow__events, struct mptses *, mpte,
struct mptsub *, mpts, uint32_t, mpts->mpts_evctl);
mptcplog((LOG_DEBUG, "%s cid %d events=%b\n", __func__,
mpts->mpts_connid, mpts->mpts_evctl, SO_FILT_HINT_BITS),
MPTCP_EVENTS_DBG, MPTCP_LOGLVL_VERBOSE);
/*
 * Walk the table while events remain.  Once a handler has returned a
 * negative result, only the DISCONNECTED handler is still run.
 */
for (i = 0; i < mpsub_ev_entry_count && mpts->mpts_evctl; i++) {
if ((mpts->mpts_evctl & mpsub_ev_entry_tbl[i].sofilt_hint_mask) &&
(ret >= MPTS_EVRET_OK ||
mpsub_ev_entry_tbl[i].sofilt_hint_mask == SO_FILT_HINT_DISCONNECTED)) {
mpts->mpts_evctl &= ~mpsub_ev_entry_tbl[i].sofilt_hint_mask;
ev_ret_t error =
mpsub_ev_entry_tbl[i].sofilt_hint_ev_hdlr(mpte, mpts, p_mpsofilt_hint, mpsub_ev_entry_tbl[i].sofilt_hint_mask);
/* Non-negative results accumulate as a maximum; a negative result replaces ret. */
ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
}
}
/* Leftover event bits or a negative result are logged as a warning. */
if (mpts->mpts_evctl || ret < MPTS_EVRET_OK)
mptcplog((LOG_WARNING, "%s%s: cid %d evret %s (%d) unhandled events=%b\n", __func__,
(mpts->mpts_evctl && ret == MPTS_EVRET_OK) ? "MPTCP_ERROR " : "",
mpts->mpts_connid,
mptcp_evret2str(ret), ret, mpts->mpts_evctl, SO_FILT_HINT_BITS),
MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
else
mptcplog((LOG_DEBUG, "%s: Done, events %b\n", __func__,
mpts->mpts_evctl, SO_FILT_HINT_BITS),
MPTCP_EVENTS_DBG, MPTCP_LOGLVL_VERBOSE);
return (ret);
}
/*
 * Event handler that forwards a subflow event to the MPTCP socket.
 *
 * The subflow socket's so_error is copied to the MPTCP socket and `event`
 * is added to *p_mpsofilt_hint, but only when the MPTCP connection is not
 * yet established, or when it has fallen back to TCP and this subflow is
 * the active one.
 *
 * Always returns MPTS_EVRET_OK.
 */
static ev_ret_t
mptcp_subflow_propagate_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
	struct socket *mp_so, *so;
	struct mptcb *mp_tp;

	mpte_lock_assert_held(mpte);
	VERIFY(mpte->mpte_mppcb != NULL);
	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;
	so = mpts->mpts_socket;

	/* `event` is 64 bits wide, so it needs a 64-bit conversion. */
	mptcplog((LOG_DEBUG, "%s: cid %d event %llu\n", __func__,
	    mpts->mpts_connid, (unsigned long long)event),
	    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED ||
	    ((mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) && (mpts->mpts_flags & MPTSF_ACTIVE))) {
		mp_so->so_error = so->so_error;
		*p_mpsofilt_hint |= event;
	}

	return (MPTS_EVRET_OK);
}
/*
 * Subflow event handler for a subflow that has no usable source address.
 *
 * Flags the session with MPTE_SND_REM_ADDR, records the subflow's local
 * address ID in mpte_lost_aid and aborts the subflow with EADDRNOTAVAIL.
 * The event is handed to mptcp_subflow_propagate_ev() only when the MPTCP
 * socket has SOF_NOADDRAVAIL set.
 *
 * Always returns MPTS_EVRET_DELETE.
 */
static ev_ret_t
mptcp_subflow_nosrcaddr_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
	struct socket *mp_so;
	struct tcpcb *sub_tp;

	mpte_lock_assert_held(mpte);
	VERIFY(mpte->mpte_mppcb != NULL);

	mp_so = mptetoso(mpte);
	sub_tp = intotcpcb(sotoinpcb(mpts->mpts_socket));

	/* Remember which local address went away before the subflow is aborted. */
	mpte->mpte_flags |= MPTE_SND_REM_ADDR;
	mpte->mpte_lost_aid = sub_tp->t_local_aid;

	mptcplog((LOG_DEBUG, "%s cid %d\n", __func__, mpts->mpts_connid),
	    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

	mptcp_subflow_abort(mpts, EADDRNOTAVAIL);

	if (mp_so->so_flags & SOF_NOADDRAVAIL) {
		mptcp_subflow_propagate_ev(mpte, mpts, p_mpsofilt_hint, event);
	}

	return (MPTS_EVRET_DELETE);
}
/*
 * Subflow event handler for the MPTCP-level "cannot receive more" event.
 *
 * SO_FILT_HINT_CANTRCVMORE is added to *p_mpsofilt_hint only when the
 * MPTCP connection is in MPTCPS_CLOSE_WAIT; otherwise the event is just
 * logged.  Always returns MPTS_EVRET_OK.
 */
static ev_ret_t
mptcp_subflow_mpcantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event)
	struct mptcb *mp_tp;

	mpte_lock_assert_held(mpte);

	mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid),
	    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

	mp_tp = mpte->mpte_mptcb;
	if (mp_tp->mpt_state == MPTCPS_CLOSE_WAIT) {
		*p_mpsofilt_hint |= SO_FILT_HINT_CANTRCVMORE;
	}

	return (MPTS_EVRET_OK);
}
/*
 * Subflow event handler for a fail-over request on `mpts'.
 *
 * The subflow's data is first handed to mptcp_reinject_mbufs(), then an
 * alternate subflow is requested from mptcp_get_subflow().  Three outcomes:
 *   - no alternate subflow: SOF_MP_TRYFAILOVER is cleared on mpts' socket;
 *   - an alternate exists but is itself marked MPTSF_FAILINGOVER and either
 *     still has data in its send buffer or mptcp_no_rto_spike() is false:
 *     treated as "no alternate", SOF_MP_TRYFAILOVER is cleared;
 *   - otherwise the alternate becomes the active subflow, `mpts' is marked
 *     MPTSF_FAILINGOVER / not active, and the alternate's writer is woken.
 *
 * Always returns MPTS_EVRET_OK.
 */
static ev_ret_t
mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event, p_mpsofilt_hint)
	struct mptsub *mpts_alt;
	struct socket *alt_so;
	struct socket *mp_so;
	int alt_usable;

	mpte_lock_assert_held(mpte);
	mp_so = mptetoso(mpte);

	mptcplog((LOG_NOTICE, "%s: mp_so 0x%llx\n", __func__,
	    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)),
	    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

	mptcp_reinject_mbufs(mpts->mpts_socket);

	mpts_alt = mptcp_get_subflow(mpte, mpts, NULL);
	if (mpts_alt == NULL) {
		mptcplog((LOG_WARNING, "%s: no alternate path\n", __func__),
		    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
		mpts->mpts_socket->so_flags &= ~SOF_MP_TRYFAILOVER;
		return (MPTS_EVRET_OK);
	}

	alt_so = mpts_alt->mpts_socket;
	alt_usable = 1;

	/*
	 * An alternate that is itself failing over is only reusable once its
	 * send buffer is empty and mptcp_no_rto_spike() agrees.
	 */
	if (mpts_alt->mpts_flags & MPTSF_FAILINGOVER) {
		if (alt_so->so_snd.sb_cc == 0 && mptcp_no_rto_spike(alt_so)) {
			mpts_alt->mpts_flags &= ~MPTSF_FAILINGOVER;
		} else {
			alt_usable = 0;
		}
	}

	if (!alt_usable) {
		mptcplog((LOG_DEBUG, "%s: no alt cid = %d\n", __func__,
		    mpts->mpts_connid),
		    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
		mpts->mpts_socket->so_flags &= ~SOF_MP_TRYFAILOVER;
		return (MPTS_EVRET_OK);
	}

	/* Switch the active subflow over to the alternate. */
	mpts_alt->mpts_flags |= MPTSF_ACTIVE;
	mpte->mpte_active_sub = mpts_alt;
	mpts->mpts_flags |= MPTSF_FAILINGOVER;
	mpts->mpts_flags &= ~MPTSF_ACTIVE;

	mptcplog((LOG_NOTICE, "%s: switched from %d to %d\n",
	    __func__, mpts->mpts_connid, mpts_alt->mpts_connid),
	    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

	mptcpstats_inc_switch(mpte, mpts);
	sowwakeup(alt_so);

	return (MPTS_EVRET_OK);
}
/*
 * Subflow event handler for an "interface denied" event.
 *
 * The subflow is aborted with EPERM and the event is then passed to
 * mptcp_subflow_propagate_ev(), which decides whether the MPTCP socket
 * gets to see it.  Always returns MPTS_EVRET_DELETE.
 */
static ev_ret_t
mptcp_subflow_ifdenied_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
	mpte_lock_assert_held(mpte);
	VERIFY(mpte->mpte_mppcb != NULL);

	mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__,
	    mpts->mpts_connid), MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

	mptcp_subflow_abort(mpts, EPERM);

	mptcp_subflow_propagate_ev(mpte, mpts, p_mpsofilt_hint, event);

	return (MPTS_EVRET_DELETE);
}
/*
 * Subflow event handler for a subflow whose TCP connection completed.
 *
 * Marks the subflow MPTSF_CONNECTED and then takes one of three paths:
 *   - the MPTCP connection is not yet established: it is moved to
 *     MPTCPS_ESTABLISHED, the local address is recorded, and the MPTCP
 *     socket is marked connected (with or without MPTCP capability);
 *   - the connection is established and this subflow is MPTCP capable:
 *     the join succeeded and the subflow is accounted for;
 *   - the connection is established and this subflow is NOT MPTCP capable:
 *     the join fell back to TCP; the interface is flagged, statistics are
 *     bumped and a MUSTRST event is raised on the subflow.
 *
 * Always returns MPTS_EVRET_OK.
 */
static ev_ret_t
mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts,
uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event, p_mpsofilt_hint)
struct socket *mp_so, *so;
struct inpcb *inp;
struct tcpcb *tp;
struct mptcb *mp_tp;
int af;
boolean_t mpok = FALSE;
mpte_lock_assert_held(mpte);
VERIFY(mpte->mpte_mppcb != NULL);
mp_so = mptetoso(mpte);
mp_tp = mpte->mpte_mptcb;
so = mpts->mpts_socket;
tp = sototcpcb(so);
/* NOTE(review): `af' is assigned here but never read in this function. */
af = mpts->mpts_dst.sa_family;
/* Already processed a connected event for this subflow: nothing to do. */
if (mpts->mpts_flags & MPTSF_CONNECTED)
return (MPTS_EVRET_OK);
/*
 * The subflow was marked disconnecting/disconnected before TCP finished
 * connecting.  If the TCP socket is connected and not already going
 * down, shut it down in both directions and disconnect it.
 */
if ((mpts->mpts_flags & MPTSF_DISCONNECTED) ||
(mpts->mpts_flags & MPTSF_DISCONNECTING)) {
if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
(so->so_state & SS_ISCONNECTED)) {
mptcplog((LOG_DEBUG, "%s: cid %d disconnect before tcp connect\n",
__func__, mpts->mpts_connid),
MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
(void) soshutdownlock(so, SHUT_RD);
(void) soshutdownlock(so, SHUT_WR);
(void) sodisconnectlocked(so);
}
return (MPTS_EVRET_OK);
}
/* The TCP socket is already disconnected: ignore the event. */
if (so->so_state & SS_ISDISCONNECTED) {
return (MPTS_EVRET_OK);
}
/* TFO was requested on this subflow: let mptcp_drop_tfo_data() handle its data. */
if (mpts->mpts_flags & MPTSF_TFO_REQD)
mptcp_drop_tfo_data(mpte, mpts);
mpts->mpts_flags &= ~(MPTSF_CONNECTING | MPTSF_TFO_REQD);
mpts->mpts_flags |= MPTSF_CONNECTED;
/* Mirror TCP's view of MPTCP capability into the subflow flags. */
if (tp->t_mpflags & TMPF_MPTCP_TRUE)
mpts->mpts_flags |= MPTSF_MP_CAPABLE;
tp->t_mpflags &= ~TMPF_TFO_REQUEST;
inp = sotoinpcb(so);
mpts->mpts_maxseg = tp->t_maxseg;
/*
 * NOTE(review): inp_last_outifp is NULL-checked only in this log
 * statement; it is dereferenced unconditionally further down
 * (IFNET_IS_CELLULAR, if_index) -- confirm it cannot be NULL here.
 */
mptcplog((LOG_DEBUG, "%s: cid %d outif %s is %s\n", __func__, mpts->mpts_connid,
((inp->inp_last_outifp != NULL) ? inp->inp_last_outifp->if_xname : "NULL"),
((mpts->mpts_flags & MPTSF_MP_CAPABLE) ? "MPTCP capable" : "a regular TCP")),
(MPTCP_SOCKET_DBG | MPTCP_EVENTS_DBG), MPTCP_LOGLVL_LOG);
mpok = (mpts->mpts_flags & MPTSF_MP_CAPABLE);
if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
/* First subflow to connect: the MPTCP connection becomes established. */
mp_tp->mpt_state = MPTCPS_ESTABLISHED;
mpte->mpte_associd = mpts->mpts_connid;
DTRACE_MPTCP2(state__change,
struct mptcb *, mp_tp,
uint32_t, 0 );
/* Record the subflow's local address as the session's source address. */
if (SOCK_DOM(so) == AF_INET) {
in_getsockaddr_s(so, &mpte->__mpte_src_v4);
} else {
in6_getsockaddr_s(so, &mpte->__mpte_src_v6);
}
if (!mpok) {
/* Not MPTCP capable: count the fallback and signal an MP failure. */
tcpstat.tcps_mpcap_fallback++;
tp->t_mpflags |= TMPF_INFIN_SENT;
mptcp_notify_mpfail(so);
} else {
/*
 * A cellular subflow is made a backup path unless the service
 * type is aggregate; any other subflow is preferred.
 */
if (IFNET_IS_CELLULAR(inp->inp_last_outifp) &&
mpte->mpte_svctype != MPTCP_SVCTYPE_AGGREGATE) {
tp->t_mpflags |= (TMPF_BACKUP_PATH | TMPF_SND_MPPRIO);
} else {
mpts->mpts_flags |= MPTSF_PREFERRED;
}
mpts->mpts_flags |= MPTSF_ACTIVE;
/* MPTSF_MPCAP_CTRSET records that this subflow was counted in mpte_nummpcapflows. */
mpts->mpts_flags |= MPTSF_MPCAP_CTRSET;
mpte->mpte_nummpcapflows++;
mptcp_check_subflows_and_add(mpte);
if (IFNET_IS_CELLULAR(inp->inp_last_outifp))
mpte->mpte_initial_cell = 1;
mpte->mpte_handshake_success = 1;
}
/* Seed the MPTCP-level send window from the subflow's TCP state. */
mp_tp->mpt_sndwnd = tp->snd_wnd;
mp_tp->mpt_sndwl1 = mp_tp->mpt_rcvnxt;
mp_tp->mpt_sndwl2 = mp_tp->mpt_snduna;
soisconnected(mp_so);
mptcplog((LOG_DEBUG, "%s: MPTCPS_ESTABLISHED for mp_so 0x%llx mpok %u\n",
__func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mpok),
MPTCP_STATE_DBG, MPTCP_LOGLVL_LOG);
} else if (mpok) {
/*
 * Additional MPTCP-capable subflow on an established connection.
 * A cellular subflow not yet flagged as backup becomes a backup
 * path (unless the service type is aggregate); otherwise preferred.
 */
if (IFNET_IS_CELLULAR(inp->inp_last_outifp) &&
!(tp->t_mpflags & TMPF_BACKUP_PATH) &&
mpte->mpte_svctype != MPTCP_SVCTYPE_AGGREGATE) {
tp->t_mpflags |= (TMPF_BACKUP_PATH | TMPF_SND_MPPRIO);
mpts->mpts_flags &= ~MPTSF_PREFERRED;
} else {
mpts->mpts_flags |= MPTSF_PREFERRED;
}
mpts->mpts_flags |= MPTSF_MPCAP_CTRSET;
mpte->mpte_nummpcapflows++;
mpts->mpts_rel_seq = 1;
mptcp_check_subflows_and_remove(mpte);
} else {
/*
 * Additional subflow that is not MPTCP capable: flag the matching
 * interface entry as lacking MPTCP support, update statistics and
 * ask for the subflow to be reset.
 */
unsigned int i;
for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
struct mpt_itf_info *info = &mpte->mpte_itfinfo[i];
if (inp->inp_last_outifp->if_index == info->ifindex) {
info->no_mptcp_support = 1;
break;
}
}
tcpstat.tcps_join_fallback++;
if (IFNET_IS_CELLULAR(inp->inp_last_outifp))
tcpstat.tcps_mptcp_cell_proxy++;
else
tcpstat.tcps_mptcp_wifi_proxy++;
soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
return (MPTS_EVRET_OK);
}
/* Reached only on the first two paths: set up stats and kick output. */
mptcp_get_statsindex(mpte->mpte_itfstats, mpts);
mptcp_output(mpte);
return (MPTS_EVRET_OK);
}
/*
 * Subflow event handler for a disconnected subflow.
 *
 * On the first disconnect event for the subflow:
 *   - it is marked MPTSF_DISCONNECTED;
 *   - if it was counted in mpte_nummpcapflows (MPTSF_MPCAP_CTRSET) the
 *     counter is decremented and the active-subflow pointer is cleared
 *     when it pointed at this subflow;
 *   - the whole MPTCP connection is dropped with the subflow's so_error
 *     when the connection was not yet established, when it fell back to
 *     TCP and this was the active subflow, or when a fast-close was
 *     received on the subflow;
 *   - the remaining connection-related subflow flags are cleared.
 *
 * Always returns MPTS_EVRET_DELETE (also for repeated events).
 */
static ev_ret_t
mptcp_subflow_disconnected_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event, p_mpsofilt_hint)
	struct socket *mp_so, *so;
	struct mptcb *mp_tp;
	int not_established, fallback_active, fastclose_rcvd;

	mpte_lock_assert_held(mpte);
	VERIFY(mpte->mpte_mppcb != NULL);

	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;
	so = mpts->mpts_socket;

	mptcplog((LOG_DEBUG, "%s: cid %d, so_err %d, mpt_state %u fallback %u active %u flags %#x\n",
	    __func__, mpts->mpts_connid, so->so_error, mp_tp->mpt_state,
	    !!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP),
	    !!(mpts->mpts_flags & MPTSF_ACTIVE), sototcpcb(so)->t_mpflags),
	    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

	/* A second disconnect event has nothing left to undo. */
	if (mpts->mpts_flags & MPTSF_DISCONNECTED) {
		return (MPTS_EVRET_DELETE);
	}

	mpts->mpts_flags |= MPTSF_DISCONNECTED;

	if (mpts->mpts_flags & MPTSF_MPCAP_CTRSET) {
		mpte->mpte_nummpcapflows--;
		if (mpte->mpte_active_sub == mpts) {
			mpte->mpte_active_sub = NULL;
			mptcplog((LOG_DEBUG, "%s: resetting active subflow \n",
			    __func__), MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
		}
		mpts->mpts_flags &= ~MPTSF_MPCAP_CTRSET;
	}

	not_established = (mp_tp->mpt_state < MPTCPS_ESTABLISHED);
	fallback_active = ((mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) &&
	    (mpts->mpts_flags & MPTSF_ACTIVE));
	fastclose_rcvd = ((sototcpcb(so)->t_mpflags & TMPF_FASTCLOSERCV) != 0);

	if (not_established || fallback_active || fastclose_rcvd) {
		mptcp_drop(mpte, mp_tp, so->so_error);
	}

	mpts->mpts_flags &= ~(MPTSF_CONNECTING|MPTSF_CONNECT_PENDING|
	    MPTSF_CONNECTED|MPTSF_DISCONNECTING|MPTSF_PREFERRED|
	    MPTSF_MP_CAPABLE|MPTSF_MP_READY|MPTSF_MP_DEGRADED|MPTSF_ACTIVE);

	return (MPTS_EVRET_DELETE);
}
/*
 * Subflow event handler for a change in the subflow's MPTCP status.
 *
 * Mirrors the TCP-level flags TMPF_MPTCP_TRUE, TMPF_TCP_FALLBACK and
 * TMPF_MPTCP_READY into MPTSF_MP_CAPABLE, MPTSF_MP_DEGRADED and
 * MPTSF_MP_READY on the subflow, then updates the connection flags.
 *
 * Returns:
 *   MPTS_EVRET_OK                   nothing further to do (this includes a
 *                                   subflow that was already degraded);
 *   MPTS_EVRET_DISCONNECT_FALLBACK  the connection is in TCP fallback;
 *   MPTS_EVRET_CONNECT_PENDING      the subflow is MP-ready and the
 *                                   connection is now MPTCPF_JOIN_READY.
 */
static ev_ret_t
mptcp_subflow_mpstatus_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event, p_mpsofilt_hint)
	struct socket *mp_so, *so;
	struct mptcb *mp_tp;
	ev_ret_t ret = MPTS_EVRET_OK;

	mpte_lock_assert_held(mpte);
	VERIFY(mpte->mpte_mppcb != NULL);

	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;
	so = mpts->mpts_socket;

	if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_TRUE) {
		mpts->mpts_flags |= MPTSF_MP_CAPABLE;
	} else {
		mpts->mpts_flags &= ~MPTSF_MP_CAPABLE;
	}

	if (!(sototcpcb(so)->t_mpflags & TMPF_TCP_FALLBACK)) {
		mpts->mpts_flags &= ~MPTSF_MP_DEGRADED;
	} else if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
		/* Fallback was already recorded for this subflow. */
		return (ret);
	} else {
		mpts->mpts_flags |= MPTSF_MP_DEGRADED;
	}

	if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_READY) {
		mpts->mpts_flags |= MPTSF_MP_READY;
	} else {
		mpts->mpts_flags &= ~MPTSF_MP_READY;
	}

	/* A degraded subflow puts the whole connection into TCP fallback. */
	if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
		mp_tp->mpt_flags |= MPTCPF_FALLBACK_TO_TCP;
		mp_tp->mpt_flags &= ~MPTCPF_JOIN_READY;
	}

	if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
		VERIFY(!(mp_tp->mpt_flags & MPTCPF_JOIN_READY));
		ret = MPTS_EVRET_DISCONNECT_FALLBACK;
	} else if (mpts->mpts_flags & MPTSF_MP_READY) {
		mp_tp->mpt_flags |= MPTCPF_JOIN_READY;
		ret = MPTS_EVRET_CONNECT_PENDING;
	}

	mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx mpt_flags=%b cid %d mptsf=%b\n",
	    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
	    mp_tp->mpt_flags, MPTCPF_BITS, mpts->mpts_connid,
	    mpts->mpts_flags, MPTSF_BITS),
	    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

	return (ret);
}
/*
 * Subflow event handler for a subflow that must be reset.
 *
 * Sets ECONNABORTED on the subflow socket, sends a TCP RST built from a
 * template of the subflow's tcpcb (skipped when no template can be
 * obtained), and aborts the subflow with ECONNABORTED.
 *
 * When the subflow had received a fast-close (TMPF_FASTCLOSERCV) and the
 * connection is not in TCP fallback, SO_FILT_HINT_CONNRESET is added to
 * *p_mpsofilt_hint and the MPTCP socket's so_error is set to ECONNABORTED
 * (connection not yet established) or ECONNRESET (established or later).
 *
 * If the garbage-collection tick count still holds MPT_GC_TICKS it is
 * replaced with MPT_GC_TICKS_FAST.
 *
 * Always returns MPTS_EVRET_DELETE.
 */
static ev_ret_t
mptcp_subflow_mustrst_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event)
	struct socket *mp_so, *so;
	struct mptcb *mp_tp;
	boolean_t is_fastclose;
	struct tcptemp *t_template;
	struct inpcb *inp;
	struct tcpcb *tp;

	mpte_lock_assert_held(mpte);
	VERIFY(mpte->mpte_mppcb != NULL);
	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;
	so = mpts->mpts_socket;
	inp = sotoinpcb(so);
	tp = intotcpcb(inp);

	so->so_error = ECONNABORTED;

	/* Sample the fast-close flag before the subflow is torn down. */
	is_fastclose = !!(tp->t_mpflags & TMPF_FASTCLOSERCV);

	t_template = tcp_maketemplate(tp);
	if (t_template) {
		struct tcp_respond_args tra;

		bzero(&tra, sizeof(tra));
		if (inp->inp_flags & INP_BOUND_IF)
			tra.ifscope = inp->inp_boundifp->if_index;
		else
			tra.ifscope = IFSCOPE_NONE;
		tra.awdl_unrestricted = 1;

		tcp_respond(tp, t_template->tt_ipgen,
		    &t_template->tt_t, (struct mbuf *)NULL,
		    tp->rcv_nxt, tp->snd_una, TH_RST, &tra);
		(void) m_free(dtom(t_template));

		/*
		 * The format has exactly three conversions; the stray `so'
		 * argument that used to sit in front of the connection id
		 * made %d consume a pointer and has been removed.
		 */
		mptcplog((LOG_DEBUG, "MPTCP Events: "
		    "%s: mp_so 0x%llx cid %d \n",
		    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
		    mpts->mpts_connid),
		    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
	}

	mptcp_subflow_abort(mpts, ECONNABORTED);

	if (!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) && is_fastclose) {
		*p_mpsofilt_hint |= SO_FILT_HINT_CONNRESET;

		if (mp_tp->mpt_state < MPTCPS_ESTABLISHED)
			mp_so->so_error = ECONNABORTED;
		else
			mp_so->so_error = ECONNRESET;
	}

	if (mp_tp->mpt_gc_ticks == MPT_GC_TICKS)
		mp_tp->mpt_gc_ticks = MPT_GC_TICKS_FAST;

	return (MPTS_EVRET_DELETE);
}
/*
 * Subflow event handler for an adaptive read timeout on `mpts'.
 *
 * Marks the subflow MPTSF_READ_STALL, then scans every subflow of the
 * session.  Subflows whose TCP state is not yet established, or which
 * satisfy TCPS_HAVERCVDFIN2(), are ignored.  If none of the remaining
 * subflows is free of MPTSF_READ_STALL, SO_FILT_HINT_ADAPTIVE_RTIMO is
 * added to *p_mpsofilt_hint.
 *
 * Always returns MPTS_EVRET_OK.
 */
static ev_ret_t
mptcp_subflow_adaptive_rtimo_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event)
	bool every_reader_stalled = true;

	mpts->mpts_flags |= MPTSF_READ_STALL;

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		struct tcpcb *sub_tp = sototcpcb(mpts->mpts_socket);

		if (TCPS_HAVEESTABLISHED(sub_tp->t_state) &&
		    !TCPS_HAVERCVDFIN2(sub_tp->t_state) &&
		    !(mpts->mpts_flags & MPTSF_READ_STALL)) {
			/* At least one eligible subflow can still read. */
			every_reader_stalled = false;
			break;
		}
	}

	if (every_reader_stalled) {
		*p_mpsofilt_hint |= SO_FILT_HINT_ADAPTIVE_RTIMO;
	}

	return (MPTS_EVRET_OK);
}
/*
 * Subflow event handler for an adaptive write timeout on `mpts'.
 *
 * Marks the subflow MPTSF_WRITE_STALL, then scans every subflow of the
 * session.  Subflows whose TCP state is not yet established, or is past
 * TCPS_CLOSE_WAIT, are ignored.  If none of the remaining subflows is free
 * of MPTSF_WRITE_STALL, SO_FILT_HINT_ADAPTIVE_WTIMO is added to
 * *p_mpsofilt_hint.
 *
 * Always returns MPTS_EVRET_OK.
 */
static ev_ret_t
mptcp_subflow_adaptive_wtimo_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event)
	bool every_writer_stalled = true;

	mpts->mpts_flags |= MPTSF_WRITE_STALL;

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		struct tcpcb *sub_tp = sototcpcb(mpts->mpts_socket);

		if (TCPS_HAVEESTABLISHED(sub_tp->t_state) &&
		    !(sub_tp->t_state > TCPS_CLOSE_WAIT) &&
		    !(mpts->mpts_flags & MPTSF_WRITE_STALL)) {
			/* At least one eligible subflow can still write. */
			every_writer_stalled = false;
			break;
		}
	}

	if (every_writer_stalled) {
		*p_mpsofilt_hint |= SO_FILT_HINT_ADAPTIVE_WTIMO;
	}

	return (MPTS_EVRET_OK);
}
/*
 * Return a printable name for a subflow event handler return code.
 * Any value without a case below maps to "UNKNOWN".
 */
static const char *
mptcp_evret2str(ev_ret_t ret)
{
	switch (ret) {
	case MPTS_EVRET_DELETE:
		return ("MPTS_EVRET_DELETE");
	case MPTS_EVRET_CONNECT_PENDING:
		return ("MPTS_EVRET_CONNECT_PENDING");
	case MPTS_EVRET_DISCONNECT_FALLBACK:
		return ("MPTS_EVRET_DISCONNECT_FALLBACK");
	case MPTS_EVRET_OK:
		return ("MPTS_EVRET_OK");
	default:
		return ("UNKNOWN");
	}
}
int
mptcp_subflow_sosetopt(struct mptses *mpte, struct mptsub *mpts, struct mptopt *mpo)
{
struct socket *mp_so, *so;
struct sockopt sopt;
int error;
VERIFY(mpo->mpo_flags & MPOF_SUBFLOW_OK);
mpte_lock_assert_held(mpte);
mp_so = mptetoso(mpte);
so = mpts->mpts_socket;
if (mpte->mpte_mptcb->mpt_state >= MPTCPS_ESTABLISHED &&
mpo->mpo_level == SOL_SOCKET &&
mpo->mpo_name == SO_MARK_CELLFALLBACK) {
mptcplog((LOG_DEBUG, "%s Setting CELL_FALLBACK, mpte_flags %#x, svctype %u wifi unusable %u lastcell? %d boundcell? %d\n",
__func__, mpte->mpte_flags, mpte->mpte_svctype, mptcp_is_wifi_unusable(),
sotoinpcb(so)->inp_last_outifp ? IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp) : -1,
mpts->mpts_ifscope != IFSCOPE_NONE ? IFNET_IS_CELLULAR(ifindex2ifnet[mpts->mpts_ifscope]) : -1),
MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
if (mpte->mpte_flags & MPTE_FIRSTPARTY)
return (0);
if (sotoinpcb(so)->inp_last_outifp &&
!IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp))
return (0);
if (mpts->mpts_ifscope == IFSCOPE_NONE ||
!IFNET_IS_CELLULAR(ifindex2ifnet[mpts->mpts_ifscope]))
return (0);
}
mpo->mpo_flags &= ~MPOF_INTERIM;
bzero(&sopt, sizeof (sopt));
sopt.sopt_dir = SOPT_SET;
sopt.sopt_level = mpo->mpo_level;
sopt.sopt_name = mpo->mpo_name;
sopt.sopt_val = CAST_USER_ADDR_T(&mpo->mpo_intval);
sopt.sopt_valsize = sizeof (int);
sopt.sopt_p = kernproc;
error = sosetoptlock(so, &sopt, 0);
if (error == 0) {
mptcplog((LOG_INFO, "%s: mp_so 0x%llx sopt %s "
"val %d set successful\n", __func__,
(u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name),
mpo->mpo_intval),
MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
} else {
mptcplog((LOG_ERR, "%s:mp_so 0x%llx sopt %s "
"val %d set error %d\n", __func__,
(u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name),
mpo->mpo_intval, error),
MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
}
return (error);
}
/*
 * Read the socket option described by `mpo' from the subflow socket `so'.
 *
 * The request is issued through sogetoptlock() on behalf of kernproc with
 * mpo_intval as the (int-sized) result buffer.  Returns 0 on success or
 * the error from sogetoptlock().
 */
int
mptcp_subflow_sogetopt(struct mptses *mpte, struct socket *so,
    struct mptopt *mpo)
{
	struct socket *mp_so;
	struct sockopt sopt;
	int error;

	VERIFY(mpo->mpo_flags & MPOF_SUBFLOW_OK);
	mpte_lock_assert_held(mpte);
	mp_so = mptetoso(mpte);

	/* Build an in-kernel "get" request targeting mpo_intval. */
	bzero(&sopt, sizeof (sopt));
	sopt.sopt_p = kernproc;
	sopt.sopt_dir = SOPT_GET;
	sopt.sopt_level = mpo->mpo_level;
	sopt.sopt_name = mpo->mpo_name;
	sopt.sopt_valsize = sizeof (int);
	sopt.sopt_val = CAST_USER_ADDR_T(&mpo->mpo_intval);

	error = sogetoptlock(so, &sopt, 0);
	if (error != 0) {
		mptcplog((LOG_ERR, "MPTCP Socket: "
		    "%s: mp_so 0x%llx sopt %s get error %d\n",
		    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
		    mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name), error),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
	} else {
		mptcplog((LOG_DEBUG, "MPTCP Socket: "
		    "%s: mp_so 0x%llx sopt %s "
		    "val %d get successful\n", __func__,
		    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
		    mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name),
		    mpo->mpo_intval),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
	}

	return (error);
}
/*
 * Garbage-collect MPTCP PCBs on `mppi'.
 *
 * Must be called with mppi_lock owned (asserted below).  Every PCB on the
 * list is visited; a PCB is destroyed only if its session lock can be
 * taken without blocking and its socket has no users left.  Returns the
 * number of PCBs that were skipped and are therefore still active.
 */
static uint32_t
mptcp_gc(struct mppcbinfo *mppi)
{
struct mppcb *mpp, *tmpp;
uint32_t active = 0;
LCK_MTX_ASSERT(&mppi->mppi_lock, LCK_MTX_ASSERT_OWNED);
/* _SAFE iteration: the current PCB may be disposed of inside the loop. */
TAILQ_FOREACH_SAFE(mpp, &mppi->mppi_pcbs, mpp_entry, tmpp) {
struct socket *mp_so;
struct mptses *mpte;
struct mptcb *mp_tp;
VERIFY(mpp->mpp_flags & MPP_ATTACHED);
mp_so = mpp->mpp_socket;
VERIFY(mp_so != NULL);
mpte = mptompte(mpp);
VERIFY(mpte != NULL);
mp_tp = mpte->mpte_mptcb;
VERIFY(mp_tp != NULL);
mptcplog((LOG_DEBUG, "MPTCP Socket: "
"%s: mp_so 0x%llx found "
"(u=%d,r=%d,s=%d)\n", __func__,
(u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mp_so->so_usecount,
mp_so->so_retaincnt, mpp->mpp_state),
MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
/* Lock is contended: count the PCB as active and move on. */
if (!mpte_try_lock(mpte)) {
mptcplog((LOG_DEBUG, "MPTCP Socket: "
"%s: mp_so 0x%llx skipped lock "
"(u=%d,r=%d)\n", __func__,
(u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
mp_so->so_usecount, mp_so->so_retaincnt),
MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
active++;
continue;
}
/* Socket still has users: it cannot be destroyed on this pass. */
if (mp_so->so_usecount > 0) {
boolean_t wakeup = FALSE;
struct mptsub *mpts, *tmpts;
mptcplog((LOG_DEBUG, "MPTCP Socket: "
"%s: mp_so 0x%llx skipped usecount "
"[u=%d,r=%d] %d %d\n", __func__,
(u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
mp_so->so_usecount, mp_so->so_retaincnt,
mp_tp->mpt_gc_ticks,
mp_tp->mpt_state),
MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
/*
 * From FIN_WAIT_1 onwards, count down mpt_gc_ticks; once it
 * reaches zero, post a DISCONNECTED event on every subflow.
 */
if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_1) {
if (mp_tp->mpt_gc_ticks > 0)
mp_tp->mpt_gc_ticks--;
if (mp_tp->mpt_gc_ticks == 0) {
wakeup = TRUE;
}
}
if (wakeup) {
TAILQ_FOREACH_SAFE(mpts,
&mpte->mpte_subflows, mpts_entry, tmpts) {
mptcp_subflow_eupcall1(mpts->mpts_socket,
mpts, SO_FILT_HINT_DISCONNECTED);
}
}
mpte_unlock(mpte);
active++;
continue;
}
/* No users left: the PCB is expected to be dead at this point. */
if (mpp->mpp_state != MPPCB_STATE_DEAD) {
panic("MPTCP Socket: %s: mp_so 0x%llx skipped state "
"[u=%d,r=%d,s=%d]\n", __func__,
(u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
mp_so->so_usecount, mp_so->so_retaincnt,
mpp->mpp_state);
}
/* A connection still in TIME_WAIT is closed before being destroyed. */
if (mp_tp->mpt_state == MPTCPS_TIME_WAIT)
mptcp_close(mpte, mp_tp);
mptcp_session_destroy(mpte);
mptcplog((LOG_DEBUG, "MPTCP Socket: "
"%s: mp_so 0x%llx destroyed [u=%d,r=%d]\n",
__func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
mp_so->so_usecount, mp_so->so_retaincnt),
MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
DTRACE_MPTCP4(dispose, struct socket *, mp_so,
struct sockbuf *, &mp_so->so_rcv,
struct sockbuf *, &mp_so->so_snd,
struct mppcb *, mpp);
/* Release the PCB first, then the socket itself. */
mp_pcbdispose(mpp);
sodealloc(mp_so);
}
return (active);
}
/*
 * Drop an MPTCP connection, reporting `errno' on the MPTCP socket.
 *
 * When the reason is ETIMEDOUT and a soft error was recorded on the
 * connection, the soft error is reported instead.  The connection is then
 * closed via mptcp_close(), whose result is returned.
 */
struct mptses *
mptcp_drop(struct mptses *mpte, struct mptcb *mp_tp, int errno)
{
	struct socket *mp_so;

	mpte_lock_assert_held(mpte);
	VERIFY(mpte->mpte_mptcb == mp_tp);
	mp_so = mptetoso(mpte);

	DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp,
	    uint32_t, 0 );

	mp_so->so_error = (errno == ETIMEDOUT && mp_tp->mpt_softerror != 0) ?
	    mp_tp->mpt_softerror : errno;

	return (mptcp_close(mpte, mp_tp));
}
/*
 * Close an MPTCP connection.
 *
 * Moves the connection to MPTCPS_TERMINATE, releases what mptcp_freeq()
 * frees for it, marks the MPTCP socket disconnected and disconnects every
 * subflow of the session.  Always returns NULL.
 */
struct mptses *
mptcp_close(struct mptses *mpte, struct mptcb *mp_tp)
{
	struct socket *mp_so;
	struct mptsub *sub, *next_sub;

	mpte_lock_assert_held(mpte);
	VERIFY(mpte->mpte_mptcb == mp_tp);
	mp_so = mptetoso(mpte);

	mp_tp->mpt_state = MPTCPS_TERMINATE;
	mptcp_freeq(mp_tp);
	soisdisconnected(mp_so);

	TAILQ_FOREACH_SAFE(sub, &mpte->mpte_subflows, mpts_entry, next_sub) {
		mptcp_subflow_disconnect(mpte, sub);
	}

	return (NULL);
}
/*
 * Post a DISCONNECTED event on `so'.  SO_FILT_HINT_LOCKED is passed along
 * with it to soevent().
 */
void
mptcp_notify_close(struct socket *so)
{
	soevent(so, (SO_FILT_HINT_DISCONNECTED | SO_FILT_HINT_LOCKED));
}
/*
 * Process pending events on every subflow of the session.
 *
 * First pass: run mptcp_subflow_events() on each subflow, act on its
 * return code, and accumulate hints for the MPTCP socket in
 * mpsofilt_hint_mask.  The accumulated hints are then delivered with a
 * single soevent() on the MPTCP socket.
 *
 * Second pass (only if a handler asked for it): either degrade all
 * subflows to TCP fallback, or start the connect of subflows that are
 * marked MPTSF_CONNECT_PENDING.
 */
void
mptcp_subflow_workloop(struct mptses *mpte)
{
struct socket *mp_so;
struct mptsub *mpts, *tmpts;
boolean_t connect_pending = FALSE, disconnect_fallback = FALSE;
/* The mask always carries SO_FILT_HINT_LOCKED; anything beyond it is a real hint. */
uint64_t mpsofilt_hint_mask = SO_FILT_HINT_LOCKED;
mpte_lock_assert_held(mpte);
VERIFY(mpte->mpte_mppcb != NULL);
mp_so = mptetoso(mpte);
VERIFY(mp_so != NULL);
TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
ev_ret_t ret;
/* Skip subflows whose socket has no users. */
if (mpts->mpts_socket->so_usecount == 0) {
continue;
}
/*
 * Hold a reference on the subflow and on its socket while the
 * events are processed; both are released at the end of the
 * iteration.
 */
mptcp_subflow_addref(mpts);
mpts->mpts_socket->so_usecount++;
ret = mptcp_subflow_events(mpte, mpts, &mpsofilt_hint_mask);
/* The MPTCP socket is being torn down: disconnect the subflow as well. */
if (mp_so->so_flags & SOF_PCBCLEARING)
mptcp_subflow_disconnect(mpte, mpts);
switch (ret) {
case MPTS_EVRET_OK:
break;
case MPTS_EVRET_DELETE:
mptcp_subflow_soclose(mpts);
break;
case MPTS_EVRET_CONNECT_PENDING:
connect_pending = TRUE;
break;
case MPTS_EVRET_DISCONNECT_FALLBACK:
disconnect_fallback = TRUE;
break;
default:
mptcplog((LOG_DEBUG,
"MPTCP Socket: %s: mptcp_subflow_events "
"returned invalid value: %d\n", __func__,
ret),
MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
break;
}
mptcp_subflow_remref(mpts);
VERIFY(mpts->mpts_socket->so_usecount != 0);
mpts->mpts_socket->so_usecount--;
}
/* Deliver the accumulated hints to the MPTCP socket, if any were added. */
if (mpsofilt_hint_mask != SO_FILT_HINT_LOCKED) {
VERIFY(mpsofilt_hint_mask & SO_FILT_HINT_LOCKED);
soevent(mp_so, mpsofilt_hint_mask);
}
if (!connect_pending && !disconnect_fallback)
return;
TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
/* Fallback takes precedence over pending connects when both were requested. */
if (disconnect_fallback) {
struct socket *so = NULL;
struct inpcb *inp = NULL;
struct tcpcb *tp = NULL;
/* Already degraded: nothing to do for this subflow. */
if (mpts->mpts_flags & MPTSF_MP_DEGRADED)
continue;
mpts->mpts_flags |= MPTSF_MP_DEGRADED;
/* Subflows that are going away or not yet connecting are only flagged. */
if (mpts->mpts_flags & (MPTSF_DISCONNECTING|
MPTSF_DISCONNECTED|MPTSF_CONNECT_PENDING))
continue;
so = mpts->mpts_socket;
inp = sotoinpcb(so);
tp = intotcpcb(inp);
tp->t_mpflags &=
~(TMPF_MPTCP_READY|TMPF_MPTCP_TRUE);
tp->t_mpflags |= TMPF_TCP_FALLBACK;
/* The active subflow is kept; every other one is reset. */
if (mpts->mpts_flags & MPTSF_ACTIVE) {
continue;
}
tp->t_mpflags |= TMPF_RESET;
soevent(so, SO_FILT_HINT_MUSTRST);
} else if (connect_pending) {
/* Start the connect on pending subflows; abort those that fail. */
if (mpts->mpts_flags & MPTSF_CONNECT_PENDING) {
int error = mptcp_subflow_soconnectx(mpte, mpts);
if (error)
mptcp_subflow_abort(mpts, error);
}
}
}
}
/*
 * Lock an MPTCP socket.
 *
 * Takes the PCB lock, optionally bumps so_usecount (when `refcount' is
 * non-zero) and records the caller's return address -- `lr' if given,
 * otherwise this function's own return address -- in the socket's
 * lock-history ring.  Panics when the socket has no PCB or a negative use
 * count.  Always returns 0.
 */
int
mptcp_lock(struct socket *mp_so, int refcount, void *lr)
{
	struct mppcb *mpp = mpsotomppcb(mp_so);
	void *lr_saved = (lr != NULL) ? lr : __builtin_return_address(0);

	if (mpp == NULL) {
		panic("%s: so=%p NO PCB! lr=%p lrh= %s\n", __func__,
		    mp_so, lr_saved, solockhistory_nr(mp_so));
	}

	mpp_lock(mpp);

	if (mp_so->so_usecount < 0) {
		panic("%s: so=%p so_pcb=%p lr=%p ref=%x lrh= %s\n", __func__,
		    mp_so, mp_so->so_pcb, lr_saved, mp_so->so_usecount,
		    solockhistory_nr(mp_so));
	}

	if (refcount != 0) {
		mp_so->so_usecount++;
	}

	/* Record who took the lock, then advance the ring index. */
	mp_so->lock_lr[mp_so->next_lock_lr] = lr_saved;
	mp_so->next_lock_lr = (mp_so->next_lock_lr + 1) % SO_LCKDBG_MAX;

	return (0);
}
/*
 * Unlock an MPTCP socket.
 *
 * Asserts that the PCB lock is held, optionally drops so_usecount (when
 * `refcount' is non-zero), records the caller's return address -- `lr' if
 * given, otherwise this function's own return address -- in the socket's
 * unlock-history ring, and releases the PCB lock.  Panics when the socket
 * has no PCB or when the use count becomes negative.  Always returns 0.
 */
int
mptcp_unlock(struct socket *mp_so, int refcount, void *lr)
{
	struct mppcb *mpp = mpsotomppcb(mp_so);
	void *lr_saved = (lr != NULL) ? lr : __builtin_return_address(0);

	if (mpp == NULL) {
		panic("%s: so=%p NO PCB usecount=%x lr=%p lrh= %s\n", __func__,
		    mp_so, mp_so->so_usecount, lr_saved,
		    solockhistory_nr(mp_so));
	}

	mpp_lock_assert_held(mpp);

	if (refcount != 0) {
		mp_so->so_usecount--;
	}

	if (mp_so->so_usecount < 0) {
		panic("%s: so=%p usecount=%x lrh= %s\n", __func__,
		    mp_so, mp_so->so_usecount, solockhistory_nr(mp_so));
	}

	/* Record who released the lock, then advance the ring index. */
	mp_so->unlock_lr[mp_so->next_unlock_lr] = lr_saved;
	mp_so->next_unlock_lr = (mp_so->next_unlock_lr + 1) % SO_LCKDBG_MAX;

	mpp_unlock(mpp);

	return (0);
}
/*
 * Return the mutex protecting an MPTCP socket, as provided by
 * mpp_getlock() for the socket's PCB.  Panics when the socket has no PCB
 * or a negative use count.
 */
lck_mtx_t *
mptcp_getlock(struct socket *mp_so, int flags)
{
	struct mppcb *pcb = mpsotomppcb(mp_so);

	if (pcb == NULL) {
		panic("%s: so=%p NULL so_pcb %s\n", __func__, mp_so,
		    solockhistory_nr(mp_so));
	}

	if (mp_so->so_usecount < 0) {
		panic("%s: so=%p usecount=%x lrh= %s\n", __func__,
		    mp_so, mp_so->so_usecount, solockhistory_nr(mp_so));
	}

	return (mpp_getlock(pcb, flags));
}
/*
 * Attach MPTCP per-subflow authentication state to the subflow `so'.
 *
 * When the MPTCP connection is still MPTCPS_CLOSED the subflow gets local
 * address ID 0; otherwise it gets `addr_id' and is flagged as a joined,
 * pre-established secondary subflow.  A new authentication entry with a
 * non-zero local random number is then allocated and put at the head of
 * the connection's subflow-auth list.
 */
static void
mptcp_attach_to_subf(struct socket *so, struct mptcb *mp_tp,
    uint8_t addr_id)
{
	struct tcpcb *tp = sototcpcb(so);
	struct mptcp_subf_auth_entry *entry;

	mpte_lock_assert_held(mp_tp->mpt_mpte);

	if (mp_tp->mpt_state != MPTCPS_CLOSED) {
		tp->t_local_aid = addr_id;
		tp->t_mpflags |= (TMPF_PREESTABLISHED | TMPF_JOINED_FLOW);
		so->so_flags |= SOF_MP_SEC_SUBFLOW;
	} else {
		tp->t_local_aid = 0;
	}

	entry = zalloc(mpt_subauth_zone);
	entry->msae_laddr_id = tp->t_local_aid;
	entry->msae_raddr_id = 0;
	entry->msae_raddr_rand = 0;

	/* Zero means "not set" elsewhere, so draw again until non-zero. */
	do {
		entry->msae_laddr_rand = RandomULong();
	} while (entry->msae_laddr_rand == 0);

	LIST_INSERT_HEAD(&mp_tp->mpt_subauth_list, entry, msae_next);
}
/*
 * Remove and free the subflow-auth entry that belongs to subflow `so'.
 *
 * The entry is identified by the subflow's local address ID; only the
 * first match is removed.  Nothing happens when the socket has no tcpcb
 * or no entry matches.
 */
static void
mptcp_detach_mptcb_from_subf(struct mptcb *mp_tp, struct socket *so)
{
	struct mptcp_subf_auth_entry *entry;
	struct tcpcb *tp = sototcpcb(so);

	if (tp == NULL)
		return;

	LIST_FOREACH(entry, &mp_tp->mpt_subauth_list, msae_next) {
		if (entry->msae_laddr_id != tp->t_local_aid)
			continue;

		/* Leaving right away, so unlinking mid-iteration is safe. */
		LIST_REMOVE(entry, msae_next);
		zfree(mpt_subauth_zone, entry);
		return;
	}
}
/*
 * Look up the local and remote random numbers stored for the subflow with
 * local address ID `addr_id'.
 *
 * Each of `lrand' / `rrand' may be NULL, in which case that value is not
 * returned.  When no entry matches, the outputs are left untouched.
 */
void
mptcp_get_rands(mptcp_addr_id addr_id, struct mptcb *mp_tp, u_int32_t *lrand,
    u_int32_t *rrand)
{
	struct mptcp_subf_auth_entry *entry;

	mpte_lock_assert_held(mp_tp->mpt_mpte);

	LIST_FOREACH(entry, &mp_tp->mpt_subauth_list, msae_next) {
		if (entry->msae_laddr_id != addr_id)
			continue;

		if (lrand != NULL)
			*lrand = entry->msae_laddr_rand;
		if (rrand != NULL)
			*rrand = entry->msae_raddr_rand;
		return;
	}
}
/*
 * Record the peer's address ID and random number for the subflow with
 * local address ID `laddr_id'.
 *
 * A previously stored, different, non-zero remote address ID is reported
 * and leaves the entry untouched.  A previously stored, different,
 * non-zero remote random number is reported as well; in that case the
 * remote address ID has already been written but the random number is
 * kept.  Only the first matching entry is considered.
 */
void
mptcp_set_raddr_rand(mptcp_addr_id laddr_id, struct mptcb *mp_tp,
    mptcp_addr_id raddr_id, u_int32_t raddr_rand)
{
	struct mptcp_subf_auth_entry *entry;

	mpte_lock_assert_held(mp_tp->mpt_mpte);

	LIST_FOREACH(entry, &mp_tp->mpt_subauth_list, msae_next) {
		if (entry->msae_laddr_id != laddr_id)
			continue;

		if (entry->msae_raddr_id != 0 &&
		    entry->msae_raddr_id != raddr_id) {
			mptcplog((LOG_ERR, "MPTCP Socket: %s mismatched"
			    " address ids %d %d \n", __func__, raddr_id,
			    entry->msae_raddr_id),
			    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
			return;
		}
		entry->msae_raddr_id = raddr_id;

		if (entry->msae_raddr_rand != 0 &&
		    entry->msae_raddr_rand != raddr_rand) {
			mptcplog((LOG_ERR, "MPTCP Socket: "
			    "%s: dup SYN_ACK %d %d \n",
			    __func__, raddr_rand,
			    entry->msae_raddr_rand),
			    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
			return;
		}
		entry->msae_raddr_rand = raddr_rand;
		return;
	}
}
/*
 * Compute the SHA-1 digest of an MPTCP key.
 *
 * The bytes of *key (sizeof (mptcp_key_t) of them) are hashed as stored
 * in memory; the result is written to `sha_digest'.
 */
static void
mptcp_do_sha1(mptcp_key_t *key, char *sha_digest)
{
	SHA1_CTX ctx;
	const unsigned char *key_bytes = (const unsigned char *) key;
	int key_len = sizeof (mptcp_key_t);

	SHA1Init(&ctx);
	SHA1Update(&ctx, key_bytes, key_len);
	SHA1Final(sha_digest, &ctx);
}
/*
 * Compute HMAC-SHA1 over (rand1 || rand2) keyed with (key1 || key2).
 *
 * The two keys fill the first two words of an 8-word pad that is
 * otherwise zero; the pads are XOR-ed with 0x36.. (inner) and 0x5c..
 * (outer).  The inner hash covers the inner pad followed by the two random
 * numbers; the outer hash covers the outer pad followed by the inner
 * digest.  `digest' receives SHA1_RESULTLEN bytes.
 */
void
mptcp_hmac_sha1(mptcp_key_t key1, mptcp_key_t key2,
    u_int32_t rand1, u_int32_t rand2, u_char *digest)
{
	SHA1_CTX ctx;
	mptcp_key_t key_ipad[8] = { key1, key2 };
	mptcp_key_t key_opad[8] = { key1, key2 };
	u_int32_t data[2] = { rand1, rand2 };
	int idx;

	bzero(digest, SHA1_RESULTLEN);

	for (idx = 0; idx < 8; idx++) {
		key_ipad[idx] ^= 0x3636363636363636;
		key_opad[idx] ^= 0x5c5c5c5c5c5c5c5c;
	}

	/* Inner hash: H(ipad || data). */
	SHA1Init(&ctx);
	SHA1Update(&ctx, (unsigned char *)key_ipad, sizeof (key_ipad));
	SHA1Update(&ctx, (unsigned char *)data, sizeof (data));
	SHA1Final(digest, &ctx);

	/* Outer hash: H(opad || inner digest). */
	SHA1Init(&ctx);
	SHA1Update(&ctx, (unsigned char *)key_opad, sizeof (key_opad));
	SHA1Update(&ctx, (unsigned char *)digest, SHA1_RESULTLEN);
	SHA1Final(digest, &ctx);
}
/*
 * Compute the HMAC for the subflow with local address ID `aid', keyed
 * with the connection's local and remote keys over the subflow's local
 * and remote random numbers.  Random numbers that cannot be found are
 * taken as 0.
 */
void
mptcp_get_hmac(mptcp_addr_id aid, struct mptcb *mp_tp, u_char *digest)
{
	uint32_t local_rand = 0;
	uint32_t remote_rand = 0;

	mpte_lock_assert_held(mp_tp->mpt_mpte);

	mptcp_get_rands(aid, mp_tp, &local_rand, &remote_rand);
	mptcp_hmac_sha1(mp_tp->mpt_localkey, mp_tp->mpt_remotekey,
	    local_rand, remote_rand, digest);
}
/*
 * Derive a token from a SHA-1 digest: the first sizeof (u_int32_t) bytes
 * of the digest are copied to `token' unchanged.  The buffer lengths are
 * verified to be exactly a SHA-1 digest and a 32-bit token.
 */
static void
mptcp_generate_token(char *sha_digest, int sha_digest_len, caddr_t token,
    int token_len)
{
	VERIFY(token_len == sizeof (u_int32_t));
	VERIFY(sha_digest_len == SHA1_RESULTLEN);

	bcopy(sha_digest, token, sizeof (u_int32_t));
}
/*
 * Derive a 64-bit initial data sequence number from a SHA-1 digest.
 *
 * The last eight digest bytes (indices 12..19) are stored into `idsn' in
 * reverse order, i.e. idsn[0] = digest[19] ... idsn[7] = digest[12].
 */
static void
mptcp_generate_idsn(char *sha_digest, int sha_digest_len, caddr_t idsn,
    int idsn_len)
{
	int i;

	VERIFY(idsn_len == sizeof (u_int64_t));
	VERIFY(sha_digest_len == SHA1_RESULTLEN);

	for (i = 0; i < 8; i++) {
		idsn[i] = sha_digest[19 - i];
	}
}
/*
 * Initialise per-connection properties: protocol version, the checksum
 * flag (when mptcp_dss_csum is enabled), the receive window taken from
 * mptcp_sbspace(), and the garbage-collection tick count.
 */
static void
mptcp_conn_properties(struct mptcb *mp_tp)
{
	mp_tp->mpt_version = MPTCP_STD_VERSION_0;

	if (mptcp_dss_csum) {
		mp_tp->mpt_flags |= MPTCPF_CHECKSUM;
	}

	mp_tp->mpt_rcvwnd = mptcp_sbspace(mp_tp);

	mp_tp->mpt_gc_ticks = MPT_GC_TICKS;
}
/*
 * Initialise the local side of an MPTCP connection.
 *
 * A random local key is generated; its SHA-1 digest yields the local
 * token and the local initial data sequence number.  The send sequence
 * variables all start at IDSN + 1, after which the generic connection
 * properties are set.
 */
static void
mptcp_init_local_parms(struct mptses *mpte)
{
	char digest[SHA1_RESULTLEN];
	struct mptcb *mp_tp = mpte->mpte_mptcb;

	read_frandom(&mp_tp->mpt_localkey, sizeof(mp_tp->mpt_localkey));
	mptcp_do_sha1(&mp_tp->mpt_localkey, digest);

	mptcp_generate_token(digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_localtoken, sizeof (mp_tp->mpt_localtoken));
	mptcp_generate_idsn(digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_local_idsn, sizeof (u_int64_t));

	mp_tp->mpt_sndmax = mp_tp->mpt_local_idsn + 1;
	mp_tp->mpt_snduna = mp_tp->mpt_sndmax;
	mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;

	mptcp_conn_properties(mp_tp);
}
/*
 * Initialise the remote side of an MPTCP connection from the remote key.
 *
 * The SHA-1 digest of the remote key yields the remote token and the
 * remote initial data sequence number; the next expected receive sequence
 * is IDSN + 1.
 *
 * Returns 0 on success, -1 when the connection's version is not
 * MPTCP_STD_VERSION_0 (nothing is modified in that case).
 */
int
mptcp_init_remote_parms(struct mptcb *mp_tp)
{
	char digest[SHA1_RESULTLEN];

	mpte_lock_assert_held(mp_tp->mpt_mpte);

	if (mp_tp->mpt_version != MPTCP_STD_VERSION_0) {
		return (-1);
	}

	mptcp_do_sha1(&mp_tp->mpt_remotekey, digest);

	mptcp_generate_token(digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_remotetoken, sizeof (mp_tp->mpt_remotetoken));
	mptcp_generate_idsn(digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_remote_idsn, sizeof (u_int64_t));

	mp_tp->mpt_rcvnxt = mp_tp->mpt_remote_idsn + 1;

	return (0);
}
/*
 * Request that a DATA_FIN be sent on subflow `so' by setting
 * TMPF_SEND_DFIN, unless the subflow is already flagged TMPF_RESET.
 * Does nothing when the socket has no inpcb or no tcpcb.
 */
static void
mptcp_send_dfin(struct socket *so)
{
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;

	if (inp == NULL)
		return;

	tp = intotcpcb(inp);
	if (tp == NULL)
		return;

	if (tp->t_mpflags & TMPF_RESET)
		return;

	tp->t_mpflags |= TMPF_SEND_DFIN;
}
/*
 * Stamp data sequence numbers onto an mbuf chain queued on the MPTCP
 * socket.
 *
 * Every mbuf on the m_next chain must carry a packet header.  Each one is
 * flagged PKTF_MPTCP | PKTF_MPSO, given the current mpt_sndmax as its DSN
 * and its packet length as the mapping length; mpt_sndmax then advances by
 * that packet length.  A NULL chain is ignored.
 */
void
mptcp_insert_dsn(struct mppcb *mpp, struct mbuf *m)
{
	struct mptcb *mp_tp;

	if (m == NULL)
		return;

	__IGNORE_WCASTALIGN(mp_tp = &((struct mpp_mtp *)mpp)->mtcb);
	mpte_lock_assert_held(mp_tp->mpt_mpte);

	for (; m != NULL; m = m->m_next) {
		VERIFY(m->m_flags & M_PKTHDR);

		m->m_pkthdr.pkt_flags |= (PKTF_MPTCP | PKTF_MPSO);
		m->m_pkthdr.mp_dsn = mp_tp->mpt_sndmax;
		m->m_pkthdr.mp_rlen = m_pktlen(m);

		mp_tp->mpt_sndmax += m_pktlen(m);
	}
}
void
mptcp_fallback_sbdrop(struct socket *so, struct mbuf *m, int len)
{
struct mptcb *mp_tp = tptomptp(sototcpcb(so));
uint64_t data_ack;
uint64_t dsn;
if (!m || len == 0)
return;
while (m && len > 0) {
VERIFY(m->m_flags & M_PKTHDR);
VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);
data_ack = m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen;
dsn = m->m_pkthdr.mp_dsn;
len -= m->m_len;
m = m->m_next;
}
if (m && len == 0) {
data_ack = m->m_pkthdr.mp_dsn;
}
if (len < 0) {
data_ack = dsn;
}
mptcplog((LOG_DEBUG, "%s inferred ack up to %u\n", __func__, (uint32_t)data_ack),
MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
mptcp_data_ack_rcvd(mp_tp, sototcpcb(so), data_ack);
}
/*
 * Adjust the MPTCP mappings stored in the mbuf chain starting at `m'
 * before `len' bytes are dropped from socket `so''s send buffer.
 *
 * The mappings are only walked for sockets that are not MPTCP subflows
 * (SOF_MP_SUBFLOW clear), or when a TFO rewind was requested on the
 * socket (SOF1_TFO_REWIND, which is consumed here).
 *
 * For a subflow socket that has neither a TFO request pending nor
 * received a data ACK, the drop additionally marks TMPF_INFIN_SENT and
 * signals an MP failure.
 */
void
mptcp_preproc_sbdrop(struct socket *so, struct mbuf *m, unsigned int len)
{
int rewinding = 0;
/* Consume a pending TFO rewind request. */
if (so->so_flags1 & SOF1_TFO_REWIND) {
rewinding = 1;
so->so_flags1 &= ~SOF1_TFO_REWIND;
}
while (m && (!(so->so_flags & SOF_MP_SUBFLOW) || rewinding)) {
u_int32_t sub_len;
VERIFY(m->m_flags & M_PKTHDR);
VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);
sub_len = m->m_pkthdr.mp_rlen;
if (sub_len < len) {
/*
 * This mapping is dropped entirely: move its DSN (and, unless
 * the mbuf is flagged PKTF_MPSO, its relative sequence) past
 * the mapping, zero its length and continue with the rest.
 */
m->m_pkthdr.mp_dsn += sub_len;
if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) {
m->m_pkthdr.mp_rseq += sub_len;
}
m->m_pkthdr.mp_rlen = 0;
len -= sub_len;
} else {
/*
 * The drop ends inside (or exactly at the end of) this mapping:
 * shorten it by `len'.  When rewinding, the DSN and relative
 * sequence are left where they are.
 */
if (rewinding == 0)
m->m_pkthdr.mp_dsn += len;
if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) {
if (rewinding == 0)
m->m_pkthdr.mp_rseq += len;
}
mptcplog((LOG_DEBUG, "%s: dsn %u ssn %u len %d %d\n",
__func__, (u_int32_t)m->m_pkthdr.mp_dsn,
m->m_pkthdr.mp_rseq, m->m_pkthdr.mp_rlen, len),
MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
m->m_pkthdr.mp_rlen -= len;
break;
}
m = m->m_next;
}
/*
 * Data is being dropped (presumably because it was acknowledged at the
 * TCP level -- confirm against callers) on a subflow that never saw a
 * data ACK and has no TFO request pending: flag TMPF_INFIN_SENT and
 * notify an MP failure.
 */
if (so->so_flags & SOF_MP_SUBFLOW &&
!(sototcpcb(so)->t_mpflags & TMPF_TFO_REQUEST) &&
!(sototcpcb(so)->t_mpflags & TMPF_RCVD_DACK)) {
sototcpcb(so)->t_mpflags |= TMPF_INFIN_SENT;
mptcp_notify_mpfail(so);
}
}
/*
 * 32-bit variant of mptcp_output_getm_dsnmap64(): fetches the mapping at
 * send-buffer offset `off' and returns the low 32 bits of its DSN in
 * *dsn.  The other outputs are filled in by the 64-bit routine.
 */
void
mptcp_output_getm_dsnmap32(struct socket *so, int off,
    uint32_t *dsn, uint32_t *relseq, uint16_t *data_len, uint16_t *dss_csum)
{
	u_int64_t full_dsn;

	mptcp_output_getm_dsnmap64(so, off, &full_dsn, relseq, data_len,
	    dss_csum);

	*dsn = (u_int32_t)MPTCP_DATASEQ_LOW32(full_dsn);
}
/*
 * Return the MPTCP mapping stored in the send-buffer mbuf that contains
 * byte offset `off'.
 *
 * The send buffer's m_next chain is walked, subtracting each mbuf's
 * length from `off' until the offset falls inside an mbuf.  That mbuf's
 * DSN, relative subflow sequence, mapping length and checksum are
 * returned through the output pointers.  The offset must be non-negative
 * and must fall inside the chain, and the mapping length must fit in 16
 * bits; all three are enforced with VERIFY().
 */
void
mptcp_output_getm_dsnmap64(struct socket *so, int off, uint64_t *dsn,
    uint32_t *relseq, uint16_t *data_len,
    uint16_t *dss_csum)
{
	struct mbuf *m;
	int off_orig = off;

	VERIFY(off >= 0);

	for (m = so->so_snd.sb_mb; m != NULL; m = m->m_next) {
		VERIFY(m->m_flags & M_PKTHDR);
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);

		/* Offset lands inside this mbuf: found it. */
		if (off < m->m_len)
			break;

		off -= m->m_len;
	}

	VERIFY(m);
	VERIFY(off >= 0);
	VERIFY(m->m_pkthdr.mp_rlen <= UINT16_MAX);

	*dsn = m->m_pkthdr.mp_dsn;
	*relseq = m->m_pkthdr.mp_rseq;
	*data_len = m->m_pkthdr.mp_rlen;
	*dss_csum = m->m_pkthdr.mp_csum;

	mptcplog((LOG_DEBUG, "%s: dsn %u ssn %u data_len %d off %d off_orig %d\n",
	    __func__, (u_int32_t)(*dsn), *relseq, *data_len, off, off_orig),
	    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
}
/*
 * Stamp an incoming mbuf with the receive mapping currently held in
 * tp->t_rcv_map.
 *
 * With TMPF_EMBED_DSN set, the DSN, subflow sequence, length and checksum
 * are copied into the packet header, PKTF_MPTCP (and PKTF_MPTCP_DFIN when
 * the map carries a data-FIN) is set, TMPF_EMBED_DSN is cleared and
 * TMPF_MPTCP_ACKNOW is raised.  Otherwise, on a connection flagged
 * TMPF_TCP_FALLBACK, a TCP FIN in `th' is marked as PKTF_MPTCP_DFIN.
 *
 * The mbuf must have a packet header that is not yet tagged PKTF_MPTCP.
 */
void
mptcp_insert_rmap(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th)
{
	VERIFY(m->m_flags & M_PKTHDR);
	VERIFY(!(m->m_pkthdr.pkt_flags & PKTF_MPTCP));

	if (!(tp->t_mpflags & TMPF_EMBED_DSN)) {
		/* No pending map: only the fallback FIN case needs marking. */
		if ((tp->t_mpflags & TMPF_TCP_FALLBACK) &&
		    (th->th_flags & TH_FIN)) {
			m->m_pkthdr.pkt_flags |= PKTF_MPTCP_DFIN;
		}
		return;
	}

	m->m_pkthdr.mp_dsn = tp->t_rcv_map.mpt_dsn;
	m->m_pkthdr.mp_rseq = tp->t_rcv_map.mpt_sseq;
	m->m_pkthdr.mp_rlen = tp->t_rcv_map.mpt_len;
	m->m_pkthdr.mp_csum = tp->t_rcv_map.mpt_csum;

	if (tp->t_rcv_map.mpt_dfin) {
		m->m_pkthdr.pkt_flags |= PKTF_MPTCP_DFIN;
	}
	m->m_pkthdr.pkt_flags |= PKTF_MPTCP;

	tp->t_mpflags &= ~TMPF_EMBED_DSN;
	tp->t_mpflags |= TMPF_MPTCP_ACKNOW;
}
/*
 * Re-align the receive mapping of mbuf `m' after `off' bytes were trimmed
 * from its front.
 *
 * Returns 0 on success (including for an empty packet) and -1 when
 * `off' is non-zero and the supplied dsn/rseq/dlen disagree with the
 * mapping stored in the mbuf.
 *
 * An mbuf without an MPTCP mapping triggers mptcp_notify_mpfail() if the
 * subflow has not been confirmed yet.  In every non-error, non-empty case
 * the subflow ends up flagged MPTSF_CONFIRMED.
 */
int
mptcp_adj_rmap(struct socket *so, struct mbuf *m, int off, uint64_t dsn,
    uint32_t rseq, uint16_t dlen)
{
	struct mptsub *mpts = sototcpcb(so)->t_mpsub;

	if (m_pktlen(m) == 0) {
		return (0);
	}

	if (!(m->m_flags & M_PKTHDR) ||
	    !(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
		/* No mapping on this packet. */
		if (!(mpts->mpts_flags & MPTSF_CONFIRMED)) {
			mptcp_notify_mpfail(so);
		}
		mpts->mpts_flags |= MPTSF_CONFIRMED;
		return (0);
	}

	if (off && (dsn != m->m_pkthdr.mp_dsn ||
	    rseq != m->m_pkthdr.mp_rseq ||
	    dlen != m->m_pkthdr.mp_rlen)) {
		mptcplog((LOG_ERR, "%s: Received incorrect second mapping: %llu - %llu , %u - %u, %u - %u\n",
		    __func__, dsn, m->m_pkthdr.mp_dsn,
		    rseq, m->m_pkthdr.mp_rseq,
		    dlen, m->m_pkthdr.mp_rlen),
		    MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_ERR);
		return (-1);
	}

	/* Shift the mapping forward and size it to the packet. */
	m->m_pkthdr.mp_dsn += off;
	m->m_pkthdr.mp_rseq += off;
	m->m_pkthdr.mp_rlen = m->m_pkthdr.len;

	mpts->mpts_flags |= MPTSF_CONFIRMED;
	return (0);
}
/*
 * Flag the socket for failover (SOF_MP_TRYFAILOVER) and post an
 * SO_FILT_HINT_MPFAILOVER event, once.  Does nothing when the socket has
 * no inpcb/tcpcb or when the flag is already set.
 */
void
mptcp_act_on_txfail(struct socket *so)
{
	struct inpcb *inp = sotoinpcb(so);

	if (inp == NULL || intotcpcb(inp) == NULL) {
		return;
	}

	if (so->so_flags & SOF_MP_TRYFAILOVER) {
		return;
	}

	so->so_flags |= SOF_MP_TRYFAILOVER;
	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPFAILOVER));
}
/*
 * Translate data sequence number `dsn_fail' into a subflow-relative
 * sequence number using the mappings stored in the send buffer.
 *
 * Returns 0 and stores the result in *tcp_seq when a mapping whose range
 * [mp_dsn, mp_dsn + mp_rlen] (both ends inclusive, as coded) contains
 * dsn_fail is found; returns -1 if the buffer is empty or no mapping
 * matches.
 */
int
mptcp_get_map_for_dsn(struct socket *so, u_int64_t dsn_fail, u_int32_t *tcp_seq)
{
	struct mbuf *m;

	if (so->so_snd.sb_mb == NULL) {
		return (-1);
	}

	for (m = so->so_snd.sb_mb; m != NULL; m = m->m_next) {
		u_int64_t dsn;
		u_int32_t datalen;
		int off;

		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);
		VERIFY(m->m_flags & M_PKTHDR);

		dsn = m->m_pkthdr.mp_dsn;
		datalen = m->m_pkthdr.mp_rlen;

		if (!MPTCP_SEQ_LEQ(dsn, dsn_fail) ||
		    !MPTCP_SEQ_GEQ(dsn + datalen, dsn_fail)) {
			continue;
		}

		off = dsn_fail - dsn;
		*tcp_seq = m->m_pkthdr.mp_rseq + off;
		mptcplog((LOG_DEBUG, "%s: %llu %llu \n", __func__, dsn,
		    dsn_fail), MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
		return (0);
	}

	mptcplog((LOG_ERR, "MPTCP Sender: "
	    "%s: %llu not found \n", __func__, dsn_fail),
	    MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
	return (-1);
}
/*
 * Return how many bytes of the mapping covering send-buffer offset `off'
 * remain from `off' onwards.
 *
 * The mapping's own offset in the subflow send buffer is derived from its
 * relative sequence number, the subflow's mpts_iss and snd_una (minus one
 * while TMPF_TFO_REQUEST is set).  `off' must not lie before that offset;
 * the values are printed before the VERIFY fires if it does.
 */
int32_t
mptcp_adj_sendlen(struct socket *so, int32_t off)
{
	struct tcpcb *tp = sototcpcb(so);
	struct mptsub *mpts = tp->t_mpsub;
	uint64_t map_dsn;
	uint32_t map_sseq;
	uint16_t map_len;
	uint16_t map_csum;
	int map_off;

	mptcp_output_getm_dsnmap64(so, off, &map_dsn, &map_sseq,
	    &map_len, &map_csum);

	/* Where the mapping starts, relative to the unacknowledged data. */
	map_off = (map_sseq + mpts->mpts_iss) - tp->snd_una;
	if (tp->t_mpflags & TMPF_TFO_REQUEST) {
		map_off--;
	}

	if (off < map_off) {
		printf("%s off %d mdss_subflow_off %d mdss_subflow_seq %u iss %u suna %u\n", __func__,
		    off, map_off, map_sseq, mpts->mpts_iss, tp->snd_una);
	}
	VERIFY(off >= map_off);

	mptcplog((LOG_DEBUG, "%s dlen %u off %d sub_off %d sub_seq %u iss %u suna %u\n",
	    __func__, map_len, off, map_off, map_sseq,
	    mpts->mpts_iss, tp->snd_una), MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);

	return (map_len - (off - map_off));
}
/*
 * Largest t_maxseg among the session's subflows that are established and
 * have not progressed to a state matched by TCPS_HAVERCVDFIN2().
 * Returns 0 when no subflow qualifies.
 */
static uint32_t
mptcp_get_maxseg(struct mptses *mpte)
{
	uint32_t best = 0;
	struct mptsub *mpts;

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

		if (TCPS_HAVEESTABLISHED(tp->t_state) &&
		    !TCPS_HAVERCVDFIN2(tp->t_state) &&
		    tp->t_maxseg > best) {
			best = tp->t_maxseg;
		}
	}

	return (best);
}
/*
 * Smallest rcv_scale among the session's subflows that are established and
 * have not progressed to a state matched by TCPS_HAVERCVDFIN2().
 * Returns UINT8_MAX when no subflow qualifies.
 */
static uint8_t
mptcp_get_rcvscale(struct mptses *mpte)
{
	uint8_t lowest = UINT8_MAX;
	struct mptsub *mpts;

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

		if (TCPS_HAVEESTABLISHED(tp->t_state) &&
		    !TCPS_HAVERCVDFIN2(tp->t_state) &&
		    tp->rcv_scale < lowest) {
			lowest = tp->rcv_scale;
		}
	}

	return (lowest);
}
/*
 * Grow the MPTCP-level receive buffer `sbrcv' towards `newsize'.
 *
 * mp_tp      MPTCP control block; its session's subflows determine the
 *            window-scale limit
 * sbrcv      receive socket buffer to resize
 * newsize    requested high-water mark
 * idealsize  requested ideal size, or 0 to use the (clamped) newsize
 *
 * The request is clamped to tcp_autorcvbuf_max and to what the smallest
 * subflow receive window scale can express (TCP_MAXWIN << rcvscale).
 * sb_idealsize is only updated when the buffer was actually enlarged.
 */
static void
mptcp_sbrcv_reserve(struct mptcb *mp_tp, struct sockbuf *sbrcv,
    u_int32_t newsize, u_int32_t idealsize)
{
	uint8_t rcvscale = mptcp_get_rcvscale(mp_tp->mpt_mpte);

	/*
	 * mptcp_get_rcvscale() yields UINT8_MAX when no subflow is usable.
	 * Shifting by that amount is undefined behaviour, and without a
	 * usable subflow there is no meaningful window-scale limit to apply,
	 * so leave the buffer alone.
	 */
	if (rcvscale == UINT8_MAX) {
		return;
	}

	/* The new size must not exceed the global autotuning maximum... */
	newsize = min(newsize, tcp_autorcvbuf_max);

	/* ...nor what the window scale allows to be advertised. */
	newsize = min(newsize, TCP_MAXWIN << rcvscale);

	/* Only ever grow, and only record the ideal size if that worked. */
	if (newsize > sbrcv->sb_hiwat &&
	    (sbreserve(sbrcv, newsize) == 1)) {
		sbrcv->sb_idealsize = min(max(sbrcv->sb_idealsize,
		    (idealsize != 0) ? idealsize : newsize), tcp_autorcvbuf_max);

		sbrcv->sb_idealsize = min(sbrcv->sb_idealsize,
		    TCP_MAXWIN << rcvscale);
	}
}
/*
 * Resize the MPTCP socket's receive buffer to the sum of the subflows'
 * receive-buffer high-water marks (with the sum of their ideal sizes as
 * the ideal size).
 *
 * Nothing is done when receive autotuning is disabled, the buffer is not
 * SB_AUTOSIZE, tcp_cansbgrow() refuses, the buffer already reached
 * tcp_autorcvbuf_max, SOF1_EXTEND_BK_IDLE_WANTED is set, or the MPTCP
 * reassembly queue is not empty.
 */
void
mptcp_sbrcv_grow(struct mptcb *mp_tp)
{
	struct mptses *mpte = mp_tp->mpt_mpte;
	struct socket *mp_so = mpte->mpte_mppcb->mpp_socket;
	struct sockbuf *sbrcv = &mp_so->so_rcv;
	uint32_t total_hiwat = 0;
	uint32_t total_ideal = 0;
	struct mptsub *mpts;

	if (tcp_do_autorcvbuf == 0) {
		return;
	}
	if ((sbrcv->sb_flags & SB_AUTOSIZE) == 0 ||
	    tcp_cansbgrow(sbrcv) == 0) {
		return;
	}
	if (sbrcv->sb_hiwat >= tcp_autorcvbuf_max) {
		return;
	}
	if ((mp_so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ||
	    !LIST_EMPTY(&mp_tp->mpt_segq)) {
		return;
	}

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		struct sockbuf *sub_rcv = &mpts->mpts_socket->so_rcv;

		total_hiwat += sub_rcv->sb_hiwat;
		total_ideal += sub_rcv->sb_idealsize;
	}

	mptcp_sbrcv_reserve(mp_tp, sbrcv, total_hiwat, total_ideal);
}
/*
 * Opportunistically enlarge receive buffer `sb' by 16 * (largest subflow
 * MSS) when free space has dropped below that increment.
 *
 * Skipped for background receivers, when autotuning is off or not
 * permitted for this buffer, when SOF1_EXTEND_BK_IDLE_WANTED is set, when
 * the buffer is already at tcp_autorcvbuf_max, or when sb_idealsize is
 * zero or the buffer already exceeds sb_idealsize by more than the
 * increment.  The new size is capped at tcp_autorcvbuf_max.
 */
static void
mptcp_sbrcv_grow_rwin(struct mptcb *mp_tp, struct sockbuf *sb)
{
	struct socket *mp_so = mp_tp->mpt_mpte->mpte_mppcb->mpp_socket;
	u_int32_t rcvbufinc = mptcp_get_maxseg(mp_tp->mpt_mpte) << 4;
	u_int32_t rcvbuf = sb->sb_hiwat;

	if (tcp_recv_bg == 1 || IS_TCP_RECV_BG(mp_so)) {
		return;
	}

	/* Is autotuning allowed at all for this buffer? */
	if (tcp_do_autorcvbuf != 1 ||
	    !tcp_cansbgrow(sb) ||
	    (mp_so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) != 0) {
		return;
	}

	/* Still enough room, or nothing left to grow into. */
	if ((rcvbuf - sb->sb_cc) >= rcvbufinc ||
	    rcvbuf >= tcp_autorcvbuf_max) {
		return;
	}

	if (sb->sb_idealsize > 0 &&
	    sb->sb_hiwat <= (sb->sb_idealsize + rcvbufinc)) {
		sbreserve(sb, min((sb->sb_hiwat + rcvbufinc), tcp_autorcvbuf_max));
	}
}
/*
 * Compute the free space in the MPTCP socket's receive buffer.
 *
 * The buffer is first given a chance to grow (mptcp_sbrcv_grow_rwin()).
 * The result is the smaller of the byte space and the mbuf space, floored
 * at zero, minus whatever the content filter reports as pending (when
 * CONTENT_FILTER is built in); it never goes negative.
 *
 * The MPTCP session lock must be held.
 */
int32_t
mptcp_sbspace(struct mptcb *mp_tp)
{
	struct sockbuf *sb = &mp_tp->mpt_mpte->mpte_mppcb->mpp_socket->so_rcv;
	uint32_t rcvbuf;
	int32_t space;
	int32_t pending = 0;

	mpte_lock_assert_held(mp_tp->mpt_mpte);

	mptcp_sbrcv_grow_rwin(mp_tp, sb);

	/* Read the high-water mark only after the possible growth above. */
	rcvbuf = sb->sb_hiwat;
	space = ((int32_t) imin((rcvbuf - sb->sb_cc),
	    (sb->sb_mbmax - sb->sb_mbcnt)));
	if (space < 0) {
		space = 0;
	}

#if CONTENT_FILTER
	/* Data held back by the content filter still occupies the window. */
	pending = cfil_sock_data_space(sb);
#endif

	return ((pending > space) ? 0 : (space - pending));
}
/*
 * Mark a subflow as MPTCP-ready and post SO_FILT_HINT_MPSTATUS.
 *
 * Fires the multipath__ready DTrace probe whenever the socket has a tcpcb.
 * The state change itself only happens when TMPF_MPTCP_TRUE is set and
 * TMPF_MPTCP_READY is not: TMPF_TCP_FALLBACK is cleared and
 * TMPF_MPTCP_READY is set.
 */
void
mptcp_notify_mpready(struct socket *so)
{
	struct tcpcb *tp;

	if (so == NULL) {
		return;
	}

	tp = intotcpcb(sotoinpcb(so));
	if (tp == NULL) {
		return;
	}

	DTRACE_MPTCP4(multipath__ready, struct socket *, so,
	    struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd,
	    struct tcpcb *, tp);

	/* Proceed only if MPTCP is confirmed and readiness not yet flagged. */
	if ((tp->t_mpflags & (TMPF_MPTCP_TRUE | TMPF_MPTCP_READY)) !=
	    TMPF_MPTCP_TRUE) {
		return;
	}

	tp->t_mpflags &= ~TMPF_TCP_FALLBACK;
	tp->t_mpflags |= TMPF_MPTCP_READY;

	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPSTATUS));
}
/*
 * Switch a subflow to TCP fallback and post SO_FILT_HINT_MPSTATUS.
 *
 * Fires the multipath__failed DTrace probe whenever the socket has a
 * tcpcb.  If TMPF_TCP_FALLBACK is not already set, TMPF_MPTCP_READY and
 * TMPF_MPTCP_TRUE are cleared and TMPF_TCP_FALLBACK is set.
 */
void
mptcp_notify_mpfail(struct socket *so)
{
	struct tcpcb *tp;

	if (so == NULL) {
		return;
	}

	tp = intotcpcb(sotoinpcb(so));
	if (tp == NULL) {
		return;
	}

	DTRACE_MPTCP4(multipath__failed, struct socket *, so,
	    struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd,
	    struct tcpcb *, tp);

	if (!(tp->t_mpflags & TMPF_TCP_FALLBACK)) {
		tp->t_mpflags &= ~(TMPF_MPTCP_READY|TMPF_MPTCP_TRUE);
		tp->t_mpflags |= TMPF_TCP_FALLBACK;

		soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPSTATUS));
	}
}
/*
 * Keepalives are allowed only while the MPTCP connection state is below
 * MPTCPS_CLOSE_WAIT.  Returns 1 if allowed, 0 otherwise.
 * The MPTCP session lock must be held.
 */
boolean_t
mptcp_ok_to_keepalive(struct mptcb *mp_tp)
{
	mpte_lock_assert_held(mp_tp->mpt_mpte);

	if (mp_tp->mpt_state >= MPTCPS_CLOSE_WAIT) {
		return (0);
	}
	return (1);
}
/*
 * Return the number of bytes by which the MSS must be lowered to leave
 * room for the MPTCP DSS option, or 0 when no adjustment applies (no MPTCP
 * control block, or the subflow's flags do not call for it).
 *
 * tp       subflow TCP control block
 * mtudisc  non-zero when called for path-MTU discovery: any subflow with
 *          TMPF_MPTCP_TRUE is adjusted.  Otherwise only a non-joined
 *          MPTCP subflow, or a pre-established one that has sent its
 *          join, is adjusted.
 *
 * The MPTCP session lock must be held when an MPTCP control block exists.
 */
int
mptcp_adj_mss(struct tcpcb *tp, boolean_t mtudisc)
{
	int mss_lower = 0;
	struct mptcb *mp_tp = tptomptp(tp);

/* DSS-ack option size plus 2 bytes; sets (does not accumulate) mss_lower. */
#define MPTCP_COMPUTE_LEN { \
	mss_lower = sizeof (struct mptcp_dss_ack_opt); \
	if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) \
		mss_lower += 2; \
	else \
		/* same 2-byte adjustment when checksums are off */ \
		mss_lower += 2; \
}

	if (mp_tp == NULL) {
		return (0);
	}

	mpte_lock_assert_held(mp_tp->mpt_mpte);

	if (mtudisc) {
		if (tp->t_mpflags & TMPF_MPTCP_TRUE) {
			MPTCP_COMPUTE_LEN;
		}
	} else if (((tp->t_mpflags & TMPF_MPTCP_TRUE) &&
	    !(tp->t_mpflags & TMPF_JOINED_FLOW)) ||
	    ((tp->t_mpflags & TMPF_PREESTABLISHED) &&
	    (tp->t_mpflags & TMPF_SENT_JOIN))) {
		/* Either condition yields the same value, so test them once. */
		MPTCP_COMPUTE_LEN;
	}

	return (mss_lower);
}
/*
 * Propagate the last-owner identity (pid, unique pid, uuid) from the MPTCP
 * socket `mp_so' to subflow socket `so' when either id differs, then
 * refresh the subflow's policy via so_update_policy().
 */
void
mptcp_update_last_owner(struct socket *so, struct socket *mp_so)
{
	int owner_changed = (so->last_pid != mp_so->last_pid ||
	    so->last_upid != mp_so->last_upid);

	if (owner_changed) {
		so->last_upid = mp_so->last_upid;
		so->last_pid = mp_so->last_pid;
		uuid_copy(so->last_uuid, mp_so->last_uuid);
	}

	so_update_policy(so);
}
/*
 * Fill `flow' with a description of subflow `mpts' whose socket is `so':
 * TCP connection info, local/foreign addresses and ports taken from the
 * inpcb (IPv6 when INP_IPV6 is set and INET6 is built in, else IPv4 when
 * INP_IPV4 is set), and the subflow's flags, connection id, relative
 * sequence, socket error and probe count.
 */
static void
fill_mptcp_subflow(struct socket *so, mptcp_flow_t *flow, struct mptsub *mpts)
{
	struct inpcb *inp;

	tcp_getconninfo(so, &flow->flow_ci);
	inp = sotoinpcb(so);

#if INET6
	if ((inp->inp_vflag & INP_IPV6) != 0) {
		struct sockaddr_in6 *src6 = SIN6(&flow->flow_src);
		struct sockaddr_in6 *dst6 = SIN6(&flow->flow_dst);

		flow->flow_src.ss_family = AF_INET6;
		flow->flow_src.ss_len = sizeof(struct sockaddr_in6);
		src6->sin6_port = inp->in6p_lport;
		src6->sin6_addr = inp->in6p_laddr;

		flow->flow_dst.ss_family = AF_INET6;
		flow->flow_dst.ss_len = sizeof(struct sockaddr_in6);
		dst6->sin6_port = inp->in6p_fport;
		dst6->sin6_addr = inp->in6p_faddr;
	} else
#endif
	if ((inp->inp_vflag & INP_IPV4) != 0) {
		struct sockaddr_in *src4 = SIN(&flow->flow_src);
		struct sockaddr_in *dst4 = SIN(&flow->flow_dst);

		flow->flow_src.ss_family = AF_INET;
		flow->flow_src.ss_len = sizeof(struct sockaddr_in);
		src4->sin_port = inp->inp_lport;
		src4->sin_addr = inp->inp_laddr;

		flow->flow_dst.ss_family = AF_INET;
		flow->flow_dst.ss_len = sizeof(struct sockaddr_in);
		dst4->sin_port = inp->inp_fport;
		dst4->sin_addr = inp->inp_faddr;
	}

	/* Record framing. */
	flow->flow_len = sizeof(*flow);
	flow->flow_tcpci_offset = offsetof(mptcp_flow_t, flow_ci);

	/* Per-subflow state. */
	flow->flow_cid = mpts->mpts_connid;
	flow->flow_flags = mpts->mpts_flags;
	flow->flow_relseq = mpts->mpts_rel_seq;
	flow->flow_probecnt = mpts->mpts_probecnt;
	flow->flow_soerror = mpts->mpts_socket->so_error;
}
/*
 * Read-only sysctl handler that exports every MPTCP PCB.
 *
 * For each PCB one conninfo_mptcp_t is copied out.  When the connection
 * has subflows, the record is written without its trailing embedded
 * mptcp_flow_t and is followed by one mptcp_flow_t per subflow;
 * mptcpci_len gives the total record size.
 *
 * A size probe (oldptr == USER_ADDR_NULL) returns an estimate based on the
 * current PCB count with 1/8 slack and four flows per connection.
 *
 * Returns EPERM on any attempt to write, ENOMEM if the temporary flow
 * array cannot be allocated, the SYSCTL_OUT() error if copy-out fails,
 * and 0 otherwise.
 */
static int
mptcp_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error = 0, f;
	size_t len;
	struct mppcb *mpp;
	struct mptses *mpte;
	struct mptcb *mp_tp;
	struct mptsub *mpts;
	struct socket *so;
	conninfo_mptcp_t mptcpci;
	mptcp_flow_t *flows = NULL;

	if (req->newptr != USER_ADDR_NULL)
		return (EPERM);

	lck_mtx_lock(&mtcbinfo.mppi_lock);
	if (req->oldptr == USER_ADDR_NULL) {
		size_t n = mtcbinfo.mppi_count;
		lck_mtx_unlock(&mtcbinfo.mppi_lock);
		req->oldidx = (n + n/8) * sizeof(conninfo_mptcp_t) +
		    4 * (n + n/8) * sizeof(mptcp_flow_t);
		return (0);
	}
	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		flows = NULL;
		mpp_lock(mpp);
		VERIFY(mpp->mpp_flags & MPP_ATTACHED);
		mpte = mptompte(mpp);
		VERIFY(mpte != NULL);
		mpte_lock_assert_held(mpte);
		mp_tp = mpte->mpte_mptcb;
		VERIFY(mp_tp != NULL);

		/* Snapshot the connection-level state. */
		bzero(&mptcpci, sizeof(mptcpci));
		mptcpci.mptcpci_state = mp_tp->mpt_state;
		mptcpci.mptcpci_flags = mp_tp->mpt_flags;
		mptcpci.mptcpci_ltoken = mp_tp->mpt_localtoken;
		mptcpci.mptcpci_rtoken = mp_tp->mpt_remotetoken;
		mptcpci.mptcpci_notsent_lowat = mp_tp->mpt_notsent_lowat;
		mptcpci.mptcpci_snduna = mp_tp->mpt_snduna;
		mptcpci.mptcpci_sndnxt = mp_tp->mpt_sndnxt;
		mptcpci.mptcpci_sndmax = mp_tp->mpt_sndmax;
		mptcpci.mptcpci_lidsn = mp_tp->mpt_local_idsn;
		mptcpci.mptcpci_sndwnd = mp_tp->mpt_sndwnd;
		mptcpci.mptcpci_rcvnxt = mp_tp->mpt_rcvnxt;
		mptcpci.mptcpci_rcvatmark = mp_tp->mpt_rcvnxt;
		mptcpci.mptcpci_ridsn = mp_tp->mpt_remote_idsn;
		mptcpci.mptcpci_rcvwnd = mp_tp->mpt_rcvwnd;

		mptcpci.mptcpci_nflows = mpte->mpte_numflows;
		mptcpci.mptcpci_mpte_flags = mpte->mpte_flags;
		mptcpci.mptcpci_mpte_addrid = mpte->mpte_addrid_last;
		mptcpci.mptcpci_flow_offset =
		    offsetof(conninfo_mptcp_t, mptcpci_flows);

		len = sizeof(*flows) * mpte->mpte_numflows;
		if (mpte->mpte_numflows != 0) {
			flows = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO);
			if (flows == NULL) {
				mpp_unlock(mpp);
				/* Report the failure instead of a short "success". */
				error = ENOMEM;
				break;
			}
			mptcpci.mptcpci_len = sizeof(mptcpci) +
			    sizeof(*flows) * (mptcpci.mptcpci_nflows - 1);
			/* Header only: the flow array follows separately. */
			error = SYSCTL_OUT(req, &mptcpci,
			    sizeof(mptcpci) - sizeof(mptcp_flow_t));
		} else {
			mptcpci.mptcpci_len = sizeof(mptcpci);
			error = SYSCTL_OUT(req, &mptcpci, sizeof(mptcpci));
		}
		if (error) {
			mpp_unlock(mpp);
			FREE(flows, M_TEMP);
			break;
		}

		f = 0;
		TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
			/* Never write beyond the entries allocated above. */
			if (flows == NULL || f >= (int)mpte->mpte_numflows)
				break;
			so = mpts->mpts_socket;
			fill_mptcp_subflow(so, &flows[f], mpts);
			f++;
		}
		mpp_unlock(mpp);

		if (flows) {
			error = SYSCTL_OUT(req, flows, len);
			FREE(flows, M_TEMP);
			if (error)
				break;
		}
	}
	lck_mtx_unlock(&mtcbinfo.mppi_lock);

	return (error);
}
/* Register mptcp_pcblist() as the read-only "pcblist" node under _net_inet_mptcp. */
SYSCTL_PROC(_net_inet_mptcp, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED,
0, 0, mptcp_pcblist, "S,conninfo_mptcp_t",
"List of active MPTCP connections");
/*
 * Set the connection's not-sent low-water mark to `optval'.
 * Returns 0 on success, or EINVAL when the PCB is not attached or has no
 * MPTCP control block.
 */
int
mptcp_set_notsent_lowat(struct mptses *mpte, int optval)
{
	struct mptcb *mp_tp = NULL;

	if (mpte->mpte_mppcb->mpp_flags & MPP_ATTACHED) {
		mp_tp = mpte->mpte_mptcb;
	}

	if (mp_tp == NULL) {
		return (EINVAL);
	}

	mp_tp->mpt_notsent_lowat = optval;
	return (0);
}
/*
 * Return the connection's not-sent low-water mark, or 0 when the PCB is
 * not attached or has no MPTCP control block.
 */
u_int32_t
mptcp_get_notsent_lowat(struct mptses *mpte)
{
	struct mptcb *mp_tp = NULL;

	if (mpte->mpte_mppcb->mpp_flags & MPP_ATTACHED) {
		mp_tp = mpte->mpte_mptcb;
	}

	if (mp_tp == NULL) {
		return (0);
	}

	return (mp_tp->mpt_notsent_lowat);
}
/*
 * Decide whether the MPTCP socket should be reported writable with respect
 * to its not-sent low-water mark.
 *
 * Returns 1 when the send buffer is empty or the not-yet-sent amount
 * (buffered bytes minus mpt_sndnxt - mpt_snduna) is at or below the mark.
 * Otherwise the first subflow flagged MPTSF_ACTIVE decides: 1 if TF_NODELAY
 * is off and the bytes that subflow has not sent yet are positive but no
 * more than its t_maxseg, else 0.  Returns 0 when the PCB is missing or
 * dead, or no subflow is active.
 *
 * The MPTCP session lock must be held.
 */
int
mptcp_notsent_lowat_check(struct socket *so)
{
	struct mptses *mpte;
	struct mppcb *mpp;
	struct mptcb *mp_tp;
	struct mptsub *mpts;
	int notsent = 0;

	mpp = mpsotomppcb(so);
	if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
		return (0);
	}

	mpte = mptompte(mpp);
	mpte_lock_assert_held(mpte);
	mp_tp = mpte->mpte_mptcb;

	notsent = so->so_snd.sb_cc;

	/* Connection-level check against the configured mark. */
	if ((notsent == 0) ||
	    ((notsent - (mp_tp->mpt_sndnxt - mp_tp->mpt_snduna)) <=
	    mp_tp->mpt_notsent_lowat)) {
		mptcplog((LOG_DEBUG, "MPTCP Sender: "
		    "lowat %d notsent %d actual %d \n",
		    mp_tp->mpt_notsent_lowat, notsent,
		    notsent - (mp_tp->mpt_sndnxt - mp_tp->mpt_snduna)),
		    MPTCP_SENDER_DBG , MPTCP_LOGLVL_VERBOSE);
		return (1);
	}

	/* Otherwise let the first active subflow decide. */
	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		struct socket *subf_so;
		struct tcpcb *tp;
		int retval = 0;

		if (!(mpts->mpts_flags & MPTSF_ACTIVE)) {
			continue;
		}

		subf_so = mpts->mpts_socket;
		tp = intotcpcb(sotoinpcb(subf_so));

		notsent = so->so_snd.sb_cc -
		    (tp->snd_nxt - tp->snd_una);

		/* Less than one segment left to send while Nagle is on. */
		if ((tp->t_flags & TF_NODELAY) == 0 &&
		    notsent > 0 && (notsent <= (int)tp->t_maxseg)) {
			retval = 1;
		}
		mptcplog((LOG_DEBUG, "MPTCP Sender: lowat %d notsent %d"
		    " nodelay false \n",
		    mp_tp->mpt_notsent_lowat, notsent),
		    MPTCP_SENDER_DBG , MPTCP_LOGLVL_VERBOSE);
		return (retval);
	}

	return (0);
}
/* Kernel-control reference filled in by ctl_register() in mptcp_control_register(). */
static kern_ctl_ref mptcp_kern_ctrl_ref = NULL;
/* Count of open control sockets: bumped on connect, dropped on disconnect. */
static uint32_t mptcp_kern_skt_inuse = 0;
/* Unit of the most recently connected control socket; 0 is treated as "none" by mptcp_ask_symptoms(). */
static uint32_t mptcp_kern_skt_unit;
/* Latest advisory state; sa_wifi_status is updated by mptcp_symptoms_ctl_send(). */
symptoms_advisory_t mptcp_advisory;
/*
 * Kernel-control connect callback.  Counts the new control socket (logging
 * an error if one was already open) and remembers its unit number for
 * later ctl_enqueuedata() calls.  Always returns 0.
 */
static errno_t
mptcp_symptoms_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
    void **unitinfo)
{
#pragma unused(kctlref, unitinfo)

	/* OSIncrementAtomic() hands back the pre-increment count. */
	if (OSIncrementAtomic(&mptcp_kern_skt_inuse) > 0) {
		mptcplog((LOG_ERR, "%s MPTCP kernel-control socket already open!", __func__),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
	}

	mptcp_kern_skt_unit = sac->sc_unit;

	return (0);
}
/*
 * Grant access to every MPTCP connection owned by `uuid'.
 *
 * Each PCB's owner is its socket's e_uuid when SOF_DELEGATED is set, else
 * its last_uuid.  For a matching PCB, MPTE_ACCESS_GRANTED is raised while
 * subflows are re-evaluated (mptcp_check_subflows_and_add() followed by
 * mptcp_remove_subflows()) and cleared again afterwards.
 */
static void
mptcp_allow_uuid(uuid_t uuid)
{
	struct mppcb *mpp;

	lck_mtx_lock(&mtcbinfo.mppi_lock);

	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		struct socket *mp_so;
		int mismatch;

		mpp_lock(mpp);

		mp_so = mpp->mpp_socket;
		if (mp_so->so_flags & SOF_DELEGATED) {
			mismatch = uuid_compare(uuid, mp_so->e_uuid);
		} else {
			mismatch = uuid_compare(uuid, mp_so->last_uuid);
		}

		if (mismatch == 0) {
			struct mptses *mpte = mpp->mpp_pcbe;

			mpte->mpte_flags |= MPTE_ACCESS_GRANTED;

			mptcp_check_subflows_and_add(mpte);
			mptcp_remove_subflows(mpte);

			mpte->mpte_flags &= ~MPTE_ACCESS_GRANTED;
		}

		mpp_unlock(mpp);
	}

	lck_mtx_unlock(&mtcbinfo.mppi_lock);
}
/*
 * React to a change in the advised Wi-Fi status: for every MPTCP
 * connection whose service type is MPTCP_SVCTYPE_HANDOVER, re-evaluate
 * which subflows to add and which to remove.
 */
static void
mptcp_wifi_status_changed(void)
{
	struct mppcb *mpp;

	lck_mtx_lock(&mtcbinfo.mppi_lock);

	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		struct mptses *mpte;

		mpp_lock(mpp);

		mpte = mpp->mpp_pcbe;
		if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER) {
			mptcp_check_subflows_and_add(mpte);
			mptcp_check_subflows_and_remove(mpte);
		}

		mpp_unlock(mpp);
	}

	lck_mtx_unlock(&mtcbinfo.mppi_lock);
}
/*
 * Send an MPTCP_SYMPTOMS_ASK_UUID request over the kernel-control socket
 * for the process owning this MPTCP connection.
 *
 * The owner is the delegated (e_pid / e_uuid) identity when SOF_DELEGATED
 * is set, else the last (last_pid / last_uuid) one.  The request carries
 * the owner's uuid and a priority derived from its effective task role.
 * Nothing is sent when no control socket has connected yet or the owning
 * process cannot be found.
 */
void
mptcp_ask_symptoms(struct mptses *mpte)
{
	struct mptcp_symptoms_ask_uuid ask;
	struct socket *mp_so;
	struct proc *p;
	int pid, prio, err;

	if (mptcp_kern_skt_unit == 0) {
		mptcplog((LOG_ERR, "%s skt_unit is still 0\n", __func__),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
		return;
	}

	mp_so = mptetoso(mpte);

	/* Pick the owner identity once: pid for lookup, uuid for the request. */
	if (mp_so->so_flags & SOF_DELEGATED) {
		pid = mp_so->e_pid;
		uuid_copy(ask.uuid, mp_so->e_uuid);
	} else {
		pid = mp_so->last_pid;
		uuid_copy(ask.uuid, mp_so->last_uuid);
	}
	ask.cmd = MPTCP_SYMPTOMS_ASK_UUID;

	p = proc_find(pid);
	if (p == PROC_NULL) {
		mptcplog((LOG_ERR, "%s Couldn't find proc for pid %u\n", __func__,
		    pid), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
		return;
	}

	prio = proc_get_effective_task_policy(proc_task(p), TASK_POLICY_ROLE);
	switch (prio) {
	case TASK_BACKGROUND_APPLICATION:
		ask.priority = MPTCP_SYMPTOMS_BACKGROUND;
		break;
	case TASK_FOREGROUND_APPLICATION:
		ask.priority = MPTCP_SYMPTOMS_FOREGROUND;
		break;
	default:
		ask.priority = MPTCP_SYMPTOMS_UNKNOWN;
		break;
	}

	mptcplog((LOG_DEBUG, "%s ask symptoms about pid %u, prio %u\n", __func__,
	    pid, ask.priority), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

	err = ctl_enqueuedata(mptcp_kern_ctrl_ref, mptcp_kern_skt_unit,
	    &ask, sizeof(ask), CTL_DATA_EOR);
	if (err) {
		mptcplog((LOG_ERR, "%s ctl_enqueuedata failed %d\n", __func__, err),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
	}

	/* Drop the reference taken by proc_find(). */
	proc_rele(p);
}
/*
 * Kernel-control disconnect callback: drops the open-socket count taken in
 * the connect callback.  Always returns 0.
 */
static errno_t
mptcp_symptoms_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit,
    void *unitinfo)
{
#pragma unused(kctlref, kcunit, unitinfo)

	(void)OSDecrementAtomic(&mptcp_kern_skt_inuse);

	return (0);
}
/*
 * Kernel-control send callback: handles an advisory message written by the
 * user-space client.
 *
 * kcunit  unit of the sending control socket (a mismatch with the
 *         remembered unit is logged but otherwise tolerated)
 * m       mbuf chain holding a symptoms_advisory_t, followed by a uuid_t
 *         for SYMPTOMS_ADVISORY_USEAPP messages
 *
 * Depending on sa_nwk_status the message either updates the cached Wi-Fi
 * status (triggering mptcp_wifi_status_changed() when it changes), is a
 * no-comment message that is only logged, or grants access to the
 * connections owned by the supplied uuid.
 *
 * Returns 0 on success, or EINVAL when the message is too short.  This
 * callback owns `m', so the chain is released on every path.
 */
static errno_t
mptcp_symptoms_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
    mbuf_t m, int flags)
{
#pragma unused(kctlref, unitinfo, flags)
	symptoms_advisory_t *sa = NULL;
	errno_t error = 0;

	if (kcunit != mptcp_kern_skt_unit)
		mptcplog((LOG_ERR, "%s kcunit %u is different from expected one %u\n",
		    __func__, kcunit, mptcp_kern_skt_unit),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);

	/*
	 * The advisory must be present in the packet and contiguous in the
	 * first mbuf, because it is accessed through mbuf_data() directly.
	 */
	if (mbuf_pkthdr_len(m) < sizeof(*sa) || mbuf_len(m) < sizeof(*sa)) {
		error = EINVAL;
		goto out;
	}

	sa = mbuf_data(m);

	if (sa->sa_nwk_status != SYMPTOMS_ADVISORY_NOCOMMENT &&
	    sa->sa_nwk_status != SYMPTOMS_ADVISORY_USEAPP) {
		uint8_t old_wifi_status = mptcp_advisory.sa_wifi_status;

		mptcplog((LOG_DEBUG, "%s: wifi %d,%d\n",
		    __func__, sa->sa_wifi_status, mptcp_advisory.sa_wifi_status),
		    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_VERBOSE);

		/* Ignore contradictory reports that claim both BAD and OK. */
		if ((sa->sa_wifi_status &
		    (SYMPTOMS_ADVISORY_WIFI_BAD | SYMPTOMS_ADVISORY_WIFI_OK)) !=
		    (SYMPTOMS_ADVISORY_WIFI_BAD | SYMPTOMS_ADVISORY_WIFI_OK))
			mptcp_advisory.sa_wifi_status = sa->sa_wifi_status;

		if (old_wifi_status != mptcp_advisory.sa_wifi_status)
			mptcp_wifi_status_changed();
	} else if (sa->sa_nwk_status == SYMPTOMS_ADVISORY_NOCOMMENT) {
		mptcplog((LOG_DEBUG, "%s: NOCOMMENT wifi %d\n", __func__,
		    mptcp_advisory.sa_wifi_status),
		    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_VERBOSE);
	} else if (sa->sa_nwk_status == SYMPTOMS_ADVISORY_USEAPP) {
		uuid_t uuid;

		mptcplog((LOG_DEBUG, "%s Got response about useApp\n", __func__),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

		/*
		 * The uuid trails the advisory; make sure the client really
		 * sent it before reading past the end of the advisory.
		 */
		if (mbuf_len(m) < sizeof(*sa) + sizeof(uuid_t)) {
			error = EINVAL;
			goto out;
		}

		uuid_copy(uuid, (unsigned char *)(sa + 1));

		mptcp_allow_uuid(uuid);
	}

out:
	/* Nothing above keeps a pointer into the chain, so release it here. */
	mbuf_freem(m);
	return (error);
}
/*
 * Register the privileged MPTCP kernel control (MPTCP_KERN_CTL_NAME) with
 * its connect, disconnect and send callbacks.  The resulting reference is
 * stored in mptcp_kern_ctrl_ref; the registration result is ignored.
 */
void
mptcp_control_register(void)
{
	struct kern_ctl_reg mptcp_kern_ctl;

	bzero(&mptcp_kern_ctl, sizeof(mptcp_kern_ctl));

	mptcp_kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED;
	mptcp_kern_ctl.ctl_connect = mptcp_symptoms_ctl_connect;
	mptcp_kern_ctl.ctl_disconnect = mptcp_symptoms_ctl_disconnect;
	mptcp_kern_ctl.ctl_send = mptcp_symptoms_ctl_send;
	strlcpy(mptcp_kern_ctl.ctl_name, MPTCP_KERN_CTL_NAME,
	    sizeof(mptcp_kern_ctl.ctl_name));

	(void)ctl_register(&mptcp_kern_ctl, &mptcp_kern_ctrl_ref);
}
/*
 * Non-zero when the cached advisory has SYMPTOMS_ADVISORY_WIFI_BAD set.
 * The raw masked bit is returned, not a normalised 0/1.
 */
int
mptcp_is_wifi_unusable(void)
{
	int wifi_bad = (mptcp_advisory.sa_wifi_status & SYMPTOMS_ADVISORY_WIFI_BAD);

	return (wifi_bad);
}
/*
 * After a TFO SYN whose data was acknowledged (TFO_S_SYN_DATA_ACKED),
 * remove that data from the MPTCP-level send buffer and realign the
 * sequence bookkeeping.
 *
 * tcp_droplen is what the subflow had acknowledged beyond its SYN
 * (snd_una - iss - 1); mp_droplen is what MPTCP considers in flight
 * (mpt_sndnxt - mpt_snduna).  If MPTCP has more in flight than TCP
 * acknowledged, only the acknowledged part is dropped, mpt_sndnxt is left
 * ahead of mpt_snduna by the difference, and SOF1_TFO_REWIND is set on the
 * MPTCP socket.  Otherwise the subflow's relative sequence restarts at 1
 * and mpt_sndnxt is reset to mpt_snduna.
 *
 * Does nothing when the SYN data was not acknowledged.
 */
static void
mptcp_drop_tfo_data(struct mptses *mpte, struct mptsub *mpts)
{
	struct socket *mp_so = mptetoso(mpte);
	struct socket *so = mpts->mpts_socket;
	struct tcpcb *tp = intotcpcb(sotoinpcb(so));
	struct mptcb *mp_tp = mpte->mpte_mptcb;

	if (tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED) {
		u_int64_t mp_droplen = mp_tp->mpt_sndnxt - mp_tp->mpt_snduna;
		unsigned int tcp_droplen = tp->snd_una - tp->iss - 1;

		VERIFY(mp_droplen <= (UINT_MAX));
		VERIFY(mp_droplen >= tcp_droplen);

		/* The TFO phase is over for this subflow. */
		mpts->mpts_flags &= ~MPTSF_TFO_REQD;
		mpts->mpts_iss += tcp_droplen;
		tp->t_mpflags &= ~TMPF_TFO_REQUEST;

		if (mp_droplen > tcp_droplen) {
			/* Only part was acknowledged: keep the rest in flight. */
			mp_so->so_flags1 |= SOF1_TFO_REWIND;
			mp_tp->mpt_sndnxt = mp_tp->mpt_snduna + (mp_droplen - tcp_droplen);
			mp_droplen = tcp_droplen;
		} else {
			mpts->mpts_rel_seq = 1;
			mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;
		}
		mp_tp->mpt_sndmax -= tcp_droplen;

		if (mp_droplen != 0) {
			VERIFY(mp_so->so_snd.sb_mb != NULL);
			sbdrop(&mp_so->so_snd, (int)mp_droplen);
		}

		/*
		 * mp_droplen is 64 bits wide but printed with %d; cast it so
		 * the argument matches the conversion (it was asserted above
		 * to fit in 32 bits).
		 */
		mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx cid %d TFO tcp len %d mptcp len %d\n",
		    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
		    mpts->mpts_connid, tcp_droplen, (int)mp_droplen),
		    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
	}
}
/*
 * Empty the MPTCP reassembly queue: unlink every entry, free its mbuf
 * chain and return the entry to tcp_reass_zone.  The queue length counter
 * is reset to 0.
 *
 * Returns 1 if at least one entry was freed, 0 if the queue was empty.
 */
int
mptcp_freeq(struct mptcb *mp_tp)
{
	struct tseg_qent *q;
	int freed_any = 0;

	while (!LIST_EMPTY(&mp_tp->mpt_segq)) {
		q = LIST_FIRST(&mp_tp->mpt_segq);

		LIST_REMOVE(q, tqe_q);
		m_freem(q->tqe_m);
		zfree(tcp_reass_zone, q);

		freed_any = 1;
	}

	mp_tp->mpt_reassqlen = 0;

	return (freed_any);
}
/*
 * Post a kernel event in the Apple / network / MPTCP class hierarchy with
 * the given event code and a single integer payload.
 * Returns whatever kev_post_msg() returns.
 */
static int
mptcp_post_event(u_int32_t event_code, int value)
{
	struct kev_mptcp_data event_data;
	struct kev_msg ev_msg;

	event_data.value = value;

	memset(&ev_msg, 0, sizeof(ev_msg));

	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_MPTCP_SUBCLASS;
	ev_msg.event_code = event_code;

	/* Single data vector pointing at the payload. */
	ev_msg.dv[0].data_length = sizeof(event_data);
	ev_msg.dv[0].data_ptr = &event_data;

	return kev_post_msg(&ev_msg);
}
/*
 * Note cellular use by this connection and, if the cell-icon bit was not
 * already set, post KEV_MPTCP_CELLUSE with value 1.
 *
 * Connections flagged MPTE_FIRSTPARTY are ignored entirely.  The
 * last-set timestamp is refreshed on every other call, even when the
 * icon is already showing.
 */
void
mptcp_set_cellicon(struct mptses *mpte)
{
	int error;

	if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
		return;
	}

	/* Remember when the icon was last wanted. */
	mptcp_last_cellicon_set = tcp_now;

	/* A non-zero result is taken to mean the bit was already set. */
	if (OSTestAndSet(7, &mptcp_cellicon_is_set)) {
		return;
	}

	error = mptcp_post_event(KEV_MPTCP_CELLUSE, 1);
	if (error == 0) {
		mptcplog((LOG_DEBUG, "%s successfully set the cellicon\n", __func__),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
		return;
	}

	mptcplog((LOG_ERR, "%s: Setting cellicon failed with %d\n",
	    __func__, error), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
}
/*
 * Clear the cell-icon bit and post KEV_MPTCP_CELLUSE with value 0, unless
 * the icon was set less than MPTCP_CELLICON_TOGGLE_RATE ago - in that case
 * the bit is set again and no event is posted.
 *
 * Returns without doing anything else when OSTestAndClear() reports
 * non-zero (taken to mean the bit was not set to begin with).
 */
void
mptcp_unset_cellicon(void)
{
	int error;

	if (OSTestAndClear(7, &mptcp_cellicon_is_set)) {
		return;
	}

	/* Too soon after the last "set": restore the bit and keep the icon. */
	if (TSTMP_GT(mptcp_last_cellicon_set + MPTCP_CELLICON_TOGGLE_RATE,
	    tcp_now)) {
		OSTestAndSet(7, &mptcp_cellicon_is_set);
		return;
	}

	error = mptcp_post_event(KEV_MPTCP_CELLUSE, 0);
	if (error == 0) {
		mptcplog((LOG_DEBUG, "%s successfully unset the cellicon\n", __func__),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
		return;
	}

	mptcplog((LOG_ERR, "%s: Unsetting cellicon failed with %d\n",
	    __func__, error), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
}
/*
 * Clear the retransmission-related failover state of a subflow:
 * MPTSF_WRITE_STALL on the subflow and SOF_MP_TRYFAILOVER on its socket.
 * No-op when the tcpcb has no inpcb or socket, or the socket is not an
 * MPTCP subflow.
 */
void
mptcp_reset_rexmit_state(struct tcpcb *tp)
{
	struct inpcb *inp = tp->t_inpcb;
	struct socket *so = (inp != NULL) ? inp->inp_socket : NULL;

	if (so == NULL || !(so->so_flags & SOF_MP_SUBFLOW)) {
		return;
	}

	tp->t_mpsub->mpts_flags &= ~MPTSF_WRITE_STALL;
	so->so_flags &= ~SOF_MP_TRYFAILOVER;
}
/*
 * Clear MPTSF_READ_STALL on the subflow attached to `tp'.
 * The tcpcb's t_mpsub pointer is dereferenced unconditionally.
 */
void
mptcp_reset_keepalive(struct tcpcb *tp)
{
	tp->t_mpsub->mpts_flags &= ~MPTSF_READ_STALL;
}