#include <sys/types.h>
#include <sys/kern_control.h>
#include <sys/queue.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/syslog.h>
#include <kern/locks.h>
#include <kern/zalloc.h>
#include <kern/debug.h>
#include <net/content_filter.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <string.h>
#include <libkern/libkern.h>
#define MAX_CONTENT_FILTER 2
struct cfil_entry;
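/*
 * A content filter agent attached over the kernel control interface.
 * There is at most one instance per kernel control unit, tracked in
 * the global content_filters array; cf_sock_entries links every
 * cfil_entry currently attached to this filter. All of this state is
 * protected by cfil_lck_rw.
 */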
struct content_filter {
kern_ctl_ref cf_kcref;
u_int32_t cf_kcunit;
u_int32_t cf_flags;
uint32_t cf_necp_control_unit;
uint32_t cf_sock_count;
TAILQ_HEAD(, cfil_entry) cf_sock_entries;
};
#define CFF_ACTIVE 0x01			/* filter is active */
#define CFF_DETACHING 0x02		/* filter is being detached */
#define CFF_FLOW_CONTROLLED 0x04	/* filter kctl socket is flow controlled */
struct content_filter **content_filters = NULL;
uint32_t cfil_active_count = 0;
uint32_t cfil_sock_attached_count = 0;
uint32_t cfil_close_wait_timeout = 1000; /* in milliseconds */
static kern_ctl_ref cfil_kctlref = NULL;
static lck_grp_attr_t *cfil_lck_grp_attr = NULL;
static lck_attr_t *cfil_lck_attr = NULL;
static lck_grp_t *cfil_lck_grp = NULL;
decl_lck_rw_data(static, cfil_lck_rw);
#define CFIL_RW_LCK_MAX 8
int cfil_rw_nxt_lck = 0;
void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];
int cfil_rw_nxt_unlck = 0;
void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];
#define CONTENT_FILTER_ZONE_NAME "content_filter"
#define CONTENT_FILTER_ZONE_MAX 10
static struct zone *content_filter_zone = NULL;
#define CFIL_INFO_ZONE_NAME "cfil_info"
#define CFIL_INFO_ZONE_MAX 1024
static struct zone *cfil_info_zone = NULL;
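/*
 * A cfil_queue is a chain of mbufs bracketed by absolute byte offsets
 * into the socket data stream: q_start is the offset of the first
 * byte held, q_end the offset one past the last, so q_end - q_start
 * is the number of bytes currently queued.
 */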
MBUFQ_HEAD(cfil_mqhead);
struct cfil_queue {
uint64_t q_start;
uint64_t q_end;
struct cfil_mqhead q_mq;
};
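/*
 * Per-socket, per-filter state. Each direction (cfe_snd outgoing,
 * cfe_rcv incoming) has a control queue of data not yet passed by the
 * filter agent, a pending queue of data passed but not yet injected,
 * and the pass/peek offsets most recently set by the agent.
 */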
struct cfil_entry {
TAILQ_ENTRY(cfil_entry) cfe_link;
struct content_filter *cfe_filter;
struct cfil_info *cfe_cfil_info;
uint32_t cfe_flags;
uint32_t cfe_necp_control_unit;
struct timeval cfe_last_event;
struct timeval cfe_last_action;
struct cfe_buf {
struct cfil_queue cfe_pending_q;
struct cfil_queue cfe_ctl_q;
uint64_t cfe_pass_offset;
uint64_t cfe_peek_offset;
uint64_t cfe_peeked;
} cfe_snd, cfe_rcv;
};
#define CFEF_CFIL_ATTACHED 0x0001	/* was attached to a filter */
#define CFEF_SENT_SOCK_ATTACHED 0x0002	/* sock attach event was sent */
#define CFEF_DATA_START 0x0004		/* can send data events */
#define CFEF_FLOW_CONTROLLED 0x0008	/* wait for flow control lift */
#define CFEF_SENT_DISCONNECT_IN 0x0010	/* disconnect in event was sent */
#define CFEF_SENT_DISCONNECT_OUT 0x0020	/* disconnect out event was sent */
#define CFEF_SENT_SOCK_CLOSED 0x0040	/* closed event was sent */
#define CFEF_CFIL_DETACHED 0x0080	/* filter was detached */
#define CFI_ADD_TIME_LOG(cfil, t1, t0, op) \
struct timeval _tdiff; \
if ((cfil)->cfi_op_list_ctr < CFI_MAX_TIME_LOG_ENTRY) { \
timersub(t1, t0, &_tdiff); \
(cfil)->cfi_op_time[(cfil)->cfi_op_list_ctr] = (uint32_t)(_tdiff.tv_sec * 1000 + _tdiff.tv_usec / 1000);\
(cfil)->cfi_op_list[(cfil)->cfi_op_list_ctr] = (unsigned char)op; \
(cfil)->cfi_op_list_ctr ++; \
}
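/*
 * Per-socket content filter state, hung off so->so_cfil: one
 * cfil_entry per possible filter, the send/receive inject queues, and
 * a short log of event times that is reported back in the socket
 * closed event.
 */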
struct cfil_info {
TAILQ_ENTRY(cfil_info) cfi_link;
struct socket *cfi_so;
uint64_t cfi_flags;
uint64_t cfi_sock_id;
struct timeval64 cfi_first_event;
uint32_t cfi_op_list_ctr;
uint32_t cfi_op_time[CFI_MAX_TIME_LOG_ENTRY];
unsigned char cfi_op_list[CFI_MAX_TIME_LOG_ENTRY];
struct cfi_buf {
uint64_t cfi_pending_first;
uint64_t cfi_pending_last;
int cfi_pending_mbcnt;
uint64_t cfi_pass_offset;
struct cfil_queue cfi_inject_q;
} cfi_snd, cfi_rcv;
struct cfil_entry cfi_entries[MAX_CONTENT_FILTER];
} __attribute__((aligned(8)));
#define CFIF_DROP 0x0001		/* drop action applied */
#define CFIF_CLOSE_WAIT 0x0002		/* waiting for filter to close */
#define CFIF_SOCK_CLOSED 0x0004		/* socket is closed */
#define CFIF_RETRY_INJECT_IN 0x0010	/* inject in failed, retry */
#define CFIF_RETRY_INJECT_OUT 0x0020	/* inject out failed, retry */
#define CFIF_SHUT_WR 0x0040		/* shutdown write */
#define CFIF_SHUT_RD 0x0080		/* shutdown read */
#define CFI_MASK_GENCNT 0xFFFFFFFF00000000	/* upper 32 bits */
#define CFI_SHIFT_GENCNT 32
#define CFI_MASK_FLOWHASH 0x00000000FFFFFFFF	/* lower 32 bits */
#define CFI_SHIFT_FLOWHASH 0
TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;
#define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
#define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)
struct cfil_stats cfil_stats;
int cfil_log_level = LOG_ERR;
int cfil_debug = 1;
static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int,
struct sysctl_req *);
static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int,
struct sysctl_req *);
SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "cfil");
SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW|CTLFLAG_LOCKED,
&cfil_log_level, 0, "");
SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW|CTLFLAG_LOCKED,
&cfil_debug, 0, "");
SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD|CTLFLAG_LOCKED,
&cfil_sock_attached_count, 0, "");
SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD|CTLFLAG_LOCKED,
&cfil_active_count, 0, "");
SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW|CTLFLAG_LOCKED,
&cfil_close_wait_timeout, 0, "");
static int cfil_sbtrim = 1;
SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW|CTLFLAG_LOCKED,
&cfil_sbtrim, 0, "");
SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD|CTLFLAG_LOCKED,
0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");
SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD|CTLFLAG_LOCKED,
0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");
SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD|CTLFLAG_LOCKED,
&cfil_stats, cfil_stats, "");
static int cfil_action_data_pass(struct socket *, uint32_t, int,
uint64_t, uint64_t);
static int cfil_action_drop(struct socket *, uint32_t);
static int cfil_action_bless_client(uint32_t, struct cfil_msg_hdr *);
static int cfil_dispatch_closed_event(struct socket *, int);
static int cfil_data_common(struct socket *, int, struct sockaddr *,
struct mbuf *, struct mbuf *, uint32_t);
static int cfil_data_filter(struct socket *, uint32_t, int,
struct mbuf *, uint64_t);
static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *,
struct in_addr, u_int16_t);
static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *,
struct in6_addr *, u_int16_t);
static int cfil_dispatch_attach_event(struct socket *, uint32_t);
static void cfil_info_free(struct socket *, struct cfil_info *);
static struct cfil_info * cfil_info_alloc(struct socket *);
static int cfil_info_attach_unit(struct socket *, uint32_t);
static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t);
static struct socket *cfil_socket_from_client_uuid(uuid_t, bool *);
static int cfil_service_pending_queue(struct socket *, uint32_t, int);
static int cfil_data_service_ctl_q(struct socket *, uint32_t, int);
static void cfil_info_verify(struct cfil_info *);
static int cfil_update_data_offsets(struct socket *, uint32_t, int,
uint64_t, uint64_t);
static int cfil_acquire_sockbuf(struct socket *, int);
static void cfil_release_sockbuf(struct socket *, int);
static int cfil_filters_attached(struct socket *);
static void cfil_rw_lock_exclusive(lck_rw_t *);
static void cfil_rw_unlock_exclusive(lck_rw_t *);
static void cfil_rw_lock_shared(lck_rw_t *);
static void cfil_rw_unlock_shared(lck_rw_t *);
static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *);
static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *);
static unsigned int cfil_data_length(struct mbuf *, int *);
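/*
 * Wrappers around the global cfil_lck_rw read-write lock that record
 * the caller's return address in small ring buffers
 * (cfil_rw_lock_history / cfil_rw_unlock_history) as a debugging aid.
 */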
static void
cfil_rw_lock_exclusive(lck_rw_t *lck)
{
void *lr_saved;
lr_saved = __builtin_return_address(0);
lck_rw_lock_exclusive(lck);
cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}
static void
cfil_rw_unlock_exclusive(lck_rw_t *lck)
{
void *lr_saved;
lr_saved = __builtin_return_address(0);
lck_rw_unlock_exclusive(lck);
cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
}
static void
cfil_rw_lock_shared(lck_rw_t *lck)
{
void *lr_saved;
lr_saved = __builtin_return_address(0);
lck_rw_lock_shared(lck);
cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}
static void
cfil_rw_unlock_shared(lck_rw_t *lck)
{
void *lr_saved;
lr_saved = __builtin_return_address(0);
lck_rw_unlock_shared(lck);
cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
}
static boolean_t
cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck)
{
void *lr_saved;
boolean_t upgraded;
lr_saved = __builtin_return_address(0);
upgraded = lck_rw_lock_shared_to_exclusive(lck);
if (upgraded) {
cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
}
return (upgraded);
}
static void
cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck)
{
void *lr_saved;
lr_saved = __builtin_return_address(0);
lck_rw_lock_exclusive_to_shared(lck);
cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}
static void
cfil_rw_lock_assert_held(lck_rw_t *lck, int exclusive)
{
#if !MACH_ASSERT
#pragma unused(lck, exclusive)
#endif
LCK_RW_ASSERT(lck,
exclusive ? LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
}
static unsigned int
cfil_data_length(struct mbuf *m, int *retmbcnt)
{
struct mbuf *m0;
unsigned int pktlen;
int mbcnt;
if (retmbcnt == NULL)
return (m_length(m));
pktlen = 0;
mbcnt = 0;
for (m0 = m; m0 != NULL; m0 = m0->m_next) {
pktlen += m0->m_len;
mbcnt += MSIZE;
if (m0->m_flags & M_EXT)
mbcnt += m0->m_ext.ext_size;
}
*retmbcnt = mbcnt;
return (pktlen);
}
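/*
 * cfil_queue primitives: enqueueing advances q_end and removing
 * advances q_start, so both offsets track absolute positions in the
 * stream; cfil_queue_drain() empties the queue and resets them.
 */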
static inline void
cfil_queue_init(struct cfil_queue *cfq)
{
cfq->q_start = 0;
cfq->q_end = 0;
MBUFQ_INIT(&cfq->q_mq);
}
static inline uint64_t
cfil_queue_drain(struct cfil_queue *cfq)
{
uint64_t drained = cfq->q_end - cfq->q_start;
cfq->q_start = 0;
cfq->q_end = 0;
MBUFQ_DRAIN(&cfq->q_mq);
return (drained);
}
static inline int
cfil_queue_empty(struct cfil_queue *cfq)
{
return (MBUFQ_EMPTY(&cfq->q_mq));
}
static inline uint64_t
cfil_queue_offset_first(struct cfil_queue *cfq)
{
return (cfq->q_start);
}
static inline uint64_t
cfil_queue_offset_last(struct cfil_queue *cfq)
{
return (cfq->q_end);
}
static inline uint64_t
cfil_queue_len(struct cfil_queue *cfq)
{
return (cfq->q_end - cfq->q_start);
}
static void
cfil_queue_verify(struct cfil_queue *cfq)
{
mbuf_t m;
mbuf_t n;
uint64_t queuesize = 0;
VERIFY(cfq->q_start <= cfq->q_end);
VERIFY((MBUFQ_EMPTY(&cfq->q_mq) && cfq->q_start == cfq->q_end) ||
(!MBUFQ_EMPTY(&cfq->q_mq) &&
cfq->q_start != cfq->q_end));
MBUFQ_FOREACH(m, &cfq->q_mq) {
size_t chainsize = 0;
unsigned int mlen = m_length(m);
if (m == (void *)M_TAG_FREE_PATTERN ||
m->m_next == (void *)M_TAG_FREE_PATTERN ||
m->m_nextpkt == (void *)M_TAG_FREE_PATTERN)
panic("%s - mq %p is free at %p", __func__,
&cfq->q_mq, m);
for (n = m; n != NULL; n = n->m_next) {
if (n->m_type != MT_DATA &&
n->m_type != MT_HEADER &&
n->m_type != MT_OOBDATA)
panic("%s - %p unsupported type %u", __func__,
n, n->m_type);
chainsize += n->m_len;
}
if (mlen != chainsize)
panic("%s - %p m_length() %u != chainsize %lu",
__func__, m, mlen, chainsize);
queuesize += chainsize;
}
if (queuesize != cfq->q_end - cfq->q_start)
	panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__,
		&cfq->q_mq, queuesize, cfq->q_end - cfq->q_start);
}
static void
cfil_queue_enqueue(struct cfil_queue *cfq, mbuf_t m, size_t len)
{
CFIL_QUEUE_VERIFY(cfq);
MBUFQ_ENQUEUE(&cfq->q_mq, m);
cfq->q_end += len;
CFIL_QUEUE_VERIFY(cfq);
}
static void
cfil_queue_remove(struct cfil_queue *cfq, mbuf_t m, size_t len)
{
CFIL_QUEUE_VERIFY(cfq);
VERIFY(m_length(m) == len);
MBUFQ_REMOVE(&cfq->q_mq, m);
MBUFQ_NEXT(m) = NULL;
cfq->q_start += len;
CFIL_QUEUE_VERIFY(cfq);
}
static mbuf_t
cfil_queue_first(struct cfil_queue *cfq)
{
return (MBUFQ_FIRST(&cfq->q_mq));
}
static mbuf_t
cfil_queue_next(struct cfil_queue *cfq, mbuf_t m)
{
#pragma unused(cfq)
return (MBUFQ_NEXT(m));
}
static void
cfil_entry_buf_verify(struct cfe_buf *cfe_buf)
{
CFIL_QUEUE_VERIFY(&cfe_buf->cfe_ctl_q);
CFIL_QUEUE_VERIFY(&cfe_buf->cfe_pending_q);
VERIFY(cfe_buf->cfe_ctl_q.q_start >= cfe_buf->cfe_pending_q.q_end);
VERIFY(cfe_buf->cfe_peek_offset >= cfe_buf->cfe_pass_offset);
VERIFY(cfe_buf->cfe_ctl_q.q_start <= cfe_buf->cfe_peeked);
}
static void
cfil_entry_verify(struct cfil_entry *entry)
{
cfil_entry_buf_verify(&entry->cfe_snd);
cfil_entry_buf_verify(&entry->cfe_rcv);
}
static void
cfil_info_buf_verify(struct cfi_buf *cfi_buf)
{
CFIL_QUEUE_VERIFY(&cfi_buf->cfi_inject_q);
VERIFY(cfi_buf->cfi_pending_first <= cfi_buf->cfi_pending_last);
VERIFY(cfi_buf->cfi_pending_mbcnt >= 0);
}
static void
cfil_info_verify(struct cfil_info *cfil_info)
{
int i;
if (cfil_info == NULL)
return;
cfil_info_buf_verify(&cfil_info->cfi_snd);
cfil_info_buf_verify(&cfil_info->cfi_rcv);
for (i = 0; i < MAX_CONTENT_FILTER; i++)
cfil_entry_verify(&cfil_info->cfi_entries[i]);
}
static void
verify_content_filter(struct content_filter *cfc)
{
struct cfil_entry *entry;
uint32_t count = 0;
VERIFY(cfc->cf_sock_count >= 0);
TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
count++;
VERIFY(cfc == entry->cfe_filter);
}
VERIFY(count == cfc->cf_sock_count);
}
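/*
 * Kernel control callbacks.
 *
 * cfil_ctl_connect() runs when a filter agent connects: it allocates
 * the content_filters array on first use, then claims the slot
 * matching the kernel control unit of the connection.
 */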
static errno_t
cfil_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
void **unitinfo)
{
errno_t error = 0;
struct content_filter *cfc = NULL;
CFIL_LOG(LOG_NOTICE, "");
cfc = zalloc(content_filter_zone);
if (cfc == NULL) {
CFIL_LOG(LOG_ERR, "zalloc failed");
error = ENOMEM;
goto done;
}
bzero(cfc, sizeof(struct content_filter));
cfil_rw_lock_exclusive(&cfil_lck_rw);
if (content_filters == NULL) {
struct content_filter **tmp;
cfil_rw_unlock_exclusive(&cfil_lck_rw);
MALLOC(tmp,
struct content_filter **,
MAX_CONTENT_FILTER * sizeof(struct content_filter *),
M_TEMP,
M_WAITOK | M_ZERO);
cfil_rw_lock_exclusive(&cfil_lck_rw);
if (tmp == NULL && content_filters == NULL) {
error = ENOMEM;
cfil_rw_unlock_exclusive(&cfil_lck_rw);
goto done;
}
if (content_filters != NULL)
FREE(tmp, M_TEMP);
else
content_filters = tmp;
}
if (sac->sc_unit == 0 || sac->sc_unit > MAX_CONTENT_FILTER) {
CFIL_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit);
error = EINVAL;
} else if (content_filters[sac->sc_unit - 1] != NULL) {
CFIL_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit);
error = EADDRINUSE;
} else {
content_filters[sac->sc_unit - 1] = cfc;
cfc->cf_kcref = kctlref;
cfc->cf_kcunit = sac->sc_unit;
TAILQ_INIT(&cfc->cf_sock_entries);
*unitinfo = cfc;
cfil_active_count++;
}
cfil_rw_unlock_exclusive(&cfil_lck_rw);
done:
if (error != 0 && cfc != NULL)
zfree(content_filter_zone, cfc);
if (error == 0)
OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_ok);
else
OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_fail);
CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
error, cfil_active_count, sac->sc_unit);
return (error);
}
static errno_t
cfil_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo)
{
#pragma unused(kctlref)
errno_t error = 0;
struct content_filter *cfc;
struct cfil_entry *entry;
CFIL_LOG(LOG_NOTICE, "");
if (content_filters == NULL) {
CFIL_LOG(LOG_ERR, "no content filter");
error = EINVAL;
goto done;
}
if (kcunit > MAX_CONTENT_FILTER) {
CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
kcunit, MAX_CONTENT_FILTER);
error = EINVAL;
goto done;
}
cfc = (struct content_filter *)unitinfo;
if (cfc == NULL)
goto done;
cfil_rw_lock_exclusive(&cfil_lck_rw);
if (content_filters[kcunit - 1] != cfc || cfc->cf_kcunit != kcunit) {
CFIL_LOG(LOG_ERR, "bad unit info %u)",
kcunit);
cfil_rw_unlock_exclusive(&cfil_lck_rw);
goto done;
}
cfc->cf_flags |= CFF_DETACHING;
while ((entry = TAILQ_FIRST(&cfc->cf_sock_entries)) != NULL) {
cfil_rw_lock_assert_held(&cfil_lck_rw, 1);
verify_content_filter(cfc);
if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
struct cfil_info *cfil_info = entry->cfe_cfil_info;
struct socket *so = cfil_info->cfi_so;
entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED |
CFEF_DATA_START;
cfil_rw_unlock_exclusive(&cfil_lck_rw);
socket_lock(so, 1);
if (so->so_cfil == NULL || entry->cfe_filter == NULL) {
cfil_rw_lock_exclusive(&cfil_lck_rw);
goto release;
}
(void) cfil_action_data_pass(so, kcunit, 1,
CFM_MAX_OFFSET,
CFM_MAX_OFFSET);
(void) cfil_action_data_pass(so, kcunit, 0,
CFM_MAX_OFFSET,
CFM_MAX_OFFSET);
cfil_rw_lock_exclusive(&cfil_lck_rw);
if (so->so_cfil == NULL || entry->cfe_filter == NULL)
goto release;
entry->cfe_flags |= CFEF_CFIL_DETACHED;
CFIL_LOG(LOG_NOTICE, "so %llx detached %u",
(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
if ((so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT) &&
cfil_filters_attached(so) == 0) {
CFIL_LOG(LOG_NOTICE, "so %llx waking",
(uint64_t)VM_KERNEL_ADDRPERM(so));
wakeup((caddr_t)&so->so_cfil);
}
entry->cfe_filter = NULL;
entry->cfe_necp_control_unit = 0;
TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
cfc->cf_sock_count--;
release:
socket_unlock(so, 1);
}
}
verify_content_filter(cfc);
VERIFY(cfc->cf_sock_count == 0);
content_filters[kcunit - 1] = NULL;
cfil_active_count--;
cfil_rw_unlock_exclusive(&cfil_lck_rw);
zfree(content_filter_zone, cfc);
done:
if (error == 0)
OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_ok);
else
OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_fail);
CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
error, cfil_active_count, kcunit);
return (error);
}
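/*
 * Take ownership of a socket buffer (SB_LOCK) on behalf of the
 * content filter, sleeping until any other owner drops it. Ownership
 * is recursive for the owning thread and reference counted through
 * sb_cfil_refs; cfil_release_sockbuf() drops one reference and wakes
 * waiters when the last reference goes away. Returns EPIPE if the
 * drop action was applied to the socket.
 */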
static int
cfil_acquire_sockbuf(struct socket *so, int outgoing)
{
thread_t tp = current_thread();
struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
lck_mtx_t *mutex_held;
int error = 0;
while ((sb->sb_flags & SB_LOCK) ||
(sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)) {
if (so->so_proto->pr_getlock != NULL)
mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
else
mutex_held = so->so_proto->pr_domain->dom_mtx;
LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
sb->sb_wantlock++;
VERIFY(sb->sb_wantlock != 0);
msleep(&sb->sb_flags, mutex_held, PSOCK, "cfil_acquire_sockbuf",
NULL);
VERIFY(sb->sb_wantlock != 0);
sb->sb_wantlock--;
}
if (sb->sb_cfil_refs == 0) {
VERIFY(sb->sb_cfil_thread == NULL);
VERIFY((sb->sb_flags & SB_LOCK) == 0);
sb->sb_cfil_thread = tp;
sb->sb_flags |= SB_LOCK;
}
sb->sb_cfil_refs++;
if (so->so_cfil == NULL) {
CFIL_LOG(LOG_ERR, "so %llx cfil detached",
(uint64_t)VM_KERNEL_ADDRPERM(so));
error = 0;
} else if (so->so_cfil->cfi_flags & CFIF_DROP) {
CFIL_LOG(LOG_ERR, "so %llx drop set",
(uint64_t)VM_KERNEL_ADDRPERM(so));
error = EPIPE;
}
return (error);
}
static void
cfil_release_sockbuf(struct socket *so, int outgoing)
{
struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
thread_t tp = current_thread();
socket_lock_assert_owned(so);
if (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)
panic("%s sb_cfil_thread %p not current %p", __func__,
sb->sb_cfil_thread, tp);
if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK))
panic("%s SB_LOCK not set on %p", __func__,
sb);
sb->sb_cfil_refs--;
if (sb->sb_cfil_refs == 0) {
sb->sb_cfil_thread = NULL;
sb->sb_flags &= ~SB_LOCK;
if (sb->sb_wantlock > 0)
wakeup(&sb->sb_flags);
}
}
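/*
 * A socket's content filter id packs the socket generation count and
 * the inpcb flow hash into one 64-bit value:
 *
 *	cfi_sock_id = (so->so_gencnt << 32) | inp->inp_flowhash;
 *
 * cfil_socket_from_sock_id() reverses the mapping by walking tcbinfo
 * for an inpcb whose flow hash and low 32 bits of generation count
 * match.
 */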
cfil_sock_id_t
cfil_sock_id_from_socket(struct socket *so)
{
if ((so->so_flags & SOF_CONTENT_FILTER) && so->so_cfil)
return (so->so_cfil->cfi_sock_id);
else
return (CFIL_SOCK_ID_NONE);
}
static struct socket *
cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id)
{
struct socket *so = NULL;
u_int64_t gencnt = cfil_sock_id >> 32;
u_int32_t flowhash = (u_int32_t)(cfil_sock_id & 0x0ffffffff);
struct inpcb *inp = NULL;
struct inpcbinfo *pcbinfo = &tcbinfo;
lck_rw_lock_shared(pcbinfo->ipi_lock);
LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
if (inp->inp_state != INPCB_STATE_DEAD &&
inp->inp_socket != NULL &&
inp->inp_flowhash == flowhash &&
(inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt &&
inp->inp_socket->so_cfil != NULL) {
so = inp->inp_socket;
break;
}
}
lck_rw_done(pcbinfo->ipi_lock);
if (so == NULL) {
OSIncrementAtomic(&cfil_stats.cfs_sock_id_not_found);
CFIL_LOG(LOG_DEBUG,
"no socket for sock_id %llx gencnt %llx flowhash %x",
cfil_sock_id, gencnt, flowhash);
}
return (so);
}
static struct socket *
cfil_socket_from_client_uuid(uuid_t necp_client_uuid, bool *cfil_attached)
{
struct socket *so = NULL;
struct inpcb *inp = NULL;
struct inpcbinfo *pcbinfo = &tcbinfo;
lck_rw_lock_shared(pcbinfo->ipi_lock);
LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
if (inp->inp_state != INPCB_STATE_DEAD &&
inp->inp_socket != NULL &&
uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
*cfil_attached = (inp->inp_socket->so_cfil != NULL);
so = inp->inp_socket;
break;
}
}
lck_rw_done(pcbinfo->ipi_lock);
return (so);
}
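/*
 * cfil_ctl_send() handles action messages from the filter agent:
 * CFM_OP_DATA_UPDATE moves a socket's pass/peek offsets, CFM_OP_DROP
 * drops the flow, and CFM_OP_BLESS_CLIENT is handed to
 * cfil_action_bless_client().
 */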
static errno_t
cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m,
int flags)
{
#pragma unused(kctlref, flags)
errno_t error = 0;
struct cfil_msg_hdr *msghdr;
struct content_filter *cfc = (struct content_filter *)unitinfo;
struct socket *so;
struct cfil_msg_action *action_msg;
struct cfil_entry *entry;
CFIL_LOG(LOG_INFO, "");
if (content_filters == NULL) {
CFIL_LOG(LOG_ERR, "no content filter");
error = EINVAL;
goto done;
}
if (kcunit > MAX_CONTENT_FILTER) {
CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
kcunit, MAX_CONTENT_FILTER);
error = EINVAL;
goto done;
}
if (m_length(m) < sizeof(struct cfil_msg_hdr)) {
CFIL_LOG(LOG_ERR, "too short %u", m_length(m));
error = EINVAL;
goto done;
}
msghdr = (struct cfil_msg_hdr *)mbuf_data(m);
if (msghdr->cfm_version != CFM_VERSION_CURRENT) {
CFIL_LOG(LOG_ERR, "bad version %u", msghdr->cfm_version);
error = EINVAL;
goto done;
}
if (msghdr->cfm_type != CFM_TYPE_ACTION) {
CFIL_LOG(LOG_ERR, "bad type %u", msghdr->cfm_type);
error = EINVAL;
goto done;
}
switch (msghdr->cfm_op) {
case CFM_OP_DATA_UPDATE:
OSIncrementAtomic(
&cfil_stats.cfs_ctl_action_data_update);
break;
case CFM_OP_DROP:
OSIncrementAtomic(&cfil_stats.cfs_ctl_action_drop);
break;
case CFM_OP_BLESS_CLIENT:
if (msghdr->cfm_len != sizeof(struct cfil_msg_bless_client)) {
OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
error = EINVAL;
CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
msghdr->cfm_len,
msghdr->cfm_op);
goto done;
}
error = cfil_action_bless_client(kcunit, msghdr);
goto done;
default:
OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_op);
CFIL_LOG(LOG_ERR, "bad op %u", msghdr->cfm_op);
error = EINVAL;
goto done;
}
if (msghdr->cfm_len != sizeof(struct cfil_msg_action)) {
OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
error = EINVAL;
CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
msghdr->cfm_len,
msghdr->cfm_op);
goto done;
}
cfil_rw_lock_shared(&cfil_lck_rw);
if (cfc != (void *)content_filters[kcunit - 1]) {
CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
kcunit);
error = EINVAL;
cfil_rw_unlock_shared(&cfil_lck_rw);
goto done;
}
so = cfil_socket_from_sock_id(msghdr->cfm_sock_id);
if (so == NULL) {
CFIL_LOG(LOG_NOTICE, "bad sock_id %llx",
msghdr->cfm_sock_id);
error = EINVAL;
cfil_rw_unlock_shared(&cfil_lck_rw);
goto done;
}
cfil_rw_unlock_shared(&cfil_lck_rw);
socket_lock(so, 1);
if (so->so_cfil == NULL) {
CFIL_LOG(LOG_NOTICE, "so %llx not attached",
(uint64_t)VM_KERNEL_ADDRPERM(so));
error = EINVAL;
goto unlock;
} else if (so->so_cfil->cfi_flags & CFIF_DROP) {
CFIL_LOG(LOG_NOTICE, "so %llx drop set",
(uint64_t)VM_KERNEL_ADDRPERM(so));
error = EINVAL;
goto unlock;
}
entry = &so->so_cfil->cfi_entries[kcunit - 1];
if (entry->cfe_filter == NULL) {
CFIL_LOG(LOG_NOTICE, "so %llx no filter",
(uint64_t)VM_KERNEL_ADDRPERM(so));
error = EINVAL;
goto unlock;
}
if (entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED)
entry->cfe_flags |= CFEF_DATA_START;
else {
CFIL_LOG(LOG_ERR,
"so %llx attached not sent for %u",
(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
error = EINVAL;
goto unlock;
}
microuptime(&entry->cfe_last_action);
CFI_ADD_TIME_LOG(so->so_cfil, &entry->cfe_last_action, &so->so_cfil->cfi_first_event, msghdr->cfm_op);
action_msg = (struct cfil_msg_action *)msghdr;
switch (msghdr->cfm_op) {
case CFM_OP_DATA_UPDATE:
if (action_msg->cfa_out_peek_offset != 0 ||
action_msg->cfa_out_pass_offset != 0)
error = cfil_action_data_pass(so, kcunit, 1,
action_msg->cfa_out_pass_offset,
action_msg->cfa_out_peek_offset);
if (error == EJUSTRETURN)
error = 0;
if (error != 0)
break;
if (action_msg->cfa_in_peek_offset != 0 ||
action_msg->cfa_in_pass_offset != 0)
error = cfil_action_data_pass(so, kcunit, 0,
action_msg->cfa_in_pass_offset,
action_msg->cfa_in_peek_offset);
if (error == EJUSTRETURN)
error = 0;
break;
case CFM_OP_DROP:
error = cfil_action_drop(so, kcunit);
break;
default:
error = EINVAL;
break;
}
unlock:
socket_unlock(so, 1);
done:
mbuf_freem(m);
if (error == 0)
OSIncrementAtomic(&cfil_stats.cfs_ctl_send_ok);
else
OSIncrementAtomic(&cfil_stats.cfs_ctl_send_bad);
return (error);
}
static errno_t
cfil_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
int opt, void *data, size_t *len)
{
#pragma unused(kctlref, opt)
errno_t error = 0;
struct content_filter *cfc = (struct content_filter *)unitinfo;
CFIL_LOG(LOG_NOTICE, "");
cfil_rw_lock_shared(&cfil_lck_rw);
if (content_filters == NULL) {
CFIL_LOG(LOG_ERR, "no content filter");
error = EINVAL;
goto done;
}
if (kcunit > MAX_CONTENT_FILTER) {
CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
kcunit, MAX_CONTENT_FILTER);
error = EINVAL;
goto done;
}
if (cfc != (void *)content_filters[kcunit - 1]) {
CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
kcunit);
error = EINVAL;
goto done;
}
switch (opt) {
case CFIL_OPT_NECP_CONTROL_UNIT:
if (*len < sizeof(uint32_t)) {
CFIL_LOG(LOG_ERR, "len too small %lu", *len);
error = EINVAL;
goto done;
}
if (data != NULL) {
*(uint32_t *)data = cfc->cf_necp_control_unit;
}
break;
case CFIL_OPT_GET_SOCKET_INFO:
if (*len != sizeof(struct cfil_opt_sock_info)) {
CFIL_LOG(LOG_ERR, "len does not match %lu", *len);
error = EINVAL;
goto done;
}
if (data == NULL) {
CFIL_LOG(LOG_ERR, "data not passed");
error = EINVAL;
goto done;
}
struct cfil_opt_sock_info *sock_info =
(struct cfil_opt_sock_info *) data;
struct socket *sock =
cfil_socket_from_sock_id(sock_info->cfs_sock_id);
if (sock == NULL) {
CFIL_LOG(LOG_NOTICE, "bad sock_id %llx",
sock_info->cfs_sock_id);
error = ENOENT;
goto done;
}
cfil_rw_unlock_shared(&cfil_lck_rw);
socket_lock(sock, 1);
if (sock->so_cfil == NULL) {
CFIL_LOG(LOG_NOTICE, "so %llx not attached, cannot fetch info",
(uint64_t)VM_KERNEL_ADDRPERM(sock));
error = EINVAL;
socket_unlock(sock, 1);
goto return_already_unlocked;
}
sock_info->cfs_sock_family = sock->so_proto->pr_domain->dom_family;
sock_info->cfs_sock_type = sock->so_proto->pr_type;
sock_info->cfs_sock_protocol = sock->so_proto->pr_protocol;
struct inpcb *inp = sotoinpcb(sock);
if (inp->inp_vflag & INP_IPV6) {
fill_ip6_sockaddr_4_6(&sock_info->cfs_local,
&inp->in6p_laddr, inp->inp_lport);
fill_ip6_sockaddr_4_6(&sock_info->cfs_remote,
&inp->in6p_faddr, inp->inp_fport);
} else if (inp->inp_vflag & INP_IPV4) {
fill_ip_sockaddr_4_6(&sock_info->cfs_local,
inp->inp_laddr, inp->inp_lport);
fill_ip_sockaddr_4_6(&sock_info->cfs_remote,
inp->inp_faddr, inp->inp_fport);
}
sock_info->cfs_pid = sock->last_pid;
memcpy(sock_info->cfs_uuid, sock->last_uuid, sizeof(uuid_t));
if (sock->so_flags & SOF_DELEGATED) {
sock_info->cfs_e_pid = sock->e_pid;
memcpy(sock_info->cfs_e_uuid, sock->e_uuid, sizeof(uuid_t));
} else {
sock_info->cfs_e_pid = sock->last_pid;
memcpy(sock_info->cfs_e_uuid, sock->last_uuid, sizeof(uuid_t));
}
socket_unlock(sock, 1);
goto return_already_unlocked;
default:
error = ENOPROTOOPT;
break;
}
done:
cfil_rw_unlock_shared(&cfil_lck_rw);
return (error);
return_already_unlocked:
return (error);
}
static errno_t
cfil_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
int opt, void *data, size_t len)
{
#pragma unused(kctlref, opt)
errno_t error = 0;
struct content_filter *cfc = (struct content_filter *)unitinfo;
CFIL_LOG(LOG_NOTICE, "");
cfil_rw_lock_exclusive(&cfil_lck_rw);
if (content_filters == NULL) {
CFIL_LOG(LOG_ERR, "no content filter");
error = EINVAL;
goto done;
}
if (kcunit > MAX_CONTENT_FILTER) {
CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
kcunit, MAX_CONTENT_FILTER);
error = EINVAL;
goto done;
}
if (cfc != (void *)content_filters[kcunit - 1]) {
CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
kcunit);
error = EINVAL;
goto done;
}
switch (opt) {
case CFIL_OPT_NECP_CONTROL_UNIT:
if (len < sizeof(uint32_t)) {
CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
"len too small %lu", len);
error = EINVAL;
goto done;
}
if (cfc->cf_necp_control_unit != 0) {
CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
"already set %u",
cfc->cf_necp_control_unit);
error = EINVAL;
goto done;
}
cfc->cf_necp_control_unit = *(uint32_t *)data;
break;
default:
error = ENOPROTOOPT;
break;
}
done:
cfil_rw_unlock_exclusive(&cfil_lck_rw);
return (error);
}
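/*
 * cfil_ctl_rcvd() runs when space frees up on the kernel control
 * socket: it lifts the filter's flow control and services the control
 * queues of entries that were held back by it.
 */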
static void
cfil_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, int flags)
{
#pragma unused(kctlref, flags)
struct content_filter *cfc = (struct content_filter *)unitinfo;
struct socket *so = NULL;
int error;
struct cfil_entry *entry;
CFIL_LOG(LOG_INFO, "");
if (content_filters == NULL) {
CFIL_LOG(LOG_ERR, "no content filter");
OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
return;
}
if (kcunit > MAX_CONTENT_FILTER) {
CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
kcunit, MAX_CONTENT_FILTER);
OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
return;
}
cfil_rw_lock_shared(&cfil_lck_rw);
if (cfc != (void *)content_filters[kcunit - 1]) {
CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
kcunit);
OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
goto done;
}
if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
cfil_rw_lock_exclusive(&cfil_lck_rw);
cfc->cf_flags &= ~CFF_FLOW_CONTROLLED;
cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
LCK_RW_ASSERT(&cfil_lck_rw, LCK_RW_ASSERT_SHARED);
}
while ((cfc->cf_flags & CFF_FLOW_CONTROLLED) == 0) {
verify_content_filter(cfc);
cfil_rw_lock_assert_held(&cfil_lck_rw, 0);
/* Find an entry that is flow controlled */
TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
	if (entry->cfe_cfil_info == NULL ||
	    entry->cfe_cfil_info->cfi_so == NULL)
		continue;
	if ((entry->cfe_flags & CFEF_FLOW_CONTROLLED) == 0)
		continue;
	break;
}
if (entry == NULL)
break;
OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_flow_lift);
so = entry->cfe_cfil_info->cfi_so;
cfil_rw_unlock_shared(&cfil_lck_rw);
socket_lock(so, 1);
do {
error = cfil_acquire_sockbuf(so, 1);
if (error == 0)
error = cfil_data_service_ctl_q(so, kcunit, 1);
cfil_release_sockbuf(so, 1);
if (error != 0)
break;
error = cfil_acquire_sockbuf(so, 0);
if (error == 0)
error = cfil_data_service_ctl_q(so, kcunit, 0);
cfil_release_sockbuf(so, 0);
} while (0);
socket_lock_assert_owned(so);
socket_unlock(so, 1);
cfil_rw_lock_shared(&cfil_lck_rw);
}
done:
cfil_rw_unlock_shared(&cfil_lck_rw);
}
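/*
 * Module initialization: set up the zones, the global read-write lock
 * and socket list, and register the content filter kernel control.
 */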
void
cfil_init(void)
{
struct kern_ctl_reg kern_ctl;
errno_t error = 0;
vm_size_t content_filter_size = 0;
vm_size_t cfil_info_size = 0;
CFIL_LOG(LOG_NOTICE, "");
_CASSERT(CFIL_MAX_FILTER_COUNT == MAX_CONTENT_FILTER);
_CASSERT(sizeof(struct cfil_filter_stat) % sizeof(uint32_t) == 0);
_CASSERT(sizeof(struct cfil_entry_stat) % sizeof(uint32_t) == 0);
_CASSERT(sizeof(struct cfil_sock_stat) % sizeof(uint32_t) == 0);
VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_enqueued,
sizeof(uint32_t)));
VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_enqueued,
sizeof(uint32_t)));
VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_peeked,
sizeof(uint32_t)));
VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_peeked,
sizeof(uint32_t)));
VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_in_enqueued,
sizeof(uint32_t)));
VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_out_enqueued,
sizeof(uint32_t)));
VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_enqueued,
sizeof(uint32_t)));
VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_enqueued,
sizeof(uint32_t)));
VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_passed,
sizeof(uint32_t)));
VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_passed,
sizeof(uint32_t)));
content_filter_size = sizeof(struct content_filter);
content_filter_zone = zinit(content_filter_size,
CONTENT_FILTER_ZONE_MAX * content_filter_size,
0,
CONTENT_FILTER_ZONE_NAME);
if (content_filter_zone == NULL) {
panic("%s: zinit(%s) failed", __func__,
CONTENT_FILTER_ZONE_NAME);
}
zone_change(content_filter_zone, Z_CALLERACCT, FALSE);
zone_change(content_filter_zone, Z_EXPAND, TRUE);
cfil_info_size = sizeof(struct cfil_info);
cfil_info_zone = zinit(cfil_info_size,
CFIL_INFO_ZONE_MAX * cfil_info_size,
0,
CFIL_INFO_ZONE_NAME);
if (cfil_info_zone == NULL) {
panic("%s: zinit(%s) failed", __func__, CFIL_INFO_ZONE_NAME);
}
zone_change(cfil_info_zone, Z_CALLERACCT, FALSE);
zone_change(cfil_info_zone, Z_EXPAND, TRUE);
cfil_lck_grp_attr = lck_grp_attr_alloc_init();
if (cfil_lck_grp_attr == NULL) {
panic("%s: lck_grp_attr_alloc_init failed", __func__);
}
cfil_lck_grp = lck_grp_alloc_init("content filter",
cfil_lck_grp_attr);
if (cfil_lck_grp == NULL) {
panic("%s: lck_grp_alloc_init failed", __func__);
}
cfil_lck_attr = lck_attr_alloc_init();
if (cfil_lck_attr == NULL) {
panic("%s: lck_attr_alloc_init failed", __func__);
}
lck_rw_init(&cfil_lck_rw, cfil_lck_grp, cfil_lck_attr);
TAILQ_INIT(&cfil_sock_head);
bzero(&kern_ctl, sizeof(kern_ctl));
strlcpy(kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME,
sizeof(kern_ctl.ctl_name));
kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
kern_ctl.ctl_sendsize = 512 * 1024;
kern_ctl.ctl_recvsize = 512 * 1024;
kern_ctl.ctl_connect = cfil_ctl_connect;
kern_ctl.ctl_disconnect = cfil_ctl_disconnect;
kern_ctl.ctl_send = cfil_ctl_send;
kern_ctl.ctl_getopt = cfil_ctl_getopt;
kern_ctl.ctl_setopt = cfil_ctl_setopt;
kern_ctl.ctl_rcvd = cfil_ctl_rcvd;
error = ctl_register(&kern_ctl, &cfil_kctlref);
if (error != 0) {
CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error);
return;
}
}
struct cfil_info *
cfil_info_alloc(struct socket *so)
{
int kcunit;
struct cfil_info *cfil_info = NULL;
struct inpcb *inp = sotoinpcb(so);
CFIL_LOG(LOG_INFO, "");
socket_lock_assert_owned(so);
cfil_info = zalloc(cfil_info_zone);
if (cfil_info == NULL)
goto done;
bzero(cfil_info, sizeof(struct cfil_info));
cfil_queue_init(&cfil_info->cfi_snd.cfi_inject_q);
cfil_queue_init(&cfil_info->cfi_rcv.cfi_inject_q);
for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
struct cfil_entry *entry;
entry = &cfil_info->cfi_entries[kcunit - 1];
entry->cfe_cfil_info = cfil_info;
entry->cfe_filter = NULL;
entry->cfe_flags = 0;
entry->cfe_necp_control_unit = 0;
entry->cfe_snd.cfe_pass_offset = 0;
entry->cfe_snd.cfe_peek_offset = 0;
entry->cfe_snd.cfe_peeked = 0;
entry->cfe_rcv.cfe_pass_offset = 0;
entry->cfe_rcv.cfe_peek_offset = 0;
entry->cfe_rcv.cfe_peeked = 0;
cfil_queue_init(&entry->cfe_snd.cfe_pending_q);
cfil_queue_init(&entry->cfe_rcv.cfe_pending_q);
cfil_queue_init(&entry->cfe_snd.cfe_ctl_q);
cfil_queue_init(&entry->cfe_rcv.cfe_ctl_q);
}
cfil_rw_lock_exclusive(&cfil_lck_rw);
so->so_cfil = cfil_info;
cfil_info->cfi_so = so;
if (inp->inp_flowhash == 0)
inp->inp_flowhash = inp_calc_flowhash(inp);
cfil_info->cfi_sock_id =
((so->so_gencnt << 32) | inp->inp_flowhash);
TAILQ_INSERT_TAIL(&cfil_sock_head, cfil_info, cfi_link);
cfil_sock_attached_count++;
cfil_rw_unlock_exclusive(&cfil_lck_rw);
done:
if (cfil_info != NULL)
OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_ok);
else
OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_fail);
return (cfil_info);
}
int
cfil_info_attach_unit(struct socket *so, uint32_t filter_control_unit)
{
int kcunit;
struct cfil_info *cfil_info = so->so_cfil;
int attached = 0;
CFIL_LOG(LOG_INFO, "");
socket_lock_assert_owned(so);
cfil_rw_lock_exclusive(&cfil_lck_rw);
for (kcunit = 1;
content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
kcunit++) {
struct content_filter *cfc = content_filters[kcunit - 1];
struct cfil_entry *entry;
if (cfc == NULL)
continue;
if (cfc->cf_necp_control_unit != filter_control_unit)
continue;
entry = &cfil_info->cfi_entries[kcunit - 1];
entry->cfe_filter = cfc;
entry->cfe_necp_control_unit = filter_control_unit;
TAILQ_INSERT_TAIL(&cfc->cf_sock_entries, entry, cfe_link);
cfc->cf_sock_count++;
verify_content_filter(cfc);
attached = 1;
entry->cfe_flags |= CFEF_CFIL_ATTACHED;
break;
}
cfil_rw_unlock_exclusive(&cfil_lck_rw);
return (attached);
}
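/*
 * Unhook a cfil_info from its socket and from every attached filter
 * entry, drain all of its queues, and return it to the zone.
 */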
static void
cfil_info_free(struct socket *so, struct cfil_info *cfil_info)
{
int kcunit;
uint64_t in_drain = 0;
uint64_t out_drained = 0;
so->so_cfil = NULL;
if (so->so_flags & SOF_CONTENT_FILTER) {
so->so_flags &= ~SOF_CONTENT_FILTER;
VERIFY(so->so_usecount > 0);
so->so_usecount--;
}
if (cfil_info == NULL)
return;
CFIL_LOG(LOG_INFO, "");
cfil_rw_lock_exclusive(&cfil_lck_rw);
for (kcunit = 1;
content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
kcunit++) {
struct cfil_entry *entry;
struct content_filter *cfc;
entry = &cfil_info->cfi_entries[kcunit - 1];
if (entry->cfe_filter == NULL)
continue;
cfc = content_filters[kcunit - 1];
VERIFY(cfc == entry->cfe_filter);
entry->cfe_filter = NULL;
entry->cfe_necp_control_unit = 0;
TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
cfc->cf_sock_count--;
verify_content_filter(cfc);
}
cfil_sock_attached_count--;
TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link);
out_drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
in_drain += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
struct cfil_entry *entry;
entry = &cfil_info->cfi_entries[kcunit - 1];
out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_pending_q);
out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_ctl_q);
}
cfil_rw_unlock_exclusive(&cfil_lck_rw);
if (out_drained)
OSIncrementAtomic(&cfil_stats.cfs_flush_out_free);
if (in_drain)
OSIncrementAtomic(&cfil_stats.cfs_flush_in_free);
zfree(cfil_info_zone, cfil_info);
}
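/*
 * cfil_sock_attach() is called from the socket layer for TCP sockets
 * only. If an NECP rule assigns the socket a content filter control
 * unit, a cfil_info is allocated and hooked to the matching filter,
 * and an attach event is dispatched to the filter agent.
 */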
errno_t
cfil_sock_attach(struct socket *so)
{
errno_t error = 0;
uint32_t filter_control_unit;
socket_lock_assert_owned(so);
if ((so->so_proto->pr_domain->dom_family != PF_INET &&
so->so_proto->pr_domain->dom_family != PF_INET6) ||
so->so_proto->pr_type != SOCK_STREAM ||
so->so_proto->pr_protocol != IPPROTO_TCP ||
(so->so_flags & SOF_MP_SUBFLOW) != 0 ||
(so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0)
goto done;
filter_control_unit = necp_socket_get_content_filter_control_unit(so);
if (filter_control_unit == 0)
goto done;
if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
goto done;
}
if (cfil_active_count == 0) {
OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
goto done;
}
if (so->so_cfil != NULL) {
OSIncrementAtomic(&cfil_stats.cfs_sock_attach_already);
CFIL_LOG(LOG_ERR, "already attached");
} else {
cfil_info_alloc(so);
if (so->so_cfil == NULL) {
error = ENOMEM;
OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
goto done;
}
}
if (cfil_info_attach_unit(so, filter_control_unit) == 0) {
CFIL_LOG(LOG_ERR, "cfil_info_attach_unit(%u) failed",
filter_control_unit);
OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
goto done;
}
CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u sockid %llx",
(uint64_t)VM_KERNEL_ADDRPERM(so),
filter_control_unit, so->so_cfil->cfi_sock_id);
so->so_flags |= SOF_CONTENT_FILTER;
OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
so->so_usecount++;
error = cfil_dispatch_attach_event(so, filter_control_unit);
if (error == ENOBUFS || error == ENOMEM)
error = 0;
else if (error != 0)
goto done;
CFIL_INFO_VERIFY(so->so_cfil);
done:
return (error);
}
errno_t
cfil_sock_detach(struct socket *so)
{
if (so->so_cfil) {
cfil_info_free(so, so->so_cfil);
OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
}
return (0);
}
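/*
 * Send a CFM_OP_SOCKET_ATTACHED event to the filter matching
 * filter_control_unit and record the time of this first event. On
 * ENOBUFS the entry and filter are marked flow controlled and the
 * event is retried later from cfil_data_service_ctl_q().
 */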
static int
cfil_dispatch_attach_event(struct socket *so, uint32_t filter_control_unit)
{
errno_t error = 0;
struct cfil_entry *entry = NULL;
struct cfil_msg_sock_attached msg_attached;
uint32_t kcunit;
struct content_filter *cfc = NULL;
socket_lock_assert_owned(so);
cfil_rw_lock_shared(&cfil_lck_rw);
if (so->so_proto == NULL || so->so_proto->pr_domain == NULL) {
error = EINVAL;
goto done;
}
for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
cfc = content_filters[kcunit - 1];
if (cfc == NULL)
continue;
if (cfc->cf_necp_control_unit != filter_control_unit)
continue;
entry = &so->so_cfil->cfi_entries[kcunit - 1];
if (entry->cfe_filter == NULL)
continue;
VERIFY(cfc == entry->cfe_filter);
break;
}
if (entry == NULL || entry->cfe_filter == NULL)
goto done;
if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED))
goto done;
CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u kcunit %u",
(uint64_t)VM_KERNEL_ADDRPERM(so), filter_control_unit, kcunit);
if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
error = ENOBUFS;
goto done;
}
bzero(&msg_attached, sizeof(struct cfil_msg_sock_attached));
msg_attached.cfs_msghdr.cfm_len = sizeof(struct cfil_msg_sock_attached);
msg_attached.cfs_msghdr.cfm_version = CFM_VERSION_CURRENT;
msg_attached.cfs_msghdr.cfm_type = CFM_TYPE_EVENT;
msg_attached.cfs_msghdr.cfm_op = CFM_OP_SOCKET_ATTACHED;
msg_attached.cfs_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
msg_attached.cfs_sock_family = so->so_proto->pr_domain->dom_family;
msg_attached.cfs_sock_type = so->so_proto->pr_type;
msg_attached.cfs_sock_protocol = so->so_proto->pr_protocol;
msg_attached.cfs_pid = so->last_pid;
memcpy(msg_attached.cfs_uuid, so->last_uuid, sizeof(uuid_t));
if (so->so_flags & SOF_DELEGATED) {
msg_attached.cfs_e_pid = so->e_pid;
memcpy(msg_attached.cfs_e_uuid, so->e_uuid, sizeof(uuid_t));
} else {
msg_attached.cfs_e_pid = so->last_pid;
memcpy(msg_attached.cfs_e_uuid, so->last_uuid, sizeof(uuid_t));
}
error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
entry->cfe_filter->cf_kcunit,
&msg_attached,
sizeof(struct cfil_msg_sock_attached),
CTL_DATA_EOR);
if (error != 0) {
CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
goto done;
}
microuptime(&entry->cfe_last_event);
so->so_cfil->cfi_first_event.tv_sec = entry->cfe_last_event.tv_sec;
so->so_cfil->cfi_first_event.tv_usec = entry->cfe_last_event.tv_usec;
entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED;
OSIncrementAtomic(&cfil_stats.cfs_attach_event_ok);
done:
if (error == ENOBUFS) {
entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
OSIncrementAtomic(&cfil_stats.cfs_attach_event_flow_control);
if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
cfil_rw_lock_exclusive(&cfil_lck_rw);
cfc->cf_flags |= CFF_FLOW_CONTROLLED;
cfil_rw_unlock_exclusive(&cfil_lck_rw);
} else {
if (error != 0)
OSIncrementAtomic(&cfil_stats.cfs_attach_event_fail);
cfil_rw_unlock_shared(&cfil_lck_rw);
}
return (error);
}
static int
cfil_dispatch_disconnect_event(struct socket *so, uint32_t kcunit, int outgoing)
{
errno_t error = 0;
struct cfil_entry *entry;
struct cfe_buf *entrybuf;
struct cfil_msg_hdr msg_disconnected;
struct content_filter *cfc;
socket_lock_assert_owned(so);
cfil_rw_lock_shared(&cfil_lck_rw);
entry = &so->so_cfil->cfi_entries[kcunit - 1];
if (outgoing)
entrybuf = &entry->cfe_snd;
else
entrybuf = &entry->cfe_rcv;
cfc = entry->cfe_filter;
if (cfc == NULL)
goto done;
CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
if ((outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) ||
(!outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))) {
CFIL_LOG(LOG_INFO, "so %llx disconnect already sent",
(uint64_t)VM_KERNEL_ADDRPERM(so));
goto done;
}
if (outgoing && cfil_queue_empty(&entrybuf->cfe_ctl_q) == 0) {
CFIL_LOG(LOG_INFO, "so %llx control queue not empty",
(uint64_t)VM_KERNEL_ADDRPERM(so));
error = EBUSY;
goto done;
}
if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
error = ENOBUFS;
goto done;
}
bzero(&msg_disconnected, sizeof(struct cfil_msg_hdr));
msg_disconnected.cfm_len = sizeof(struct cfil_msg_hdr);
msg_disconnected.cfm_version = CFM_VERSION_CURRENT;
msg_disconnected.cfm_type = CFM_TYPE_EVENT;
msg_disconnected.cfm_op = outgoing ? CFM_OP_DISCONNECT_OUT :
CFM_OP_DISCONNECT_IN;
msg_disconnected.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
entry->cfe_filter->cf_kcunit,
&msg_disconnected,
sizeof(struct cfil_msg_hdr),
CTL_DATA_EOR);
if (error != 0) {
	CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
	goto done;
}
microuptime(&entry->cfe_last_event);
CFI_ADD_TIME_LOG(so->so_cfil, &entry->cfe_last_event, &so->so_cfil->cfi_first_event, msg_disconnected.cfm_op);
if (outgoing) {
entry->cfe_flags |= CFEF_SENT_DISCONNECT_OUT;
OSIncrementAtomic(&cfil_stats.cfs_disconnect_out_event_ok);
} else {
entry->cfe_flags |= CFEF_SENT_DISCONNECT_IN;
OSIncrementAtomic(&cfil_stats.cfs_disconnect_in_event_ok);
}
done:
if (error == ENOBUFS) {
entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
OSIncrementAtomic(
&cfil_stats.cfs_disconnect_event_flow_control);
if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
cfil_rw_lock_exclusive(&cfil_lck_rw);
cfc->cf_flags |= CFF_FLOW_CONTROLLED;
cfil_rw_unlock_exclusive(&cfil_lck_rw);
} else {
if (error != 0)
OSIncrementAtomic(
&cfil_stats.cfs_disconnect_event_fail);
cfil_rw_unlock_shared(&cfil_lck_rw);
}
return (error);
}
int
cfil_dispatch_closed_event(struct socket *so, int kcunit)
{
struct cfil_entry *entry;
struct cfil_msg_sock_closed msg_closed;
errno_t error = 0;
struct content_filter *cfc;
socket_lock_assert_owned(so);
cfil_rw_lock_shared(&cfil_lck_rw);
entry = &so->so_cfil->cfi_entries[kcunit - 1];
cfc = entry->cfe_filter;
if (cfc == NULL)
goto done;
CFIL_LOG(LOG_INFO, "so %llx kcunit %d",
(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
error = ENOBUFS;
goto done;
}
if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != 0)
goto done;
if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0)
goto done;
microuptime(&entry->cfe_last_event);
CFI_ADD_TIME_LOG(so->so_cfil, &entry->cfe_last_event, &so->so_cfil->cfi_first_event, CFM_OP_SOCKET_CLOSED);
bzero(&msg_closed, sizeof(struct cfil_msg_sock_closed));
msg_closed.cfc_msghdr.cfm_len = sizeof(struct cfil_msg_sock_closed);
msg_closed.cfc_msghdr.cfm_version = CFM_VERSION_CURRENT;
msg_closed.cfc_msghdr.cfm_type = CFM_TYPE_EVENT;
msg_closed.cfc_msghdr.cfm_op = CFM_OP_SOCKET_CLOSED;
msg_closed.cfc_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
msg_closed.cfc_first_event.tv_sec = so->so_cfil->cfi_first_event.tv_sec;
msg_closed.cfc_first_event.tv_usec = so->so_cfil->cfi_first_event.tv_usec;
memcpy(msg_closed.cfc_op_time, so->so_cfil->cfi_op_time, sizeof(uint32_t)*CFI_MAX_TIME_LOG_ENTRY);
memcpy(msg_closed.cfc_op_list, so->so_cfil->cfi_op_list, sizeof(unsigned char)*CFI_MAX_TIME_LOG_ENTRY);
msg_closed.cfc_op_list_ctr = so->so_cfil->cfi_op_list_ctr;
CFIL_LOG(LOG_INFO, "sock id %llu, op ctr %d, start time %llu.%llu", msg_closed.cfc_msghdr.cfm_sock_id, so->so_cfil->cfi_op_list_ctr, so->so_cfil->cfi_first_event.tv_sec, so->so_cfil->cfi_first_event.tv_usec);
error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
entry->cfe_filter->cf_kcunit,
&msg_closed,
sizeof(struct cfil_msg_sock_closed),
CTL_DATA_EOR);
if (error != 0) {
CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d",
error);
goto done;
}
entry->cfe_flags |= CFEF_SENT_SOCK_CLOSED;
OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok);
done:
if (error == ENOBUFS) {
entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control);
if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
cfil_rw_lock_exclusive(&cfil_lck_rw);
cfc->cf_flags |= CFF_FLOW_CONTROLLED;
cfil_rw_unlock_exclusive(&cfil_lck_rw);
} else {
if (error != 0)
OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail);
cfil_rw_unlock_shared(&cfil_lck_rw);
}
return (error);
}
static void
fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
struct in6_addr *ip6, u_int16_t port)
{
struct sockaddr_in6 *sin6 = &sin46->sin6;
sin6->sin6_family = AF_INET6;
sin6->sin6_len = sizeof(*sin6);
sin6->sin6_port = port;
sin6->sin6_addr = *ip6;
if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
sin6->sin6_addr.s6_addr16[1] = 0;
}
}
static void
fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
struct in_addr ip, u_int16_t port)
{
struct sockaddr_in *sin = &sin46->sin;
sin->sin_family = AF_INET;
sin->sin_len = sizeof(*sin);
sin->sin_port = port;
sin->sin_addr.s_addr = ip.s_addr;
}
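/*
 * Copy copylen bytes starting at copyoffset from the data chain and
 * send them to the filter agent as a CFM_OP_DATA_OUT or
 * CFM_OP_DATA_IN event carrying the stream offsets and the flow's
 * addresses.
 */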
static int
cfil_dispatch_data_event(struct socket *so, uint32_t kcunit, int outgoing,
struct mbuf *data, unsigned int copyoffset, unsigned int copylen)
{
errno_t error = 0;
struct mbuf *copy = NULL;
struct mbuf *msg = NULL;
unsigned int one = 1;
struct cfil_msg_data_event *data_req;
size_t hdrsize;
struct inpcb *inp = (struct inpcb *)so->so_pcb;
struct cfil_entry *entry;
struct cfe_buf *entrybuf;
struct content_filter *cfc;
struct timeval tv;
cfil_rw_lock_shared(&cfil_lck_rw);
entry = &so->so_cfil->cfi_entries[kcunit - 1];
if (outgoing)
entrybuf = &entry->cfe_snd;
else
entrybuf = &entry->cfe_rcv;
cfc = entry->cfe_filter;
if (cfc == NULL)
goto done;
CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
socket_lock_assert_owned(so);
if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
error = ENOBUFS;
goto done;
}
copy = m_copym_mode(data, copyoffset, copylen, M_DONTWAIT,
M_COPYM_NOOP_HDR);
if (copy == NULL) {
CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
error = ENOMEM;
goto done;
}
hdrsize = sizeof(struct cfil_msg_data_event);
error = mbuf_allocpacket(MBUF_DONTWAIT, hdrsize, &one, &msg);
if (error != 0) {
CFIL_LOG(LOG_ERR, "mbuf_allocpacket() failed");
m_freem(copy);
error = ENOMEM;
goto done;
}
mbuf_setlen(msg, hdrsize);
mbuf_pkthdr_setlen(msg, hdrsize + copylen);
msg->m_next = copy;
data_req = (struct cfil_msg_data_event *)mbuf_data(msg);
bzero(data_req, hdrsize);
data_req->cfd_msghdr.cfm_len = hdrsize + copylen;
data_req->cfd_msghdr.cfm_version = CFM_VERSION_CURRENT;
data_req->cfd_msghdr.cfm_type = CFM_TYPE_EVENT;
data_req->cfd_msghdr.cfm_op =
outgoing ? CFM_OP_DATA_OUT : CFM_OP_DATA_IN;
data_req->cfd_msghdr.cfm_sock_id =
entry->cfe_cfil_info->cfi_sock_id;
data_req->cfd_start_offset = entrybuf->cfe_peeked;
data_req->cfd_end_offset = entrybuf->cfe_peeked + copylen;
if (inp->inp_vflag & INP_IPV6) {
if (outgoing) {
fill_ip6_sockaddr_4_6(&data_req->cfc_src,
&inp->in6p_laddr, inp->inp_lport);
fill_ip6_sockaddr_4_6(&data_req->cfc_dst,
&inp->in6p_faddr, inp->inp_fport);
} else {
fill_ip6_sockaddr_4_6(&data_req->cfc_src,
&inp->in6p_faddr, inp->inp_fport);
fill_ip6_sockaddr_4_6(&data_req->cfc_dst,
&inp->in6p_laddr, inp->inp_lport);
}
} else if (inp->inp_vflag & INP_IPV4) {
if (outgoing) {
fill_ip_sockaddr_4_6(&data_req->cfc_src,
inp->inp_laddr, inp->inp_lport);
fill_ip_sockaddr_4_6(&data_req->cfc_dst,
inp->inp_faddr, inp->inp_fport);
} else {
fill_ip_sockaddr_4_6(&data_req->cfc_src,
inp->inp_faddr, inp->inp_fport);
fill_ip_sockaddr_4_6(&data_req->cfc_dst,
inp->inp_laddr, inp->inp_lport);
}
}
microuptime(&tv);
CFI_ADD_TIME_LOG(so->so_cfil, &tv, &so->so_cfil->cfi_first_event, data_req->cfd_msghdr.cfm_op);
error = ctl_enqueuembuf(entry->cfe_filter->cf_kcref,
entry->cfe_filter->cf_kcunit,
msg, CTL_DATA_EOR);
if (error != 0) {
CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
mbuf_freem(msg);
goto done;
}
entry->cfe_flags &= ~CFEF_FLOW_CONTROLLED;
OSIncrementAtomic(&cfil_stats.cfs_data_event_ok);
done:
if (error == ENOBUFS) {
entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
OSIncrementAtomic(
&cfil_stats.cfs_data_event_flow_control);
if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
cfil_rw_lock_exclusive(&cfil_lck_rw);
cfc->cf_flags |= CFF_FLOW_CONTROLLED;
cfil_rw_unlock_exclusive(&cfil_lck_rw);
} else {
if (error != 0)
OSIncrementAtomic(&cfil_stats.cfs_data_event_fail);
cfil_rw_unlock_shared(&cfil_lck_rw);
}
return (error);
}
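/*
 * Service the control queue of one filter entry.
 *
 * Data lands on cfe_ctl_q first. Bytes below cfe_pass_offset have
 * been passed by the agent and move to cfe_pending_q. Bytes between
 * cfe_peeked and cfe_peek_offset have yet to be shown to the agent
 * and are copied out via cfil_dispatch_data_event(); cfe_peeked
 * records how far into the stream the agent has seen. Finally,
 * cfil_service_pending_queue() releases whatever the pending queue
 * holds below the pass offset.
 */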
static int
cfil_data_service_ctl_q(struct socket *so, uint32_t kcunit, int outgoing)
{
errno_t error = 0;
struct mbuf *data, *tmp = NULL;
unsigned int datalen = 0, copylen = 0, copyoffset = 0;
struct cfil_entry *entry;
struct cfe_buf *entrybuf;
uint64_t currentoffset = 0;
if (so->so_cfil == NULL)
return (0);
CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
socket_lock_assert_owned(so);
entry = &so->so_cfil->cfi_entries[kcunit - 1];
if (outgoing)
entrybuf = &entry->cfe_snd;
else
entrybuf = &entry->cfe_rcv;
if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
error = cfil_dispatch_attach_event(so, kcunit);
if (error != 0) {
if (error == ENOBUFS || error == ENOMEM)
error = 0;
goto done;
}
} else if ((entry->cfe_flags & CFEF_DATA_START) == 0) {
OSIncrementAtomic(&cfil_stats.cfs_ctl_q_not_started);
goto done;
}
CFIL_LOG(LOG_DEBUG, "pass_offset %llu peeked %llu peek_offset %llu",
entrybuf->cfe_pass_offset,
entrybuf->cfe_peeked,
entrybuf->cfe_peek_offset);
while ((data = cfil_queue_first(&entrybuf->cfe_ctl_q)) != NULL &&
entrybuf->cfe_ctl_q.q_start < entrybuf->cfe_pass_offset) {
datalen = cfil_data_length(data, NULL);
tmp = data;
if (entrybuf->cfe_ctl_q.q_start + datalen <=
entrybuf->cfe_pass_offset) {
copylen = datalen;
} else {
copylen = entrybuf->cfe_pass_offset -
entrybuf->cfe_ctl_q.q_start;
}
VERIFY(copylen <= datalen);
CFIL_LOG(LOG_DEBUG,
"%llx first %llu peeked %llu pass %llu peek %llu"
"datalen %u copylen %u",
(uint64_t)VM_KERNEL_ADDRPERM(tmp),
entrybuf->cfe_ctl_q.q_start,
entrybuf->cfe_peeked,
entrybuf->cfe_pass_offset,
entrybuf->cfe_peek_offset,
datalen, copylen);
if (entrybuf->cfe_ctl_q.q_start + copylen >
entrybuf->cfe_peeked)
entrybuf->cfe_peeked =
entrybuf->cfe_ctl_q.q_start + copylen;
if (copylen < datalen)
break;
cfil_queue_remove(&entrybuf->cfe_ctl_q, data, datalen);
cfil_queue_enqueue(&entrybuf->cfe_pending_q, data, datalen);
if (outgoing)
OSAddAtomic64(datalen,
&cfil_stats.cfs_pending_q_out_enqueued);
else
OSAddAtomic64(datalen,
&cfil_stats.cfs_pending_q_in_enqueued);
}
CFIL_INFO_VERIFY(so->so_cfil);
if (tmp != NULL)
CFIL_LOG(LOG_DEBUG,
"%llx first %llu peeked %llu pass %llu peek %llu"
"datalen %u copylen %u",
(uint64_t)VM_KERNEL_ADDRPERM(tmp),
entrybuf->cfe_ctl_q.q_start,
entrybuf->cfe_peeked,
entrybuf->cfe_pass_offset,
entrybuf->cfe_peek_offset,
datalen, copylen);
tmp = NULL;
for (data = cfil_queue_first(&entrybuf->cfe_ctl_q),
currentoffset = entrybuf->cfe_ctl_q.q_start;
data != NULL && currentoffset < entrybuf->cfe_peek_offset;
data = cfil_queue_next(&entrybuf->cfe_ctl_q, data),
currentoffset += datalen) {
datalen = cfil_data_length(data, NULL);
tmp = data;
if (currentoffset + datalen <= entrybuf->cfe_peeked)
continue;
copyoffset = entrybuf->cfe_peeked - currentoffset;
VERIFY(copyoffset < datalen);
copylen = datalen - copyoffset;
VERIFY(copylen <= datalen);
if (currentoffset + copyoffset + copylen >
entrybuf->cfe_peek_offset) {
copylen = entrybuf->cfe_peek_offset -
(currentoffset + copyoffset);
}
CFIL_LOG(LOG_DEBUG,
"%llx current %llu peeked %llu pass %llu peek %llu"
"datalen %u copylen %u copyoffset %u",
(uint64_t)VM_KERNEL_ADDRPERM(tmp),
currentoffset,
entrybuf->cfe_peeked,
entrybuf->cfe_pass_offset,
entrybuf->cfe_peek_offset,
datalen, copylen, copyoffset);
if (copylen == 0)
break;
error = cfil_dispatch_data_event(so, kcunit,
outgoing, data, copyoffset, copylen);
if (error != 0) {
break;
}
entrybuf->cfe_peeked += copylen;
if (outgoing)
OSAddAtomic64(copylen,
&cfil_stats.cfs_ctl_q_out_peeked);
else
OSAddAtomic64(copylen,
&cfil_stats.cfs_ctl_q_in_peeked);
if (copylen + copyoffset < datalen)
break;
}
CFIL_INFO_VERIFY(so->so_cfil);
if (tmp != NULL)
CFIL_LOG(LOG_DEBUG,
"%llx first %llu peeked %llu pass %llu peek %llu"
"datalen %u copylen %u copyoffset %u",
(uint64_t)VM_KERNEL_ADDRPERM(tmp),
currentoffset,
entrybuf->cfe_peeked,
entrybuf->cfe_pass_offset,
entrybuf->cfe_peek_offset,
datalen, copylen, copyoffset);
error = cfil_service_pending_queue(so, kcunit, outgoing);
if (error != 0) {
CFIL_LOG(LOG_ERR, "cfil_service_pending_queue() error %d",
error);
goto done;
}
if (so->so_cfil == NULL)
goto done;
else if (outgoing) {
if ((so->so_cfil->cfi_flags & CFIF_SHUT_WR) &&
!(entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT))
cfil_dispatch_disconnect_event(so, kcunit, 1);
} else {
if ((so->so_cfil->cfi_flags & CFIF_SHUT_RD) &&
!(entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))
cfil_dispatch_disconnect_event(so, kcunit, 0);
}
done:
CFIL_LOG(LOG_DEBUG,
"first %llu peeked %llu pass %llu peek %llu",
entrybuf->cfe_ctl_q.q_start,
entrybuf->cfe_peeked,
entrybuf->cfe_pass_offset,
entrybuf->cfe_peek_offset);
CFIL_INFO_VERIFY(so->so_cfil);
return (error);
}
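/*
 * Filter one span of data in the given direction: enqueue it on the
 * entry's control queue and service the queue. Returns EJUSTRETURN so
 * the caller knows the mbufs are now held by the content filter.
 */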
int
cfil_data_filter(struct socket *so, uint32_t kcunit, int outgoing,
struct mbuf *data, uint64_t datalen)
{
errno_t error = 0;
struct cfil_entry *entry;
struct cfe_buf *entrybuf;
CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
socket_lock_assert_owned(so);
entry = &so->so_cfil->cfi_entries[kcunit - 1];
if (outgoing)
entrybuf = &entry->cfe_snd;
else
entrybuf = &entry->cfe_rcv;
if (entry->cfe_filter == NULL) {
error = 0;
goto done;
}
cfil_queue_enqueue(&entrybuf->cfe_ctl_q, data, datalen);
if (outgoing)
OSAddAtomic64(datalen,
&cfil_stats.cfs_ctl_q_out_enqueued);
else
OSAddAtomic64(datalen,
&cfil_stats.cfs_ctl_q_in_enqueued);
error = cfil_data_service_ctl_q(so, kcunit, outgoing);
if (error != 0) {
CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
error);
}
error = EJUSTRETURN;
done:
CFIL_INFO_VERIFY(so->so_cfil);
CFIL_LOG(LOG_INFO, "return %d", error);
return (error);
}
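/*
 * Re-inject data the filter agent has passed. Outgoing data is resent
 * with sosend() under MSG_SKIPCFIL so it does not loop back through
 * the filter; incoming data is appended to the receive buffer with
 * M_SKIPCFIL set. This is also where a deferred shutdown is finalized
 * and where a thread blocked in close-wait is woken once no filter
 * remains attached.
 */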
static int
cfil_service_inject_queue(struct socket *so, int outgoing)
{
mbuf_t data;
unsigned int datalen;
int mbcnt;
unsigned int copylen;
errno_t error = 0;
struct mbuf *copy = NULL;
struct cfi_buf *cfi_buf;
struct cfil_queue *inject_q;
int need_rwakeup = 0;
if (so->so_cfil == NULL)
return (0);
CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
(uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
socket_lock_assert_owned(so);
if (outgoing) {
cfi_buf = &so->so_cfil->cfi_snd;
so->so_cfil->cfi_flags &= ~CFIF_RETRY_INJECT_OUT;
} else {
cfi_buf = &so->so_cfil->cfi_rcv;
so->so_cfil->cfi_flags &= ~CFIF_RETRY_INJECT_IN;
}
inject_q = &cfi_buf->cfi_inject_q;
while ((data = cfil_queue_first(inject_q)) != NULL) {
datalen = cfil_data_length(data, &mbcnt);
CFIL_LOG(LOG_INFO, "data %llx datalen %u",
(uint64_t)VM_KERNEL_ADDRPERM(data), datalen);
copy = m_copym_mode(data, 0, M_COPYALL, M_DONTWAIT,
M_COPYM_COPY_HDR);
if (copy == NULL) {
CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
error = ENOMEM;
break;
}
if ((copylen = m_length(copy)) != datalen)
panic("%s so %p copylen %d != datalen %d",
__func__, so, copylen, datalen);
if (outgoing) {
socket_unlock(so, 0);
error = sosend(so, NULL, NULL,
copy, NULL,
MSG_SKIPCFIL | MSG_DONTWAIT | MSG_NBIO);
socket_lock(so, 0);
if (error != 0) {
CFIL_LOG(LOG_ERR, "sosend() failed %d",
error);
}
} else {
copy->m_flags |= M_SKIPCFIL;
if (sbappendstream(&so->so_rcv, copy))
need_rwakeup = 1;
}
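/*
 * The content filter state may be gone at this point, e.g. if
 * the socket was unlocked around sosend()
 */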
if (so->so_cfil == NULL) {
CFIL_LOG(LOG_ERR, "so %llx cfil detached",
(uint64_t)VM_KERNEL_ADDRPERM(so));
OSIncrementAtomic(&cfil_stats.cfs_inject_q_detached);
error = 0;
break;
}
if (error != 0)
break;
cfil_queue_remove(inject_q, data, datalen);
mbuf_freem(data);
cfi_buf->cfi_pending_first += datalen;
cfi_buf->cfi_pending_mbcnt -= mbcnt;
cfil_info_buf_verify(cfi_buf);
if (outgoing)
OSAddAtomic64(datalen,
&cfil_stats.cfs_inject_q_out_passed);
else
OSAddAtomic64(datalen,
&cfil_stats.cfs_inject_q_in_passed);
}
if (need_rwakeup)
sorwakeup(so);
if (error != 0 && so->so_cfil) {
if (error == ENOBUFS)
OSIncrementAtomic(&cfil_stats.cfs_inject_q_nobufs);
if (error == ENOMEM)
OSIncrementAtomic(&cfil_stats.cfs_inject_q_nomem);
if (outgoing) {
so->so_cfil->cfi_flags |= CFIF_RETRY_INJECT_OUT;
OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_fail);
} else {
so->so_cfil->cfi_flags |= CFIF_RETRY_INJECT_IN;
OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_fail);
}
}
if (so->so_cfil && (so->so_cfil->cfi_flags & CFIF_SHUT_WR)) {
cfil_sock_notify_shutdown(so, SHUT_WR);
if (cfil_sock_data_pending(&so->so_snd) == 0)
soshutdownlock_final(so, SHUT_WR);
}
if (so->so_cfil && (so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT)) {
if (cfil_filters_attached(so) == 0) {
CFIL_LOG(LOG_INFO, "so %llx waking",
(uint64_t)VM_KERNEL_ADDRPERM(so));
wakeup((caddr_t)&so->so_cfil);
}
}
CFIL_INFO_VERIFY(so->so_cfil);
return (error);
}
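/*
 * cfil_service_pending_queue()
 *
 * Dequeue the data of the pending queue that fits below the pass
 * offset, submit it to the filters with a greater control unit, and
 * queue it on the inject queue for re-injection into the socket.
 */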
static int
cfil_service_pending_queue(struct socket *so, uint32_t kcunit, int outgoing)
{
uint64_t passlen, curlen;
mbuf_t data;
unsigned int datalen;
errno_t error = 0;
struct cfil_entry *entry;
struct cfe_buf *entrybuf;
struct cfil_queue *pending_q;
CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
socket_lock_assert_owned(so);
entry = &so->so_cfil->cfi_entries[kcunit - 1];
if (outgoing)
entrybuf = &entry->cfe_snd;
else
entrybuf = &entry->cfe_rcv;
pending_q = &entrybuf->cfe_pending_q;
passlen = entrybuf->cfe_pass_offset - pending_q->q_start;
curlen = 0;
while ((data = cfil_queue_first(pending_q)) != NULL) {
datalen = cfil_data_length(data, NULL);
CFIL_LOG(LOG_INFO,
"data %llx datalen %u passlen %llu curlen %llu",
(uint64_t)VM_KERNEL_ADDRPERM(data), datalen,
passlen, curlen);
if (curlen + datalen > passlen)
break;
cfil_queue_remove(pending_q, data, datalen);
curlen += datalen;
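/* Pass the data to the filters with a greater control unit, if any */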
for (kcunit += 1;
kcunit <= MAX_CONTENT_FILTER;
kcunit++) {
error = cfil_data_filter(so, kcunit, outgoing,
data, datalen);
if (error != 0)
break;
}
if (error == 0) {
if (outgoing) {
cfil_queue_enqueue(
&so->so_cfil->cfi_snd.cfi_inject_q,
data, datalen);
OSAddAtomic64(datalen,
&cfil_stats.cfs_inject_q_out_enqueued);
} else {
cfil_queue_enqueue(
&so->so_cfil->cfi_rcv.cfi_inject_q,
data, datalen);
OSAddAtomic64(datalen,
&cfil_stats.cfs_inject_q_in_enqueued);
}
}
}
CFIL_INFO_VERIFY(so->so_cfil);
return (error);
}
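/*
 * cfil_update_data_offsets()
 *
 * Apply updated pass and peek offsets from the filter agent for one
 * direction of the flow and service the control queue accordingly.
 * A filter entry is marked detached once its pass offsets reach
 * CFM_MAX_OFFSET in both directions, or once the flow is waiting for
 * close and its control queues are empty.
 */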
int
cfil_update_data_offsets(struct socket *so, uint32_t kcunit, int outgoing,
uint64_t pass_offset, uint64_t peek_offset)
{
errno_t error = 0;
struct cfil_entry *entry = NULL;
struct cfe_buf *entrybuf;
int updated = 0;
CFIL_LOG(LOG_INFO, "pass %llu peek %llu", pass_offset, peek_offset);
socket_lock_assert_owned(so);
if (so->so_cfil == NULL) {
CFIL_LOG(LOG_ERR, "so %llx cfil detached",
(uint64_t)VM_KERNEL_ADDRPERM(so));
error = 0;
goto done;
} else if (so->so_cfil->cfi_flags & CFIF_DROP) {
CFIL_LOG(LOG_ERR, "so %llx drop set",
(uint64_t)VM_KERNEL_ADDRPERM(so));
error = EPIPE;
goto done;
}
entry = &so->so_cfil->cfi_entries[kcunit - 1];
if (outgoing)
entrybuf = &entry->cfe_snd;
else
entrybuf = &entry->cfe_rcv;
if (pass_offset > entrybuf->cfe_pass_offset) {
entrybuf->cfe_pass_offset = pass_offset;
if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset)
entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
updated = 1;
} else {
CFIL_LOG(LOG_INFO, "pass_offset %llu <= cfe_pass_offset %llu",
pass_offset, entrybuf->cfe_pass_offset);
}
if (peek_offset > entrybuf->cfe_pass_offset &&
peek_offset > entrybuf->cfe_peek_offset) {
entrybuf->cfe_peek_offset = peek_offset;
updated = 1;
}
if (updated == 0)
goto done;
error = cfil_data_service_ctl_q(so, kcunit, outgoing);
if (error != 0) {
CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
error);
goto done;
}
error = EJUSTRETURN;
done:
if (entry != NULL &&
((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET &&
entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) ||
((so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT) &&
cfil_queue_empty(&entry->cfe_snd.cfe_ctl_q) &&
cfil_queue_empty(&entry->cfe_rcv.cfe_ctl_q)))) {
entry->cfe_flags |= CFEF_CFIL_DETACHED;
CFIL_LOG(LOG_INFO, "so %llx detached %u",
(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
if ((so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT) &&
cfil_filters_attached(so) == 0) {
CFIL_LOG(LOG_INFO, "so %llx waking",
(uint64_t)VM_KERNEL_ADDRPERM(so));
wakeup((caddr_t)&so->so_cfil);
}
}
CFIL_INFO_VERIFY(so->so_cfil);
CFIL_LOG(LOG_INFO, "return %d", error);
return (error);
}
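/*
 * cfil_set_socket_pass_offset()
 *
 * When no data is pending in the given direction, set the per-socket
 * pass offset to the smallest pass offset among the attached filters.
 */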
static int
cfil_set_socket_pass_offset(struct socket *so, int outgoing)
{
struct cfi_buf *cfi_buf;
struct cfil_entry *entry;
struct cfe_buf *entrybuf;
uint32_t kcunit;
uint64_t pass_offset = 0;
if (so->so_cfil == NULL)
return (0);
CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
(uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
socket_lock_assert_owned(so);
if (outgoing)
cfi_buf = &so->so_cfil->cfi_snd;
else
cfi_buf = &so->so_cfil->cfi_rcv;
if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == 0) {
for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
entry = &so->so_cfil->cfi_entries[kcunit - 1];
if (entry->cfe_filter == NULL)
continue;
if (outgoing)
entrybuf = &entry->cfe_snd;
else
entrybuf = &entry->cfe_rcv;
if (pass_offset == 0 ||
entrybuf->cfe_pass_offset < pass_offset)
pass_offset = entrybuf->cfe_pass_offset;
}
cfi_buf->cfi_pass_offset = pass_offset;
}
return (0);
}
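/*
 * cfil_action_data_pass()
 *
 * Process a pass/peek offset update for one direction: update the
 * entry offsets, re-inject whatever can now pass, and recompute the
 * per-socket pass offset.
 */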
int
cfil_action_data_pass(struct socket *so, uint32_t kcunit, int outgoing,
uint64_t pass_offset, uint64_t peek_offset)
{
errno_t error = 0;
CFIL_LOG(LOG_INFO, "");
socket_lock_assert_owned(so);
error = cfil_acquire_sockbuf(so, outgoing);
if (error != 0) {
CFIL_LOG(LOG_INFO, "so %llx %s dropped",
(uint64_t)VM_KERNEL_ADDRPERM(so),
outgoing ? "out" : "in");
goto release;
}
error = cfil_update_data_offsets(so, kcunit, outgoing,
pass_offset, peek_offset);
cfil_service_inject_queue(so, outgoing);
cfil_set_socket_pass_offset(so, outgoing);
release:
CFIL_INFO_VERIFY(so->so_cfil);
cfil_release_sockbuf(so, outgoing);
return (error);
}
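/*
 * cfil_flush_queues()
 *
 * Drain the per-entry control and pending queues and the inject
 * queues in both directions, accounting for whether the flow is being
 * dropped or closed.
 */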
static void
cfil_flush_queues(struct socket *so)
{
struct cfil_entry *entry;
int kcunit;
uint64_t drained;
if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
goto done;
socket_lock_assert_owned(so);
(void) cfil_acquire_sockbuf(so, 1);
if (so->so_cfil != NULL) {
drained = 0;
for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
entry = &so->so_cfil->cfi_entries[kcunit - 1];
drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
drained += cfil_queue_drain(
&entry->cfe_snd.cfe_pending_q);
}
drained += cfil_queue_drain(&so->so_cfil->cfi_snd.cfi_inject_q);
if (drained) {
if (so->so_cfil->cfi_flags & CFIF_DROP)
OSIncrementAtomic(
&cfil_stats.cfs_flush_out_drop);
else
OSIncrementAtomic(
&cfil_stats.cfs_flush_out_close);
}
}
cfil_release_sockbuf(so, 1);
(void) cfil_acquire_sockbuf(so, 0);
if (so->so_cfil != NULL) {
drained = 0;
for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
entry = &so->so_cfil->cfi_entries[kcunit - 1];
drained += cfil_queue_drain(
&entry->cfe_rcv.cfe_ctl_q);
drained += cfil_queue_drain(
&entry->cfe_rcv.cfe_pending_q);
}
drained += cfil_queue_drain(&so->so_cfil->cfi_rcv.cfi_inject_q);
if (drained) {
if (so->so_cfil->cfi_flags & CFIF_DROP)
OSIncrementAtomic(
&cfil_stats.cfs_flush_in_drop);
else
OSIncrementAtomic(
&cfil_stats.cfs_flush_in_close);
}
}
cfil_release_sockbuf(so, 0);
done:
CFIL_INFO_VERIFY(so->so_cfil);
}
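/*
 * cfil_action_drop()
 *
 * Process a drop decision from the filter agent: mark the flow with
 * CFIF_DROP, defunct the socket to tear the flow down, flush the
 * filter queues and wake up any thread blocked in
 * cfil_sock_close_wait().
 */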
int
cfil_action_drop(struct socket *so, uint32_t kcunit)
{
errno_t error = 0;
struct cfil_entry *entry;
struct proc *p;
if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
goto done;
socket_lock_assert_owned(so);
entry = &so->so_cfil->cfi_entries[kcunit - 1];
if (entry->cfe_filter == NULL)
goto done;
so->so_cfil->cfi_flags |= CFIF_DROP;
p = current_proc();
error = sosetdefunct(p, so,
SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL,
FALSE);
if (error == 0)
error = sodefunct(p, so,
SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);
entry->cfe_flags |= CFEF_CFIL_DETACHED;
CFIL_LOG(LOG_INFO, "so %llx detached %u",
(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
cfil_flush_queues(so);
if (so->so_cfil && (so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT)) {
if (cfil_filters_attached(so) == 0) {
CFIL_LOG(LOG_INFO, "so %llx waking",
(uint64_t)VM_KERNEL_ADDRPERM(so));
wakeup((caddr_t)&so->so_cfil);
}
}
done:
return (error);
}
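/*
 * cfil_action_bless_client()
 *
 * Exempt the flow of a given client from filtering: if content filter
 * state is attached, pass all data in both directions; otherwise mark
 * the socket so content filtering is skipped entirely.
 */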
int
cfil_action_bless_client(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
{
errno_t error = 0;
cfil_rw_lock_exclusive(&cfil_lck_rw);
bool cfil_attached = false;
struct cfil_msg_bless_client *blessmsg = (struct cfil_msg_bless_client *)msghdr;
struct socket *so = cfil_socket_from_client_uuid(blessmsg->cfb_client_uuid, &cfil_attached);
if (so == NULL) {
error = ENOENT;
} else {
socket_lock(so, 1);
if (cfil_attached) {
(void)cfil_action_data_pass(so, kcunit, 1, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
(void)cfil_action_data_pass(so, kcunit, 0, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
} else {
so->so_flags1 |= SOF1_CONTENT_FILTER_SKIP;
}
socket_unlock(so, 1);
}
cfil_rw_unlock_exclusive(&cfil_lck_rw);
return (error);
}
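/*
 * cfil_update_entry_offsets()
 *
 * Advance the queue offsets of every attached entry by datalen for
 * data that is already below the per-socket pass offset and therefore
 * does not need to be filtered.
 */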
static int
cfil_update_entry_offsets(struct socket *so, int outgoing, unsigned int datalen)
{
struct cfil_entry *entry;
struct cfe_buf *entrybuf;
uint32_t kcunit;
CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u",
(uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen);
for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
entry = &so->so_cfil->cfi_entries[kcunit - 1];
if (entry->cfe_filter == NULL)
continue;
if (outgoing)
entrybuf = &entry->cfe_snd;
else
entrybuf = &entry->cfe_rcv;
entrybuf->cfe_ctl_q.q_start += datalen;
entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start;
entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start;
if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset)
entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
entrybuf->cfe_ctl_q.q_end += datalen;
entrybuf->cfe_pending_q.q_start += datalen;
entrybuf->cfe_pending_q.q_end += datalen;
}
CFIL_INFO_VERIFY(so->so_cfil);
return (0);
}
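/*
 * cfil_data_common()
 *
 * Common handling for data in both directions: account for the new
 * data, then either advance the entry offsets (data already passed)
 * or submit the data to each attached filter in turn.
 */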
int
cfil_data_common(struct socket *so, int outgoing, struct sockaddr *to,
struct mbuf *data, struct mbuf *control, uint32_t flags)
{
#pragma unused(to, control, flags)
errno_t error = 0;
unsigned int datalen;
int mbcnt;
int kcunit;
struct cfi_buf *cfi_buf;
if (so->so_cfil == NULL) {
CFIL_LOG(LOG_ERR, "so %llx cfil detached",
(uint64_t)VM_KERNEL_ADDRPERM(so));
error = 0;
goto done;
} else if (so->so_cfil->cfi_flags & CFIF_DROP) {
CFIL_LOG(LOG_ERR, "so %llx drop set",
(uint64_t)VM_KERNEL_ADDRPERM(so));
error = EPIPE;
goto done;
}
datalen = cfil_data_length(data, &mbcnt);
CFIL_LOG(LOG_INFO, "so %llx %s m %llx len %u flags 0x%x nextpkt %llx",
(uint64_t)VM_KERNEL_ADDRPERM(so),
outgoing ? "out" : "in",
(uint64_t)VM_KERNEL_ADDRPERM(data), datalen, data->m_flags,
(uint64_t)VM_KERNEL_ADDRPERM(data->m_nextpkt));
if (outgoing)
cfi_buf = &so->so_cfil->cfi_snd;
else
cfi_buf = &so->so_cfil->cfi_rcv;
cfi_buf->cfi_pending_last += datalen;
cfi_buf->cfi_pending_mbcnt += mbcnt;
cfil_info_buf_verify(cfi_buf);
CFIL_LOG(LOG_INFO, "so %llx cfi_pending_last %llu cfi_pass_offset %llu",
(uint64_t)VM_KERNEL_ADDRPERM(so),
cfi_buf->cfi_pending_last,
cfi_buf->cfi_pass_offset);
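/* Fast path when all the data is already below the pass offset */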
if (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset) {
cfil_update_entry_offsets(so, outgoing, datalen);
} else {
for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
error = cfil_data_filter(so, kcunit, outgoing, data,
datalen);
if (error != 0)
break;
}
}
if (error == 0) {
cfi_buf->cfi_pending_first += datalen;
cfi_buf->cfi_pending_mbcnt -= mbcnt;
cfil_info_buf_verify(cfi_buf);
}
done:
CFIL_INFO_VERIFY(so->so_cfil);
return (error);
}
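/*
 * cfil_sock_data_out()
 *
 * Entry point for outgoing data.  Expects the send socket buffer to
 * be locked by the caller.  Returns EJUSTRETURN when the data is held
 * for filtering and EPIPE when the flow has been dropped.
 */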
int
cfil_sock_data_out(struct socket *so, struct sockaddr *to,
struct mbuf *data, struct mbuf *control, uint32_t flags)
{
int error = 0;
if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
return (0);
socket_lock_assert_owned(so);
if (so->so_cfil->cfi_flags & CFIF_DROP) {
CFIL_LOG(LOG_ERR, "so %llx drop set",
(uint64_t)VM_KERNEL_ADDRPERM(so));
return (EPIPE);
}
if (control != NULL) {
CFIL_LOG(LOG_ERR, "so %llx control",
(uint64_t)VM_KERNEL_ADDRPERM(so));
OSIncrementAtomic(&cfil_stats.cfs_data_out_control);
}
if ((flags & MSG_OOB)) {
CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
(uint64_t)VM_KERNEL_ADDRPERM(so));
OSIncrementAtomic(&cfil_stats.cfs_data_out_oob);
}
if ((so->so_snd.sb_flags & SB_LOCK) == 0)
panic("so %p SB_LOCK not set", so);
if (so->so_snd.sb_cfil_thread != NULL)
panic("%s sb_cfil_thread %p not NULL", __func__,
so->so_snd.sb_cfil_thread);
error = cfil_data_common(so, 1, to, data, control, flags);
return (error);
}
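/*
 * cfil_sock_data_in()
 *
 * Entry point for incoming data; the receive-side counterpart of
 * cfil_sock_data_out().
 */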
int
cfil_sock_data_in(struct socket *so, struct sockaddr *from,
struct mbuf *data, struct mbuf *control, uint32_t flags)
{
int error = 0;
if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
return (0);
socket_lock_assert_owned(so);
if (so->so_cfil->cfi_flags & CFIF_DROP) {
CFIL_LOG(LOG_ERR, "so %llx drop set",
(uint64_t)VM_KERNEL_ADDRPERM(so));
return (EPIPE);
}
if (control != NULL) {
CFIL_LOG(LOG_ERR, "so %llx control",
(uint64_t)VM_KERNEL_ADDRPERM(so));
OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
}
if (data->m_type == MT_OOBDATA) {
CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
(uint64_t)VM_KERNEL_ADDRPERM(so));
OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
}
error = cfil_data_common(so, 0, from, data, control, flags);
return (error);
}
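/*
 * cfil_sock_shutdown()
 *
 * Intercept shutdown(2) so the filters are notified first.  While
 * filtered data is still pending on the send side, the write shutdown
 * is deferred: EJUSTRETURN tells the caller to return early and
 * SHUT_RDWR is downgraded to SHUT_RD.
 */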
int
cfil_sock_shutdown(struct socket *so, int *how)
{
int error = 0;
if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
goto done;
socket_lock_assert_owned(so);
CFIL_LOG(LOG_INFO, "so %llx how %d",
(uint64_t)VM_KERNEL_ADDRPERM(so), *how);
if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
error = ENOTCONN;
goto done;
}
if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
error = ENOTCONN;
goto done;
}
if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) {
CFIL_LOG(LOG_ERR, "so %llx drop set",
(uint64_t)VM_KERNEL_ADDRPERM(so));
goto done;
}
if (*how != SHUT_WR) {
if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
error = ENOTCONN;
goto done;
}
so->so_cfil->cfi_flags |= CFIF_SHUT_RD;
cfil_sock_notify_shutdown(so, SHUT_RD);
}
if (*how != SHUT_RD) {
if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
error = ENOTCONN;
goto done;
}
so->so_cfil->cfi_flags |= CFIF_SHUT_WR;
cfil_sock_notify_shutdown(so, SHUT_WR);
if (cfil_sock_data_pending(&so->so_snd) != 0) {
if (*how == SHUT_WR) {
error = EJUSTRETURN;
} else if (*how == SHUT_RDWR) {
*how = SHUT_RD;
}
}
}
done:
return (error);
}
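/*
 * cfil_sock_is_closed()
 *
 * The socket is closed: dispatch the closed event to every filter,
 * re-inject what can still be sent and drain the remaining queues.
 */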
void
cfil_sock_is_closed(struct socket *so)
{
errno_t error = 0;
int kcunit;
if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
return;
CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
socket_lock_assert_owned(so);
for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
error = cfil_dispatch_closed_event(so, kcunit);
}
error = cfil_acquire_sockbuf(so, 1);
if (error == 0)
cfil_service_inject_queue(so, 1);
cfil_release_sockbuf(so, 1);
so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED;
cfil_flush_queues(so);
CFIL_INFO_VERIFY(so->so_cfil);
}
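/*
 * cfil_sock_notify_shutdown()
 *
 * Dispatch disconnect events to every filter for the direction(s)
 * being shut down.
 */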
void
cfil_sock_notify_shutdown(struct socket *so, int how)
{
errno_t error = 0;
int kcunit;
if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
return;
CFIL_LOG(LOG_INFO, "so %llx how %d",
(uint64_t)VM_KERNEL_ADDRPERM(so), how);
socket_lock_assert_owned(so);
for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
if (how != SHUT_WR)
error = cfil_dispatch_disconnect_event(so, kcunit, 0);
if (how != SHUT_RD)
error = cfil_dispatch_disconnect_event(so, kcunit, 1);
}
}
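/*
 * cfil_filters_attached()
 *
 * Return 1 when at least one filter entry has seen the attach event
 * and has not been detached, 0 otherwise.
 */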
static int
cfil_filters_attached(struct socket *so)
{
struct cfil_entry *entry;
uint32_t kcunit;
int attached = 0;
if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
return (0);
socket_lock_assert_owned(so);
for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
entry = &so->so_cfil->cfi_entries[kcunit - 1];
if (entry->cfe_filter == NULL)
continue;
if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0)
continue;
if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0)
continue;
attached = 1;
break;
}
return (attached);
}
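/*
 * cfil_sock_close_wait()
 *
 * Block the thread closing the socket until all filters have detached
 * or the close wait timeout (cfil_close_wait_timeout, in
 * milliseconds) expires.
 */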
void
cfil_sock_close_wait(struct socket *so)
{
lck_mtx_t *mutex_held;
struct timespec ts;
int error;
if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
return;
CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
if (so->so_proto->pr_getlock != NULL)
mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
else
mutex_held = so->so_proto->pr_domain->dom_mtx;
LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
while (cfil_filters_attached(so)) {
cfil_sock_notify_shutdown(so, SHUT_RDWR);
if (cfil_filters_attached(so) == 0)
break;
CFIL_LOG(LOG_INFO, "so %llx waiting",
(uint64_t)VM_KERNEL_ADDRPERM(so));
ts.tv_sec = cfil_close_wait_timeout / 1000;
ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
NSEC_PER_USEC * 1000;
OSIncrementAtomic(&cfil_stats.cfs_close_wait);
so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT;
error = msleep((caddr_t)&so->so_cfil, mutex_held,
PSOCK | PCATCH, "cfil_sock_close_wait", &ts);
so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;
CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
(uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0));
if (error != 0) {
OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
break;
}
}
}
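/*
 * cfil_sock_data_pending()
 *
 * Report how much filtered data is pending for the socket buffer,
 * capped by the mbuf count so the estimate is not overcommitted.
 */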
int32_t
cfil_sock_data_pending(struct sockbuf *sb)
{
struct socket *so = sb->sb_so;
uint64_t pending = 0;
if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) {
struct cfi_buf *cfi_buf;
socket_lock_assert_owned(so);
if ((sb->sb_flags & SB_RECV) == 0)
cfi_buf = &so->so_cfil->cfi_snd;
else
cfi_buf = &so->so_cfil->cfi_rcv;
pending = cfi_buf->cfi_pending_last -
cfi_buf->cfi_pending_first;
if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt)
pending = cfi_buf->cfi_pending_mbcnt;
}
VERIFY(pending < INT32_MAX);
return (int32_t)(pending);
}
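/*
 * cfil_sock_data_space()
 *
 * Report the socket buffer space held by pending filtered data.
 * Returns 0 for the content filter thread itself so re-injection is
 * not held back by its own pending data.
 */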
int32_t
cfil_sock_data_space(struct sockbuf *sb)
{
struct socket *so = sb->sb_so;
uint64_t pending = 0;
if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL &&
so->so_snd.sb_cfil_thread != current_thread()) {
struct cfi_buf *cfi_buf;
socket_lock_assert_owned(so);
if ((sb->sb_flags & SB_RECV) == 0)
cfi_buf = &so->so_cfil->cfi_snd;
else
cfi_buf = &so->so_cfil->cfi_rcv;
pending = cfi_buf->cfi_pending_last -
cfi_buf->cfi_pending_first;
if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending)
pending = cfi_buf->cfi_pending_mbcnt;
}
VERIFY(pending < INT32_MAX);
return (int32_t)(pending);
}
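/*
 * cfil_sock_buf_update()
 *
 * Retry the re-injection of data that previously failed (the
 * CFIF_RETRY_INJECT_* flags are set), e.g. once socket buffer space
 * has been freed up.
 */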
void
cfil_sock_buf_update(struct sockbuf *sb)
{
int outgoing;
int error;
struct socket *so = sb->sb_so;
if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
return;
if (!cfil_sbtrim)
return;
socket_lock_assert_owned(so);
if ((sb->sb_flags & SB_RECV) == 0) {
if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0)
return;
outgoing = 1;
OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
} else {
if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0)
return;
outgoing = 0;
OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
}
CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
(uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
error = cfil_acquire_sockbuf(so, outgoing);
if (error == 0)
cfil_service_inject_queue(so, outgoing);
cfil_release_sockbuf(so, outgoing);
}
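/*
 * Read-only sysctl handler reporting one cfil_filter_stat record per
 * attached content filter.
 */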
int
sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2,
struct sysctl_req *req)
{
#pragma unused(oidp, arg1, arg2)
int error = 0;
size_t len = 0;
u_int32_t i;
if (req->newptr != USER_ADDR_NULL)
return (EPERM);
cfil_rw_lock_shared(&cfil_lck_rw);
for (i = 0; content_filters != NULL && i < MAX_CONTENT_FILTER; i++) {
struct cfil_filter_stat filter_stat;
struct content_filter *cfc = content_filters[i];
if (cfc == NULL)
continue;
if (req->oldptr == USER_ADDR_NULL) {
len += sizeof(struct cfil_filter_stat);
continue;
}
bzero(&filter_stat, sizeof(struct cfil_filter_stat));
filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
filter_stat.cfs_filter_id = cfc->cf_kcunit;
filter_stat.cfs_flags = cfc->cf_flags;
filter_stat.cfs_sock_count = cfc->cf_sock_count;
filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;
error = SYSCTL_OUT(req, &filter_stat,
sizeof (struct cfil_filter_stat));
if (error != 0)
break;
}
if (req->oldptr == USER_ADDR_NULL)
req->oldidx = len;
cfil_rw_unlock_shared(&cfil_lck_rw);
return (error);
}
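/*
 * Read-only sysctl handler reporting one cfil_sock_stat record per
 * filtered socket, including the per-entry queue offsets.
 */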
static int
sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2,
struct sysctl_req *req)
{
#pragma unused(oidp, arg1, arg2)
int error = 0;
u_int32_t i;
struct cfil_info *cfi;
if (req->newptr != USER_ADDR_NULL)
return (EPERM);
cfil_rw_lock_shared(&cfil_lck_rw);
if (req->oldptr == USER_ADDR_NULL) {
req->oldidx = cfil_sock_attached_count *
sizeof(struct cfil_sock_stat);
req->oldidx += req->oldidx >> 3;
goto done;
}
TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) {
struct cfil_entry *entry;
struct cfil_sock_stat stat;
struct socket *so = cfi->cfi_so;
bzero(&stat, sizeof(struct cfil_sock_stat));
stat.cfs_len = sizeof(struct cfil_sock_stat);
stat.cfs_sock_id = cfi->cfi_sock_id;
stat.cfs_flags = cfi->cfi_flags;
if (so != NULL) {
stat.cfs_pid = so->last_pid;
memcpy(stat.cfs_uuid, so->last_uuid,
sizeof(uuid_t));
if (so->so_flags & SOF_DELEGATED) {
stat.cfs_e_pid = so->e_pid;
memcpy(stat.cfs_e_uuid, so->e_uuid,
sizeof(uuid_t));
} else {
stat.cfs_e_pid = so->last_pid;
memcpy(stat.cfs_e_uuid, so->last_uuid,
sizeof(uuid_t));
}
}
stat.cfs_snd.cbs_pending_first =
cfi->cfi_snd.cfi_pending_first;
stat.cfs_snd.cbs_pending_last =
cfi->cfi_snd.cfi_pending_last;
stat.cfs_snd.cbs_inject_q_len =
cfil_queue_len(&cfi->cfi_snd.cfi_inject_q);
stat.cfs_snd.cbs_pass_offset =
cfi->cfi_snd.cfi_pass_offset;
stat.cfs_rcv.cbs_pending_first =
cfi->cfi_rcv.cfi_pending_first;
stat.cfs_rcv.cbs_pending_last =
cfi->cfi_rcv.cfi_pending_last;
stat.cfs_rcv.cbs_inject_q_len =
cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q);
stat.cfs_rcv.cbs_pass_offset =
cfi->cfi_rcv.cfi_pass_offset;
for (i = 0; i < MAX_CONTENT_FILTER; i++) {
struct cfil_entry_stat *estat;
struct cfe_buf *ebuf;
struct cfe_buf_stat *sbuf;
entry = &cfi->cfi_entries[i];
estat = &stat.ces_entries[i];
estat->ces_len = sizeof(struct cfil_entry_stat);
estat->ces_filter_id = entry->cfe_filter ?
entry->cfe_filter->cf_kcunit : 0;
estat->ces_flags = entry->cfe_flags;
estat->ces_necp_control_unit =
entry->cfe_necp_control_unit;
estat->ces_last_event.tv_sec =
(int64_t)entry->cfe_last_event.tv_sec;
estat->ces_last_event.tv_usec =
(int64_t)entry->cfe_last_event.tv_usec;
estat->ces_last_action.tv_sec =
(int64_t)entry->cfe_last_action.tv_sec;
estat->ces_last_action.tv_usec =
(int64_t)entry->cfe_last_action.tv_usec;
ebuf = &entry->cfe_snd;
sbuf = &estat->ces_snd;
sbuf->cbs_pending_first =
cfil_queue_offset_first(&ebuf->cfe_pending_q);
sbuf->cbs_pending_last =
cfil_queue_offset_last(&ebuf->cfe_pending_q);
sbuf->cbs_ctl_first =
cfil_queue_offset_first(&ebuf->cfe_ctl_q);
sbuf->cbs_ctl_last =
cfil_queue_offset_last(&ebuf->cfe_ctl_q);
sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
sbuf->cbs_peeked = ebuf->cfe_peeked;
ebuf = &entry->cfe_rcv;
sbuf = &estat->ces_rcv;
sbuf->cbs_pending_first =
cfil_queue_offset_first(&ebuf->cfe_pending_q);
sbuf->cbs_pending_last =
cfil_queue_offset_last(&ebuf->cfe_pending_q);
sbuf->cbs_ctl_first =
cfil_queue_offset_first(&ebuf->cfe_ctl_q);
sbuf->cbs_ctl_last =
cfil_queue_offset_last(&ebuf->cfe_ctl_q);
sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
sbuf->cbs_peeked = ebuf->cfe_peeked;
}
error = SYSCTL_OUT(req, &stat,
sizeof (struct cfil_sock_stat));
if (error != 0)
break;
}
done:
cfil_rw_unlock_shared(&cfil_lck_rw);
return (error);
}