#define _IP_VHL
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <kern/locks.h>
#include <sys/sysctl.h>
#include <sys/mcache.h>
#include <sys/kdebug.h>
#include <machine/endian.h>
#include <pexpert/pexpert.h>
#include <mach/sdt.h>
#include <libkern/OSAtomic.h>
#include <libkern/OSByteOrder.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/ntstat.h>
#include <net/net_osdep.h>
#include <net/dlil.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#include <netinet/kpi_ipfilter_var.h>
#if CONFIG_MACF_NET
#include <security/mac_framework.h>
#endif
/*
 * kdebug trace codes in the DBG_NETIP class, emitted via KERNEL_DEBUG()
 * from ip_output_list():
 *
 *   DBG_LAYER_BEG        - logged per packet with its dst/src address,
 *                          protocol, offset and length before routing.
 *   DBG_LAYER_END        - logged with the same fields after the packet
 *                          has been fragmented.
 *   DBG_FNC_IP_OUTPUT    - combined with DBG_FUNC_START / DBG_FUNC_END to
 *                          bracket ip_output_list() itself.
 *   DBG_FNC_IPSEC4_OUTPUT - combined with DBG_FUNC_START / DBG_FUNC_END to
 *                          bracket the IPsec output section; the first
 *                          argument of each END event identifies which
 *                          exit was taken.
 */
#define DBG_LAYER_BEG NETDBG_CODE(DBG_NETIP, 1)
#define DBG_LAYER_END NETDBG_CODE(DBG_NETIP, 3)
#define DBG_FNC_IP_OUTPUT NETDBG_CODE(DBG_NETIP, (1 << 8) | 1)
#define DBG_FNC_IPSEC4_OUTPUT NETDBG_CODE(DBG_NETIP, (2 << 8) | 1)
#if IPSEC
#include <netinet6/ipsec.h>
#include <netkey/key.h>
#if IPSEC_DEBUG
#include <netkey/key_debug.h>
#else
#define KEYDEBUG(lev, arg)
#endif
#endif
#if NECP
#include <net/necp.h>
#endif
#if IPFIREWALL
#include <netinet/ip_fw.h>
#if IPDIVERT
#include <netinet/ip_divert.h>
#endif
#endif
#if DUMMYNET
#include <netinet/ip_dummynet.h>
#endif
#if PF
#include <net/pfvar.h>
#endif
#if IPFIREWALL_FORWARD && IPFIREWALL_FORWARD_DEBUG
/*
 * Debug helper: print the IPv4 address held in `a` (a struct in_addr whose
 * s_addr is in network byte order) in dotted-quad form, without a newline.
 *
 * Each octet is cast to unsigned int and printed with %u so the format
 * matches the argument type on both ILP32 and LP64 (ntohl() yields a
 * 32-bit unsigned value, which is not a valid argument for %ld).
 *
 * The trailing semicolon is part of the expansion and is kept for
 * compatibility with existing call sites.
 */
#define print_ip(a) \
printf("%u.%u.%u.%u", \
(unsigned int)((ntohl((a).s_addr) >> 24) & 0xFF), \
(unsigned int)((ntohl((a).s_addr) >> 16) & 0xFF), \
(unsigned int)((ntohl((a).s_addr) >> 8) & 0xFF), \
(unsigned int)((ntohl((a).s_addr)) & 0xFF));
#endif
/*
 * Global IP identification value.
 * NOTE(review): not referenced in the part of the file visible here;
 * ip_output_list() takes packet IDs from ip_randomid() instead.
 */
u_short ip_id;

/* Forward declarations of file-local helpers. */
static void ip_out_cksum_stats(int, u_int32_t);
static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
static int ip_optcopy(struct ip *, struct ip *);
static int ip_pcbopts(int, struct mbuf **, struct mbuf *);
static void imo_trace(struct ip_moptions *, int);
static void ip_mloopback(struct ifnet *, struct ifnet *, struct mbuf *,
struct sockaddr_in *, int);
static struct ifaddr *in_selectsrcif(struct ip *, struct route *, unsigned int);

/* Link-local output counters; bumped in ip_output_list(). */
extern struct ip_linklocal_stat ip_linklocal_stat;
#if IPSEC
/* When non-zero, ip_output_list() skips all IPsec processing. */
extern int ipsec_bypass;
#endif

/*
 * net.inet.ip.maxchainsent: high-water mark of the packet-chain count
 * (pktcnt) observed by ip_output_list() when it hands a chain to
 * dlil_output().
 */
static int ip_maxchainsent = 0;
SYSCTL_INT(_net_inet_ip, OID_AUTO, maxchainsent,
CTLFLAG_RW | CTLFLAG_LOCKED, &ip_maxchainsent, 0,
"use dlil_output_list");
#if DEBUG
/*
 * net.inet.ip.forge_ce (DEBUG builds only): while non-zero,
 * ip_output_list() rewrites the ECN field of ECT(0)/ECT(1) packets to CE
 * and decrements this counter once per rewritten packet.
 */
static int forge_ce = 0;
SYSCTL_INT(_net_inet_ip, OID_AUTO, forge_ce,
CTLFLAG_RW | CTLFLAG_LOCKED, &forge_ce, 0,
"Forge ECN CE");
#endif
/*
 * net.inet.ip.select_srcif_debug: per its description, enables logging of
 * source interface selection.  The consumer is not visible in this part
 * of the file (presumably in_selectsrcif() -- TODO confirm).
 */
static int ip_select_srcif_debug = 0;
SYSCTL_INT(_net_inet_ip, OID_AUTO, select_srcif_debug,
CTLFLAG_RW | CTLFLAG_LOCKED, &ip_select_srcif_debug, 0,
"log source interface selection debug info");

/* Number of entries in each refhold/refrele history array below. */
#define IMO_TRACE_HIST_SIZE 32
/* Exported copy of the history size. */
__private_extern__ unsigned int imo_trace_hist_size = IMO_TRACE_HIST_SIZE;

/*
 * Debug variant of struct ip_moptions: the regular structure comes first,
 * followed by reference hold/release counters and call traces.
 * NOTE(review): presumably allocated in place of a plain ip_moptions when
 * imo_debug is set -- confirm against the allocator.
 */
struct ip_moptions_dbg {
struct ip_moptions imo;	/* the real multicast options; must be first member */
u_int16_t imo_refhold_cnt;	/* count of recorded reference holds */
u_int16_t imo_refrele_cnt;	/* count of recorded reference releases */
ctrace_t imo_alloc;	/* trace recorded for allocation */
ctrace_t imo_free;	/* trace recorded for free */
ctrace_t imo_refhold[IMO_TRACE_HIST_SIZE];	/* history of hold traces */
ctrace_t imo_refrele[IMO_TRACE_HIST_SIZE];	/* history of release traces */
};

/* ip_moptions debugging switch: defaults to on for DEBUG kernels, off otherwise. */
#if DEBUG
static unsigned int imo_debug = 1;
#else
static unsigned int imo_debug;
#endif
/*
 * Allocation bookkeeping for ip_moptions.
 * NOTE(review): the code that sets up and uses these is outside the part
 * of the file visible here; meanings below are inferred from the names.
 */
static unsigned int imo_size;	/* presumably size of each zone element */
static struct zone *imo_zone;	/* presumably the zone ip_moptions come from */
#define IMO_ZONE_MAX 64
#define IMO_ZONE_NAME "ip_moptions"
/*
 * ip_output: send a single IP packet.
 *
 * Convenience wrapper around ip_output_list() for callers that have one
 * packet rather than a chain linked through m_nextpkt.  All arguments are
 * passed through unchanged; the return value is whatever
 * ip_output_list() returns.
 */
int
ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags,
    struct ip_moptions *imo, struct ip_out_args *ipoa)
{
	/* m0 is a lone packet, not a packet chain. */
	const int single_packet = 0;

	return (ip_output_list(m0, single_packet, opt, ro, flags, imo, ipoa));
}
/*
 * ip_output_list: IP output engine.
 *
 * Processes one packet (packetchain == 0) or a chain of packets linked
 * through m_nextpkt (packetchain != 0).  Each packet is routed, passed
 * through the configured hooks (PF, IP filters, NECP, IPsec, ipfw /
 * dummynet / divert -- each compiled in conditionally), checksummed, and
 * handed to dlil_output(), being fragmented first via ip_fragment() when
 * it does not fit the interface MTU.
 *
 * Parameters:
 *   m0          - first packet; must carry a packet header (M_PKTHDR).
 *   packetchain - non-zero when m0 heads a chain; the chain is then sent
 *                 with a single dlil_output() call once the last packet
 *                 has been processed.
 *   opt         - optional IP options to insert (via ip_insertoptions()),
 *                 or NULL.
 *   ro          - route to use and update; must not be NULL.
 *   flags       - IP_* output flags (IP_FORWARDING, IP_RAWOUTPUT,
 *                 IP_ROUTETOIF, IP_ALLOWBROADCAST, IP_OUTARGS,
 *                 IP_NOIPSEC, ...).
 *   imo         - multicast options, or NULL.
 *   ipoa        - output arguments; dereferenced whenever IP_OUTARGS is
 *                 set in flags, and ipoa_retflags / ipoa_flowadv are
 *                 written back to it.
 *
 * Returns 0 or an errno value.  On the `bad' exit the packet (or the whole
 * packet list once more than one packet of a chain has been processed) is
 * freed with m_freem_list(); on the other exits the packet has either been
 * handed to a lower layer / hook or freed explicitly.
 *
 * The local names ipsec_state, necp_route, args, sro_fwd, saved_route and
 * ipf_pktopts, as well as IP_CHECK_RESTRICTIONS, are macros scoped to this
 * function; all of them are #undef'd at the end so they do not leak into
 * the rest of the file.
 */
int
ip_output_list(struct mbuf *m0, int packetchain, struct mbuf *opt,
    struct route *ro, int flags, struct ip_moptions *imo,
    struct ip_out_args *ipoa)
{
	struct ip *ip;
	struct ifnet *ifp = NULL;
	struct mbuf *m = m0, *prevnxt = NULL, **mppn = &prevnxt;
	int hlen = sizeof (struct ip);
	int len = 0, error = 0;
	struct sockaddr_in *dst = NULL;
	struct in_ifaddr *ia = NULL, *src_ia = NULL;
	struct in_addr pkt_dst;
	struct ipf_pktopts *ippo = NULL;
	ipfilter_t inject_filter_ref = NULL;
	struct mbuf *packetlist;
	uint32_t sw_csum, pktcnt = 0, scnt = 0, bytecnt = 0;
	unsigned int ifscope = IFSCOPE_NONE;
	struct flowadv *adv = NULL;
#if IPSEC
	struct socket *so = NULL;
	struct secpolicy *sp = NULL;
#endif
#if NECP
	necp_kernel_policy_result necp_result = 0;
	necp_kernel_policy_result_parameter necp_result_parameter;
	necp_kernel_policy_id necp_matched_policy_id = 0;
#endif
#if IPFIREWALL
	int ipfwoff;
	struct sockaddr_in *next_hop_from_ipfwd_tag = NULL;
#endif
#if IPFIREWALL || DUMMYNET
	struct m_tag *tag;
#endif
#if DUMMYNET
	struct ip_out_args saved_ipoa;
	struct sockaddr_in dst_buf;
#endif
	/*
	 * Scratch structures that must start out zeroed; they are grouped
	 * so that the single bzero() below clears all of them at once.
	 */
	struct {
#if IPSEC
		struct ipsec_output_state ipsec_state;
#endif
#if NECP
		struct route necp_route;
#endif
#if IPFIREWALL || DUMMYNET
		struct ip_fw_args args;
#endif
#if IPFIREWALL_FORWARD
		struct route sro_fwd;
#endif
#if DUMMYNET
		struct route saved_route;
#endif
		struct ipf_pktopts ipf_pktopts;
	} ipobz;
	/* Short aliases for the members of ipobz; undefined at function end. */
#define ipsec_state ipobz.ipsec_state
#define necp_route ipobz.necp_route
#define args ipobz.args
#define sro_fwd ipobz.sro_fwd
#define saved_route ipobz.saved_route
#define ipf_pktopts ipobz.ipf_pktopts
	/* Per-call boolean state, packed; cleared as a whole through .raw. */
	union {
		struct {
			boolean_t select_srcif : 1;
			boolean_t srcbound : 1;
			boolean_t nocell : 1;
			boolean_t isbroadcast : 1;
			boolean_t didfilter : 1;
			boolean_t noexpensive : 1;
			boolean_t awdl_unrestricted : 1;
#if IPFIREWALL_FORWARD
			boolean_t fwd_rewrite_src : 1;
#endif
		};
		uint32_t raw;
	} ipobf = { .raw = 0 };
	/*
	 * True when _ifp may not be used for this packet: it is cellular and
	 * cellular is disallowed, it is expensive and expensive is
	 * disallowed, or it is AWDL-restricted and the caller did not ask
	 * for unrestricted AWDL.
	 */
#define IP_CHECK_RESTRICTIONS(_ifp, _ipobf) \
	(((_ipobf).nocell && IFNET_IS_CELLULAR(_ifp)) || \
	((_ipobf).noexpensive && IFNET_IS_EXPENSIVE(_ifp)) || \
	(!(_ipobf).awdl_unrestricted && IFNET_IS_AWDL_RESTRICTED(_ifp)))
	KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
	VERIFY(m0->m_flags & M_PKTHDR);
	packetlist = m0;
	bzero(&ipobz, sizeof (ipobz));
	ippo = &ipf_pktopts;
#if IPFIREWALL || DUMMYNET
	/* Nothing to recover when the packet carries no tags at all. */
	if (SLIST_EMPTY(&m0->m_pkthdr.tags))
		goto ipfw_tags_done;
#if DUMMYNET
	/*
	 * Packet carrying a dummynet tag: restore the state saved in the
	 * tag (matching rules, route, destination, interface, flags and
	 * output args), overriding the caller's arguments, then drop the
	 * tag.
	 */
	if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
	    KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) {
		struct dn_pkt_tag *dn_tag;
		dn_tag = (struct dn_pkt_tag *)(tag+1);
		args.fwa_ipfw_rule = dn_tag->dn_ipfw_rule;
		args.fwa_pf_rule = dn_tag->dn_pf_rule;
		opt = NULL;
		saved_route = dn_tag->dn_ro;
		ro = &saved_route;
		imo = NULL;
		bcopy(&dn_tag->dn_dst, &dst_buf, sizeof (dst_buf));
		dst = &dst_buf;
		ifp = dn_tag->dn_ifp;
		flags = dn_tag->dn_flags;
		if ((dn_tag->dn_flags & IP_OUTARGS)) {
			saved_ipoa = dn_tag->dn_ipoa;
			ipoa = &saved_ipoa;
		}
		m_tag_delete(m0, tag);
	}
#endif
#if IPDIVERT
	/* Recover the divert rule cookie, if any, and drop the tag. */
	if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
	    KERNEL_TAG_TYPE_DIVERT, NULL)) != NULL) {
		struct divert_tag *div_tag;
		div_tag = (struct divert_tag *)(tag+1);
		args.fwa_divert_rule = div_tag->cookie;
		m_tag_delete(m0, tag);
	}
#endif
#if IPFIREWALL
	/* Recover a next hop chosen by an earlier ipfw forward, if any. */
	if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
	    KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) {
		struct ip_fwd_tag *ipfwd_tag;
		ipfwd_tag = (struct ip_fwd_tag *)(tag+1);
		next_hop_from_ipfwd_tag = ipfwd_tag->next_hop;
		m_tag_delete(m0, tag);
	}
#endif
ipfw_tags_done:
#endif
	m = m0;
	m->m_pkthdr.pkt_flags &= ~(PKTF_LOOP|PKTF_IFAINFO);
#if IPSEC
	/* For interface-bound output, look the IPsec policy up by interface. */
	if (ipsec_bypass == 0 && !(flags & IP_NOIPSEC)) {
		if ((flags & IP_OUTARGS) && (ipoa != NULL) &&
		    (ipoa->ipoa_flags & IPOAF_BOUND_IF) &&
		    ipoa->ipoa_boundif != IFSCOPE_NONE) {
			if (ipsec4_getpolicybyinterface(m, IPSEC_DIR_OUTBOUND,
			    &flags, ipoa, &sp) != 0)
				goto bad;
		}
	}
#endif
	VERIFY(ro != NULL);
	/*
	 * Translate the caller's output args into local state and into the
	 * flags handed to IP filters.  With scoped routing disabled, the
	 * scoping-related requests are cleared from ipoa instead.
	 */
	if (ip_doscopedroute && (flags & IP_OUTARGS)) {
		/* Source interface selection does not apply when forwarding. */
		if ((ipobf.select_srcif = (!(flags & IP_FORWARDING) &&
		    (ipoa->ipoa_flags & IPOAF_SELECT_SRCIF)))) {
			ipf_pktopts.ippo_flags |= IPPOF_SELECT_SRCIF;
		}
		if ((ipoa->ipoa_flags & IPOAF_BOUND_IF) &&
		    ipoa->ipoa_boundif != IFSCOPE_NONE) {
			ifscope = ipoa->ipoa_boundif;
			ipf_pktopts.ippo_flags |=
			    (IPPOF_BOUND_IF | (ifscope << IPPOF_SHIFT_IFSCOPE));
		}
		ipobf.srcbound = !!(ipoa->ipoa_flags & IPOAF_BOUND_SRCADDR);
		if (ipobf.srcbound)
			ipf_pktopts.ippo_flags |= IPPOF_BOUND_SRCADDR;
	} else {
		ipobf.select_srcif = FALSE;
		ipobf.srcbound = FALSE;
		ifscope = IFSCOPE_NONE;
		if (flags & IP_OUTARGS) {
			ipoa->ipoa_boundif = IFSCOPE_NONE;
			ipoa->ipoa_flags &= ~(IPOAF_SELECT_SRCIF |
			    IPOAF_BOUND_IF | IPOAF_BOUND_SRCADDR);
		}
	}
	if (flags & IP_OUTARGS) {
		if (ipoa->ipoa_flags & IPOAF_NO_CELLULAR) {
			ipobf.nocell = TRUE;
			ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR;
		}
		if (ipoa->ipoa_flags & IPOAF_NO_EXPENSIVE) {
			ipobf.noexpensive = TRUE;
			ipf_pktopts.ippo_flags |= IPPOF_NO_IFF_EXPENSIVE;
		}
		if (ipoa->ipoa_flags & IPOAF_AWDL_UNRESTRICTED)
			ipobf.awdl_unrestricted = TRUE;
		/* Reset the values reported back to the caller. */
		adv = &ipoa->ipoa_flowadv;
		adv->code = FADV_SUCCESS;
		ipoa->ipoa_retflags = 0;
	}
#if IPSEC
	/* Detach the socket reference from the mbuf, remembering it in so. */
	if (ipsec_bypass == 0 && !(flags & IP_NOIPSEC)) {
		so = ipsec_getsocket(m);
		if (so != NULL) {
			(void) ipsec_setsocket(m, NULL);
		}
	}
#endif
#if DUMMYNET
	/*
	 * Packet re-entering with a saved dummynet rule: pick up the header
	 * fields and the route's interface address, then resume at the
	 * point corresponding to the hook that queued it, skipping the
	 * routing work below.
	 */
	if (args.fwa_ipfw_rule != NULL || args.fwa_pf_rule != NULL) {
		ip = mtod(m, struct ip *);
		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
		pkt_dst = ip->ip_dst;
		if (ro->ro_rt != NULL) {
			RT_LOCK_SPIN(ro->ro_rt);
			ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa;
			if (ia) {
				RT_CONVERT_LOCK(ro->ro_rt);
				IFA_ADDREF(&ia->ia_ifa);
			}
			RT_UNLOCK(ro->ro_rt);
		}
#if IPFIREWALL
		if (args.fwa_ipfw_rule != NULL)
			goto skip_ipsec;
#endif
		if (args.fwa_pf_rule != NULL)
			goto sendit;
	}
#endif
	/* Per-packet entry point; re-entered for every packet of a chain. */
loopit:
	ipobf.isbroadcast = FALSE;
	ipobf.didfilter = FALSE;
#if IPFIREWALL_FORWARD
	ipobf.fwd_rewrite_src = FALSE;
#endif
	VERIFY(m->m_flags & M_PKTHDR);
	/*
	 * If the packet was injected by an IP filter, remember which one so
	 * the filter loops below only start calling filters after it.
	 */
	if (!SLIST_EMPTY(&m->m_pkthdr.tags))
		inject_filter_ref = ipf_get_inject_filter(m);
	else
		inject_filter_ref = NULL;
	if (opt) {
		m = ip_insertoptions(m, opt, &len);
		hlen = len;
		/* Keep m0 / packetlist pointing at the (possibly new) head. */
		if (m != m0) {
			if (m0 == packetlist)
				packetlist = m;
			m0 = m;
		}
	}
	ip = mtod(m, struct ip *);
#if IPFIREWALL
	/* A next hop supplied by ipfw overrides the header's destination. */
	args.fwa_next_hop = next_hop_from_ipfwd_tag;
	pkt_dst = args.fwa_next_hop ? args.fwa_next_hop->sin_addr : ip->ip_dst;
#else
	pkt_dst = ip->ip_dst;
#endif
	/* Refuse destinations in the zero network. */
	if (IN_ZERONET(ntohl(pkt_dst.s_addr))) {
		error = EHOSTUNREACH;
		goto bad;
	}
	/*
	 * Locally generated, non-raw packets get their version/header
	 * length, fragment field and ID filled in here; forwarded and raw
	 * packets keep their header and only supply the header length.
	 */
	if (!(flags & (IP_FORWARDING|IP_RAWOUTPUT))) {
		ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
		ip->ip_off &= IP_DF;
		ip->ip_id = ip_randomid();
		OSAddAtomic(1, &ipstat.ips_localout);
	} else {
		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
	}
#if DEBUG
	/* Test hook: turn ECT(0)/ECT(1) into CE while forge_ce is non-zero. */
	if (forge_ce != 0 &&
	    ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT1 ||
	    (ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT0)) {
		ip->ip_tos = (ip->ip_tos & ~IPTOS_ECN_MASK) | IPTOS_ECN_CE;
		forge_ce--;
	}
#endif
	KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr, ip->ip_src.s_addr,
	    ip->ip_p, ip->ip_off, ip->ip_len);
	dst = SIN(&ro->ro_dst);
	/*
	 * Validate a cached route: release it when it is unusable or does
	 * not match the packet's destination.
	 */
	if (ro->ro_rt != NULL) {
		/*
		 * With an unusable route and an explicit source address,
		 * make sure that address is still configured locally.
		 */
		if (ROUTE_UNUSABLE(ro) && ip->ip_src.s_addr != INADDR_ANY &&
		    !(flags & (IP_ROUTETOIF | IP_FORWARDING))) {
			src_ia = ifa_foraddr(ip->ip_src.s_addr);
			if (src_ia == NULL) {
				error = EADDRNOTAVAIL;
				goto bad;
			}
			IFA_REMREF(&src_ia->ia_ifa);
			src_ia = NULL;
		}
		if (ROUTE_UNUSABLE(ro) || dst->sin_family != AF_INET ||
		    dst->sin_addr.s_addr != pkt_dst.s_addr)
			ROUTE_RELEASE(ro);
		if (!ipobf.select_srcif && ro->ro_rt != NULL &&
		    RT_GENID_OUTOFSYNC(ro->ro_rt))
			RT_GENID_SYNC(ro->ro_rt);
	}
	/* No route cached: (re)initialize the route's destination. */
	if (ro->ro_rt == NULL) {
		bzero(dst, sizeof (*dst));
		dst->sin_family = AF_INET;
		dst->sin_len = sizeof (*dst);
		dst->sin_addr = pkt_dst;
	}
	/*
	 * Pick the outgoing interface: directly by address for
	 * IP_ROUTETOIF, from the multicast options for multicast, or via
	 * the routing table otherwise.
	 */
	if (flags & IP_ROUTETOIF) {
		if (ia != NULL)
			IFA_REMREF(&ia->ia_ifa);
		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL) {
			ia = ifatoia(ifa_ifwithnet(sintosa(dst)));
			if (ia == NULL) {
				OSAddAtomic(1, &ipstat.ips_noroute);
				error = ENETUNREACH;
				goto bad;
			}
		}
		ifp = ia->ia_ifp;
		ip->ip_ttl = 1;
		ipobf.isbroadcast = in_broadcast(dst->sin_addr, ifp);
		/* Unicast over loopback: record interface info on the mbuf. */
		if ((ifp->if_flags & IFF_LOOPBACK) &&
		    !IN_MULTICAST(ntohl(pkt_dst.s_addr))) {
			m->m_pkthdr.rcvif = ifp;
			ip_setsrcifaddr_info(m, ifp->if_index, NULL);
			ip_setdstifaddr_info(m, ifp->if_index, NULL);
		}
	} else if (IN_MULTICAST(ntohl(pkt_dst.s_addr)) &&
	    imo != NULL && (ifp = imo->imo_multicast_ifp) != NULL) {
		/* Multicast with an explicit interface: no route lookup. */
		ipobf.isbroadcast = FALSE;
		if (ia != NULL)
			IFA_REMREF(&ia->ia_ifa);
		IFP_TO_IA(ifp, ia);
	} else {
		struct ifaddr *ia0 = NULL;
		boolean_t cloneok = FALSE;
		/*
		 * Source interface selection: performed when requested, the
		 * source address is set, and the route either is unusable
		 * or has no source interface recorded yet.
		 */
		if (ipobf.select_srcif &&
		    ip->ip_src.s_addr != INADDR_ANY && (ROUTE_UNUSABLE(ro) ||
		    !(ro->ro_flags & ROF_SRCIF_SELECTED))) {
			ia0 = in_selectsrcif(ip, ro, ifscope);
			/* The selected interface must pass the restrictions. */
			if (ia0 != NULL &&
			    IP_CHECK_RESTRICTIONS(ia0->ifa_ifp, ipobf)) {
				IFA_REMREF(ia0);
				ia0 = NULL;
				error = EHOSTUNREACH;
				if (flags & IP_OUTARGS)
					ipoa->ipoa_retflags |= IPOARF_IFDENIED;
				goto bad;
			}
			/*
			 * No interface owns the source address: fail unless
			 * this is raw output without a bound source address,
			 * or the scope is the loopback interface.
			 */
			if (ia0 == NULL && (!(flags & IP_RAWOUTPUT) ||
			    ipobf.srcbound) && ifscope != lo_ifp->if_index) {
				error = EADDRNOTAVAIL;
				goto bad;
			}
			if (ia0 != NULL) {
				/* Scope the lookup to the source interface. */
				if (ifscope == IFSCOPE_NONE)
					ifscope = ia0->ifa_ifp->if_index;
				cloneok = (!(flags & IP_RAWOUTPUT) &&
				    !(IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))));
			}
		}
		if (ro->ro_rt == NULL) {
			/* Ignore protocol cloning unless it was allowed above. */
			unsigned long ign = RTF_PRCLONING;
			if (cloneok || dst->sin_addr.s_addr == INADDR_BROADCAST)
				ign &= ~RTF_PRCLONING;
			if (ifscope == lo_ifp->if_index)
				rtalloc_ign(ro, ign);
			else
				rtalloc_scoped_ign(ro, ign, ifscope);
			/* Drop the new route if its interface is denied. */
			if (ro->ro_rt != NULL) {
				RT_LOCK_SPIN(ro->ro_rt);
				if (IP_CHECK_RESTRICTIONS(ro->ro_rt->rt_ifp,
				    ipobf)) {
					RT_UNLOCK(ro->ro_rt);
					ROUTE_RELEASE(ro);
					if (flags & IP_OUTARGS) {
						ipoa->ipoa_retflags |=
						    IPOARF_IFDENIED;
					}
				} else {
					RT_UNLOCK(ro->ro_rt);
				}
			}
		}
		if (ro->ro_rt == NULL) {
			OSAddAtomic(1, &ipstat.ips_noroute);
			error = EHOSTUNREACH;
			if (ia0 != NULL) {
				IFA_REMREF(ia0);
				ia0 = NULL;
			}
			goto bad;
		}
		/* Take interface address, interface and next hop from the route. */
		if (ia != NULL)
			IFA_REMREF(&ia->ia_ifa);
		RT_LOCK_SPIN(ro->ro_rt);
		ia = ifatoia(ro->ro_rt->rt_ifa);
		if (ia != NULL) {
			RT_CONVERT_LOCK(ro->ro_rt);
			IFA_ADDREF(&ia->ia_ifa);
		}
		ifp = ro->ro_rt->rt_ifp;
		ro->ro_rt->rt_use++;
		if (ro->ro_rt->rt_flags & RTF_GATEWAY) {
			dst = SIN(ro->ro_rt->rt_gateway);
		}
		if (ro->ro_rt->rt_flags & RTF_HOST) {
			ipobf.isbroadcast =
			    !!(ro->ro_rt->rt_flags & RTF_BROADCAST);
		} else {
			RT_CONVERT_LOCK(ro->ro_rt);
			ipobf.isbroadcast = in_broadcast(dst->sin_addr, ifp);
		}
		/* Unicast over loopback: record src/dst interface info. */
		if (ia != NULL && (ifp->if_flags & IFF_LOOPBACK) &&
		    !IN_MULTICAST(ntohl(pkt_dst.s_addr))) {
			uint32_t srcidx;
			m->m_pkthdr.rcvif = ia->ia_ifa.ifa_ifp;
			if (ia0 != NULL)
				srcidx = ia0->ifa_ifp->if_index;
			else if ((ro->ro_flags & ROF_SRCIF_SELECTED) &&
			    ro->ro_srcia != NULL)
				srcidx = ro->ro_srcia->ifa_ifp->if_index;
			else
				srcidx = 0;
			ip_setsrcifaddr_info(m, srcidx, NULL);
			ip_setdstifaddr_info(m, 0, ia);
		}
		RT_UNLOCK(ro->ro_rt);
		if (ia0 != NULL) {
			IFA_REMREF(ia0);
			ia0 = NULL;
		}
	}
	/* Multicast destinations are handled entirely inside this block. */
	if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) {
		struct ifnet *srcifp = NULL;
		struct in_multi *inm;
		u_int32_t vif;
		u_int8_t ttl = IP_DEFAULT_MULTICAST_TTL;
		u_int8_t loop = IP_DEFAULT_MULTICAST_LOOP;
		m->m_flags |= M_MCAST;
		/* Multicast goes to the group address itself, not a gateway. */
		dst = SIN(&ro->ro_dst);
		if (imo != NULL) {
			IMO_LOCK(imo);
			vif = imo->imo_multicast_vif;
			ttl = imo->imo_multicast_ttl;
			loop = imo->imo_multicast_loop;
			if (!(flags & IP_RAWOUTPUT))
				ip->ip_ttl = ttl;
			if (imo->imo_multicast_ifp != NULL)
				ifp = imo->imo_multicast_ifp;
			IMO_UNLOCK(imo);
		} else if (!(flags & IP_RAWOUTPUT)) {
			vif = -1;
			ip->ip_ttl = ttl;
		}
		/*
		 * vif is only assigned on some of the paths above; the
		 * short-circuit on imo == NULL keeps it from being read
		 * when it was not.
		 */
		if (imo == NULL || vif == -1) {
			if (!(ifp->if_flags & IFF_MULTICAST)) {
				OSAddAtomic(1, &ipstat.ips_noroute);
				error = ENETUNREACH;
				goto bad;
			}
		}
		/* No source address yet: use the first address found on ifp. */
		if (ip->ip_src.s_addr == INADDR_ANY) {
			struct in_ifaddr *ia1;
			lck_rw_lock_shared(in_ifaddr_rwlock);
			TAILQ_FOREACH(ia1, &in_ifaddrhead, ia_link) {
				IFA_LOCK_SPIN(&ia1->ia_ifa);
				if (ia1->ia_ifp == ifp) {
					ip->ip_src = IA_SIN(ia1)->sin_addr;
					srcifp = ifp;
					IFA_UNLOCK(&ia1->ia_ifa);
					break;
				}
				IFA_UNLOCK(&ia1->ia_ifa);
			}
			lck_rw_done(in_ifaddr_rwlock);
			if (ip->ip_src.s_addr == INADDR_ANY) {
				error = ENETUNREACH;
				goto bad;
			}
		}
		in_multihead_lock_shared();
		IN_LOOKUP_MULTI(&pkt_dst, ifp, inm);
		in_multihead_lock_done();
		/*
		 * A group membership record exists on the outgoing interface
		 * and looping back is not disabled: run the IP filters and
		 * loop a copy back.
		 */
		if (inm != NULL && (imo == NULL || loop)) {
			if (!TAILQ_EMPTY(&ipv4_filters)) {
				struct ipfilter *filter;
				int seen = (inject_filter_ref == NULL);
				if (imo != NULL) {
					ipf_pktopts.ippo_flags |=
					    IPPOF_MCAST_OPTS;
					ipf_pktopts.ippo_mcast_ifnet = ifp;
					ipf_pktopts.ippo_mcast_ttl = ttl;
					ipf_pktopts.ippo_mcast_loop = loop;
				}
				ipf_ref();
				/* Filters see ip_len / ip_off in network order. */
#if BYTE_ORDER != BIG_ENDIAN
				HTONS(ip->ip_len);
				HTONS(ip->ip_off);
#endif
				TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
					if (seen == 0) {
						if ((struct ipfilter *)
						    inject_filter_ref == filter)
							seen = 1;
					} else if (filter->ipf_filter.
					    ipf_output != NULL) {
						errno_t result;
						result = filter->ipf_filter.
						    ipf_output(filter->
						    ipf_filter.cookie,
						    (mbuf_t *)&m, ippo);
						/*
						 * EJUSTRETURN: stop without
						 * freeing the packet here.
						 */
						if (result == EJUSTRETURN) {
							ipf_unref();
							INM_REMREF(inm);
							goto done;
						}
						if (result != 0) {
							ipf_unref();
							INM_REMREF(inm);
							goto bad;
						}
					}
				}
				/* A filter may have replaced the mbuf. */
				ip = mtod(m, struct ip *);
#if BYTE_ORDER != BIG_ENDIAN
				NTOHS(ip->ip_len);
				NTOHS(ip->ip_off);
#endif
				ipf_unref();
				ipobf.didfilter = TRUE;
			}
			ip_mloopback(srcifp, ifp, m, dst, hlen);
		}
		if (inm != NULL)
			INM_REMREF(inm);
		/*
		 * Packets with a zero TTL, or going out a loopback
		 * interface, are not transmitted (any loopback copy was
		 * already made above).
		 */
		if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
			m_freem(m);
			goto done;
		}
		goto sendit;
	}
	/* Unspecified source: use the address of the outgoing interface. */
	if (ip->ip_src.s_addr == INADDR_ANY) {
		IFA_LOCK_SPIN(&ia->ia_ifa);
		ip->ip_src = IA_SIN(ia)->sin_addr;
		IFA_UNLOCK(&ia->ia_ifa);
#if IPFIREWALL_FORWARD
		/* Remember it so an ipfw forward may rewrite it again. */
		ipobf.fwd_rewrite_src = TRUE;
#endif
	}
	/*
	 * Broadcast needs a broadcast-capable interface, explicit permission
	 * from the caller, and a packet that fits the MTU unfragmented.
	 */
	if (ipobf.isbroadcast) {
		if (!(ifp->if_flags & IFF_BROADCAST)) {
			error = EADDRNOTAVAIL;
			goto bad;
		}
		if (!(flags & IP_ALLOWBROADCAST)) {
			error = EACCES;
			goto bad;
		}
		if ((u_short)ip->ip_len > ifp->if_mtu) {
			error = EMSGSIZE;
			goto bad;
		}
		m->m_flags |= M_BCAST;
	} else {
		m->m_flags &= ~M_BCAST;
	}
sendit:
#if PF
	/* Run the packet through PF. */
	if (PF_IS_ENABLED) {
		int rc;
		m0 = m;
#if DUMMYNET
		/* State dummynet needs should PF queue the packet. */
		args.fwa_m = m;
		args.fwa_next_hop = dst;
		args.fwa_oif = ifp;
		args.fwa_ro = ro;
		args.fwa_dst = dst;
		args.fwa_oflags = flags;
		if (flags & IP_OUTARGS)
			args.fwa_ipoa = ipoa;
		rc = pf_af_hook(ifp, mppn, &m, AF_INET, FALSE, &args);
#else
		rc = pf_af_hook(ifp, mppn, &m, AF_INET, FALSE, NULL);
#endif
		/*
		 * PF did not pass this packet on: continue with whatever
		 * now follows the previous packet (*mppn); if nothing
		 * follows, send what remains of the list or finish.
		 */
		if (rc != 0 || m == NULL) {
			m = *mppn;
			if (packetlist == m0)
				packetlist = m;
			if (m != NULL) {
				m0 = m;
				goto loopit;
			} else if (packetlist != NULL) {
				goto sendchain;
			}
			goto done;
		}
		/* PF may have modified the packet; reload derived values. */
		m0 = m;
		ip = mtod(m, struct ip *);
		pkt_dst = ip->ip_dst;
		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
	}
#endif
	/* Link-local traffic is forced to TTL MAXTTL; count corrections. */
	if (IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) ||
	    IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
		ip_linklocal_stat.iplls_out_total++;
		if (ip->ip_ttl != MAXTTL) {
			ip_linklocal_stat.iplls_out_badttl++;
			ip->ip_ttl = MAXTTL;
		}
	}
	/* Run the IP output filters, unless the multicast path already did. */
	if (!ipobf.didfilter && !TAILQ_EMPTY(&ipv4_filters)) {
		struct ipfilter *filter;
		int seen = (inject_filter_ref == NULL);
		ipf_pktopts.ippo_flags &= ~IPPOF_MCAST_OPTS;
		/* TSO packets are rejected when filters are installed. */
		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
			error = EMSGSIZE;
			goto bad;
		}
		ipf_ref();
#if BYTE_ORDER != BIG_ENDIAN
		HTONS(ip->ip_len);
		HTONS(ip->ip_off);
#endif
		TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
			if (seen == 0) {
				if ((struct ipfilter *)inject_filter_ref ==
				    filter)
					seen = 1;
			} else if (filter->ipf_filter.ipf_output) {
				errno_t result;
				result = filter->ipf_filter.
				    ipf_output(filter->ipf_filter.cookie,
				    (mbuf_t *)&m, ippo);
				if (result == EJUSTRETURN) {
					ipf_unref();
					goto done;
				}
				if (result != 0) {
					ipf_unref();
					goto bad;
				}
			}
		}
		ip = mtod(m, struct ip *);
#if BYTE_ORDER != BIG_ENDIAN
		NTOHS(ip->ip_len);
		NTOHS(ip->ip_off);
#endif
		ipf_unref();
	}
#if NECP
	/*
	 * NECP policy check.  A match can pass the packet (bypassing the
	 * IPsec section), drop it, or rebind it to a tunnel interface.
	 */
	necp_matched_policy_id = necp_ip_output_find_policy_match (m,
	    flags, (flags & IP_OUTARGS) ? ipoa : NULL, &necp_result, &necp_result_parameter);
	if (necp_matched_policy_id) {
		necp_mark_packet_from_ip(m, necp_matched_policy_id);
		switch (necp_result) {
			case NECP_KERNEL_POLICY_RESULT_PASS:
				goto skip_ipsec;
			case NECP_KERNEL_POLICY_RESULT_DROP:
			case NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT:
				error = EHOSTUNREACH;
				goto bad;
			case NECP_KERNEL_POLICY_RESULT_IP_TUNNEL: {
				struct ifnet *policy_ifp = necp_get_ifnet_from_result_parameter(&necp_result_parameter);
				if (policy_ifp == ifp) {
					goto skip_ipsec;
				} else {
					/* Switch to the tunnel interface and its route. */
					if (necp_packet_can_rebind_to_ifnet(m, policy_ifp, &necp_route, AF_INET)) {
						ifp = policy_ifp;
						ro = &necp_route;
						goto skip_ipsec;
					} else {
						error = ENETUNREACH;
						goto bad;
					}
				}
				break;
			}
			default:
				break;
		}
	}
#endif
#if IPSEC
	if (ipsec_bypass != 0 || (flags & IP_NOIPSEC))
		goto skip_ipsec;
	KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
	/* Find the security policy unless the interface lookup already did. */
	if (sp == NULL) {
		if (so != NULL) {
			sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND,
			    so, &error);
		} else {
			sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
			    flags, &error);
		}
		if (sp == NULL) {
			IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
			KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END,
			    0, 0, 0, 0, 0);
			goto bad;
		}
	}
	error = 0;
	switch (sp->policy) {
	case IPSEC_POLICY_DISCARD:
	case IPSEC_POLICY_GENERATE:
		/* The packet is dropped; error stays 0 on this path. */
		IPSEC_STAT_INCREMENT(ipsecstat.out_polvio);
		KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END,
		    1, 0, 0, 0, 0);
		goto bad;
	case IPSEC_POLICY_BYPASS:
	case IPSEC_POLICY_NONE:
		/* No IPsec processing required. */
		KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END,
		    2, 0, 0, 0, 0);
		goto skip_ipsec;
	case IPSEC_POLICY_IPSEC:
		if (sp->req == NULL) {
			/* No request attached to the policy: acquire and drop. */
			error = key_spdacquire(sp);
			KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END,
			    3, 0, 0, 0, 0);
			goto bad;
		}
		if (sp->ipsec_if) {
			/* Policy tied to an interface: it must be the one in use. */
			if (sp->ipsec_if == ifp) {
				goto skip_ipsec;
			}
			goto bad;
		}
		break;
	case IPSEC_POLICY_ENTRUST:
	default:
		/* Logged only; execution continues with IPsec processing. */
		printf("ip_output: Invalid policy found. %d\n", sp->policy);
	}
	{
		ipsec_state.m = m;
		if (flags & IP_ROUTETOIF) {
			bzero(&ipsec_state.ro, sizeof (ipsec_state.ro));
		} else {
			route_copyout(&ipsec_state.ro, ro, sizeof (ipsec_state.ro));
		}
		ipsec_state.dst = SA(dst);
		ip->ip_sum = 0;
		/* Finish a delayed transport checksum before IPsec runs. */
		if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
			in_delayed_cksum(m);
		/* ipsec4_output() is given ip_len / ip_off in network order. */
#if BYTE_ORDER != BIG_ENDIAN
		HTONS(ip->ip_len);
		HTONS(ip->ip_off);
#endif
		DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
		    struct ip *, ip, struct ifnet *, ifp,
		    struct ip *, ip, struct ip6_hdr *, NULL);
		error = ipsec4_output(&ipsec_state, sp, flags);
		m0 = m = ipsec_state.m;
#if DUMMYNET
		/* The dummynet-saved route is about to be replaced below. */
		if (ro == &saved_route &&
		    (!(flags & IP_ROUTETOIF) || ipsec_state.tunneled))
			ROUTE_RELEASE(ro);
#endif
		/* From here on use the route maintained in ipsec_state. */
		if (flags & IP_ROUTETOIF) {
			if (ipsec_state.tunneled) {
				flags &= ~IP_ROUTETOIF;
				ro = &ipsec_state.ro;
			}
		} else {
			ro = &ipsec_state.ro;
		}
		dst = SIN(ipsec_state.dst);
		if (error) {
			/* Nothing is left for the `bad' path to free. */
			m0 = NULL;
			/* Only a fixed set of errors is reported to the caller. */
			switch (error) {
			case EHOSTUNREACH:
			case ENETUNREACH:
			case EMSGSIZE:
			case ENOBUFS:
			case ENOMEM:
				break;
			default:
				printf("ip4_output (ipsec): error code %d\n", error);
				/* FALLTHROUGH */
			case ENOENT:
				/* Not reported to the caller. */
				error = 0;
				break;
			}
			KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END,
			    4, 0, 0, 0, 0);
			goto bad;
		}
	}
	/* The packet may have been rewritten; reload the header. */
	ip = mtod(m, struct ip *);
#ifdef _IP_VHL
	hlen = IP_VHL_HL(ip->ip_vhl) << 2;
#else
	hlen = ip->ip_hl << 2;
#endif
	/* Same unusable-route / source address check as before routing. */
	if (ROUTE_UNUSABLE(ro)) {
		ROUTE_RELEASE(ro);
		VERIFY(src_ia == NULL);
		if (ip->ip_src.s_addr != INADDR_ANY &&
		    !(flags & (IP_ROUTETOIF | IP_FORWARDING)) &&
		    (src_ia = ifa_foraddr(ip->ip_src.s_addr)) == NULL) {
			error = EADDRNOTAVAIL;
			KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END,
			    5, 0, 0, 0, 0);
			goto bad;
		}
		if (src_ia != NULL) {
			IFA_REMREF(&src_ia->ia_ifa);
			src_ia = NULL;
		}
	}
	if (ro->ro_rt == NULL) {
		if (!(flags & IP_ROUTETOIF)) {
			printf("%s: can't update route after "
			    "IPsec processing\n", __func__);
			error = EHOSTUNREACH;
			KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END,
			    6, 0, 0, 0, 0);
			goto bad;
		}
	} else {
		/* Refresh interface address and interface from the route. */
		if (ia != NULL)
			IFA_REMREF(&ia->ia_ifa);
		RT_LOCK_SPIN(ro->ro_rt);
		ia = ifatoia(ro->ro_rt->rt_ifa);
		if (ia != NULL) {
			RT_CONVERT_LOCK(ro->ro_rt);
			IFA_ADDREF(&ia->ia_ifa);
		}
		ifp = ro->ro_rt->rt_ifp;
		RT_UNLOCK(ro->ro_rt);
	}
	/* Back to host byte order for the rest of the function. */
#if BYTE_ORDER != BIG_ENDIAN
	NTOHS(ip->ip_len);
	NTOHS(ip->ip_off);
#endif
	KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END,
	    7, 0xff, 0xff, 0xff, 0xff);
	/*
	 * Run every IP output filter on the packet produced by IPsec; the
	 * injected-filter skipping used earlier is not applied here.
	 */
	if (!TAILQ_EMPTY(&ipv4_filters)) {
		struct ipfilter *filter;
		ipf_pktopts.ippo_flags &= ~IPPOF_MCAST_OPTS;
		if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
			error = EMSGSIZE;
			goto bad;
		}
		ipf_ref();
#if BYTE_ORDER != BIG_ENDIAN
		HTONS(ip->ip_len);
		HTONS(ip->ip_off);
#endif
		TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
			if (filter->ipf_filter.ipf_output) {
				errno_t result;
				result = filter->ipf_filter.
				    ipf_output(filter->ipf_filter.cookie,
				    (mbuf_t *)&m, ippo);
				if (result == EJUSTRETURN) {
					ipf_unref();
					goto done;
				}
				if (result != 0) {
					ipf_unref();
					goto bad;
				}
			}
		}
		ip = mtod(m, struct ip *);
#if BYTE_ORDER != BIG_ENDIAN
		NTOHS(ip->ip_len);
		NTOHS(ip->ip_off);
#endif
		ipf_unref();
	}
skip_ipsec:
#endif
#if IPFIREWALL
	/*
	 * ipfw check; skipped when a next hop was already supplied by an
	 * earlier forward.
	 */
	if (fw_enable && IPFW_LOADED && !args.fwa_next_hop) {
		struct sockaddr_in *old = dst;
		args.fwa_m = m;
		args.fwa_next_hop = dst;
		args.fwa_oif = ifp;
		ipfwoff = ip_fw_chk_ptr(&args);
		m = args.fwa_m;
		dst = args.fwa_next_hop;
		m0 = m;
		/* Denied by the firewall, or the packet is gone. */
		if ((ipfwoff & IP_FW_PORT_DENY_FLAG) || m == NULL) {
			if (m)
				m_freem(m);
			error = EACCES;
			goto done;
		}
		ip = mtod(m, struct ip *);
		/* Plain accept: no action code and no new next hop. */
		if (ipfwoff == 0 && dst == old) {
			goto pass;
		}
#if DUMMYNET
		/* Hand the packet to dummynet along with the output state. */
		if (DUMMYNET_LOADED && (ipfwoff & IP_FW_PORT_DYNT_FLAG) != 0) {
			args.fwa_ro = ro;
			args.fwa_dst = dst;
			args.fwa_oflags = flags;
			if (flags & IP_OUTARGS)
				args.fwa_ipoa = ipoa;
			error = ip_dn_io_ptr(m, ipfwoff & 0xffff, DN_TO_IP_OUT,
			    &args, DN_CLIENT_IPFW);
			goto done;
		}
#endif
#if IPDIVERT
		/* Divert or tee: the low 16 bits of ipfwoff are the port. */
		if (ipfwoff != 0 && (ipfwoff & IP_FW_PORT_DYNT_FLAG) == 0) {
			struct mbuf *clone = NULL;
			/* Tee: divert the original, keep sending a copy. */
			if ((ipfwoff & IP_FW_PORT_TEE_FLAG) != 0)
				clone = m_dup(m, M_DONTWAIT);
			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
				in_delayed_cksum(m);
#if BYTE_ORDER != BIG_ENDIAN
			HTONS(ip->ip_len);
			HTONS(ip->ip_off);
#endif
			divert_packet(m, 0, ipfwoff & 0xffff,
			    args.fwa_divert_rule);
			if (clone != NULL) {
				m0 = m = clone;
				ip = mtod(m, struct ip *);
				goto pass;
			}
			goto done;
		}
#endif
#if IPFIREWALL_FORWARD
		/* Forward action: the firewall replaced the next hop. */
		if (ipfwoff == 0 && old != dst) {
			struct in_ifaddr *ia_fw;
			struct route *ro_fwd = &sro_fwd;
#if IPFIREWALL_FORWARD_DEBUG
			printf("IPFIREWALL_FORWARD: New dst ip: ");
			print_ip(dst->sin_addr);
			printf("\n");
#endif
			/* Is the new destination one of our own addresses? */
			lck_rw_lock_shared(in_ifaddr_rwlock);
			TAILQ_FOREACH(ia_fw, &in_ifaddrhead, ia_link) {
				IFA_LOCK_SPIN(&ia_fw->ia_ifa);
				if (IA_SIN(ia_fw)->sin_addr.s_addr ==
				    dst->sin_addr.s_addr) {
					IFA_UNLOCK(&ia_fw->ia_ifa);
					break;
				}
				IFA_UNLOCK(&ia_fw->ia_ifa);
			}
			lck_rw_done(in_ifaddr_rwlock);
			if (ia_fw) {
				/*
				 * Local destination: tag the packet with the
				 * next hop and send it out the loopback
				 * interface.
				 */
				struct m_tag *fwd_tag;
				struct ip_fwd_tag *ipfwd_tag;
				fwd_tag = m_tag_create(KERNEL_MODULE_TAG_ID,
				    KERNEL_TAG_TYPE_IPFORWARD,
				    sizeof (*ipfwd_tag), M_NOWAIT, m);
				if (fwd_tag == NULL) {
					error = ENOBUFS;
					goto bad;
				}
				ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1);
				ipfwd_tag->next_hop = args.fwa_next_hop;
				m_tag_prepend(m, fwd_tag);
				if (m->m_pkthdr.rcvif == NULL)
					m->m_pkthdr.rcvif = lo_ifp;
#if BYTE_ORDER != BIG_ENDIAN
				HTONS(ip->ip_len);
				HTONS(ip->ip_off);
#endif
				mbuf_outbound_finalize(m, PF_INET, 0);
				if (lo_ifp) {
					dlil_output(lo_ifp, PF_INET, m, NULL,
					    SA(dst), 0, adv);
				} else {
					printf("%s: no loopback ifp for "
					    "forwarding!!!\n", __func__);
				}
				goto done;
			}
			/* Remote destination: look up a route to the new next hop. */
			ROUTE_RELEASE(ro_fwd);
			bcopy(dst, &ro_fwd->ro_dst, sizeof (*dst));
			rtalloc_ign(ro_fwd, RTF_PRCLONING);
			if (ro_fwd->ro_rt == NULL) {
				OSAddAtomic(1, &ipstat.ips_noroute);
				error = EHOSTUNREACH;
				goto bad;
			}
			RT_LOCK_SPIN(ro_fwd->ro_rt);
			ia_fw = ifatoia(ro_fwd->ro_rt->rt_ifa);
			if (ia_fw != NULL) {
				RT_CONVERT_LOCK(ro_fwd->ro_rt);
				IFA_ADDREF(&ia_fw->ia_ifa);
			}
			ifp = ro_fwd->ro_rt->rt_ifp;
			ro_fwd->ro_rt->rt_use++;
			if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
				dst = SIN(ro_fwd->ro_rt->rt_gateway);
			if (ro_fwd->ro_rt->rt_flags & RTF_HOST) {
				ipobf.isbroadcast =
				    !!(ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
			} else {
				RT_CONVERT_LOCK(ro_fwd->ro_rt);
				ipobf.isbroadcast =
				    in_broadcast(dst->sin_addr, ifp);
			}
			RT_UNLOCK(ro_fwd->ro_rt);
			/* Move the new route into the caller's route. */
			ROUTE_RELEASE(ro);
			ro->ro_rt = ro_fwd->ro_rt;
			ro_fwd->ro_rt = NULL;
			dst = SIN(&ro_fwd->ro_dst);
			if (ia_fw != NULL) {
				/*
				 * Re-pick the source address only if this
				 * function chose it in the first place.
				 */
				if (ipobf.fwd_rewrite_src) {
					IFA_LOCK_SPIN(&ia_fw->ia_ifa);
					ip->ip_src = IA_SIN(ia_fw)->sin_addr;
					IFA_UNLOCK(&ia_fw->ia_ifa);
				}
				IFA_REMREF(&ia_fw->ia_ifa);
			}
			goto pass;
		}
#endif
		/* No handler claimed the firewall's result: drop the packet. */
		m_freem(m);
		error = EACCES;
		goto done;
	}
pass:
#endif
	/* Loopback-net addresses may only appear on a loopback interface. */
	if (!(ifp->if_flags & IFF_LOOPBACK) &&
	    ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
	    (ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
		OSAddAtomic(1, &ipstat.ips_badaddr);
		error = EADDRNOTAVAIL;
		goto bad;
	}
	/* Determine which checksums must still be computed in software. */
	ip_output_checksum(ifp, m, (IP_VHL_HL(ip->ip_vhl) << 2),
	    ip->ip_len, &sw_csum);
	/*
	 * Send as-is when the packet fits the MTU, the interface will
	 * segment it (TSO), or DF is clear and the interface fragments in
	 * hardware.
	 */
	if ((u_short)ip->ip_len <= ifp->if_mtu || TSO_IPV4_OK(ifp, m) ||
	    (!(ip->ip_off & IP_DF) && (ifp->if_hwassist & CSUM_FRAGMENT))) {
#if BYTE_ORDER != BIG_ENDIAN
		HTONS(ip->ip_len);
		HTONS(ip->ip_off);
#endif
		ip->ip_sum = 0;
		if (sw_csum & CSUM_DELAY_IP) {
			ip->ip_sum = ip_cksum_hdr_out(m, hlen);
			sw_csum &= ~CSUM_DELAY_IP;
			m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IP;
		}
#if IPSEC
		if (ipsec_bypass == 0 && !(flags & IP_NOIPSEC))
			ipsec_delaux(m);
#endif
		/* A TSO packet counts as one segment per tso_segsz bytes. */
		if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) &&
		    (m->m_pkthdr.tso_segsz > 0))
			scnt += m->m_pkthdr.len / m->m_pkthdr.tso_segsz;
		else
			scnt++;
		if (packetchain == 0) {
			/* Single packet: send it right away. */
			if (ro->ro_rt != NULL && nstat_collect)
				nstat_route_tx(ro->ro_rt, scnt,
				    m->m_pkthdr.len, 0);
			error = dlil_output(ifp, PF_INET, m, ro->ro_rt,
			    SA(dst), 0, adv);
			if (dlil_verbose && error) {
				printf("dlil_output error on interface %s: %d\n",
				    ifp->if_xname, error);
			}
			scnt = 0;
			goto done;
		} else {
			/*
			 * Packet chain: account for this packet and move to
			 * the next; the whole list is sent once the end of
			 * the chain is reached.
			 */
			bytecnt += m->m_pkthdr.len;
			mppn = &m->m_nextpkt;
			m = m->m_nextpkt;
			if (m == NULL) {
#if PF
sendchain:
#endif
				/* Track the longest chain sent so far. */
				if (pktcnt > ip_maxchainsent)
					ip_maxchainsent = pktcnt;
				if (ro->ro_rt != NULL && nstat_collect)
					nstat_route_tx(ro->ro_rt, scnt,
					    bytecnt, 0);
				error = dlil_output(ifp, PF_INET, packetlist,
				    ro->ro_rt, SA(dst), 0, adv);
				if (dlil_verbose && error) {
					printf("dlil_output error on interface %s: %d\n",
					    ifp->if_xname, error);
				}
				pktcnt = 0;
				scnt = 0;
				bytecnt = 0;
				goto done;
			}
			m0 = m;
			pktcnt++;
			goto loopit;
		}
	}
	/*
	 * Too large for the interface.  Fragmentation is refused when DF is
	 * set, when part of a chain has already been processed, or for TSO
	 * packets.
	 */
	if ((ip->ip_off & IP_DF) || pktcnt > 0 ||
	    (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4)) {
		error = EMSGSIZE;
		/* Lower the route's MTU to the interface MTU unless locked. */
		if (ro->ro_rt) {
			RT_LOCK_SPIN(ro->ro_rt);
			if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) &&
			    !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) &&
			    (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
				ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
			}
			RT_UNLOCK(ro->ro_rt);
		}
		if (pktcnt > 0) {
			m0 = packetlist;
		}
		OSAddAtomic(1, &ipstat.ips_cantfrag);
		goto bad;
	}
	/* ip_fragment() frees the packet itself when it fails. */
	error = ip_fragment(m, ifp, ifp->if_mtu, sw_csum);
	if (error != 0) {
		m0 = m = NULL;
		goto bad;
	}
	KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr,
	    ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len);
	/*
	 * Send the fragments one by one; after the first failure the
	 * remaining fragments are freed instead of sent.
	 */
	for (m = m0; m; m = m0) {
		m0 = m->m_nextpkt;
		m->m_nextpkt = 0;
#if IPSEC
		if (ipsec_bypass == 0 && !(flags & IP_NOIPSEC))
			ipsec_delaux(m);
#endif
		if (error == 0) {
			if ((packetchain != 0) && (pktcnt > 0)) {
				panic("%s: mix of packet in packetlist is "
				    "wrong=%p", __func__, packetlist);
			}
			if (ro->ro_rt != NULL && nstat_collect) {
				nstat_route_tx(ro->ro_rt, 1,
				    m->m_pkthdr.len, 0);
			}
			error = dlil_output(ifp, PF_INET, m, ro->ro_rt,
			    SA(dst), 0, adv);
			if (dlil_verbose && error) {
				printf("dlil_output error on interface %s: %d\n",
				    ifp->if_xname, error);
			}
		} else {
			m_freem(m);
		}
	}
	if (error == 0)
		OSAddAtomic(1, &ipstat.ips_fragmented);
	/* Common exit: drop every reference and route taken above. */
done:
	if (ia != NULL) {
		IFA_REMREF(&ia->ia_ifa);
		ia = NULL;
	}
#if IPSEC
	ROUTE_RELEASE(&ipsec_state.ro);
	if (sp != NULL) {
		KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
		    printf("DP ip_output call free SP:%x\n", sp));
		key_freesp(sp, KEY_SADB_UNLOCKED);
	}
#endif
#if NECP
	ROUTE_RELEASE(&necp_route);
#endif
#if DUMMYNET
	ROUTE_RELEASE(&saved_route);
#endif
#if IPFIREWALL_FORWARD
	ROUTE_RELEASE(&sro_fwd);
#endif
	KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error, 0, 0, 0, 0);
	return (error);
	/*
	 * Error exit: free the current packet, or the entire packet list
	 * once more than one packet of a chain has been processed, then
	 * run the common cleanup.
	 */
bad:
	if (pktcnt > 0)
		m0 = packetlist;
	m_freem_list(m0);
	goto done;
	/* Retire every function-local alias defined above. */
#undef ipsec_state
#undef necp_route
#undef args
#undef sro_fwd
#undef saved_route
#undef ipf_pktopts
#undef IP_CHECK_RESTRICTIONS
}
/*
 * Split the IPv4 packet in m into fragments whose size is derived from
 * mtu, linking the resulting packets through m_nextpkt starting at m.
 *
 * The input header's ip_len and ip_off are read as plain integers; every
 * fragment header (including the first) has them stored through
 * htons()/HTONS() before return.
 *
 * ifp is consulted only for its if_hwassist capability bits.  When
 * sw_csum has CSUM_DELAY_IP set, each fragment's header checksum is
 * computed here with ip_cksum_hdr_out() and CSUM_DELAY_IP is cleared in
 * that fragment's csum_flags.
 *
 * Returns 0 on success.  Returns EMSGSIZE, after m_freem(m), when fewer
 * than 8 payload bytes would fit in a fragment.  Returns ENOBUFS, after
 * m_freem_list() on the chain headed by the original packet, when an
 * mbuf allocation or payload copy fails.
 */
int
ip_fragment(struct mbuf *m, struct ifnet *ifp, unsigned long mtu, int sw_csum)
{
struct ip *ip, *mhip;
int len, hlen, mhlen, firstlen, off, error = 0;
struct mbuf **mnext = &m->m_nextpkt, *m0;
int nfrags = 1;
ip = mtod(m, struct ip *);
#ifdef _IP_VHL
hlen = IP_VHL_HL(ip->ip_vhl) << 2;
#else
hlen = ip->ip_hl << 2;
#endif
/* Payload bytes per fragment, rounded down to a multiple of 8. */
firstlen = len = (mtu - hlen) &~ 7;
if (len < 8) {
m_freem(m);
return (EMSGSIZE);
}
/*
 * A still-pending data checksum is computed now, before the payload
 * is split, unless the interface advertises CSUM_IP_FRAGS.
 */
if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) &&
!(ifp->if_hwassist & CSUM_IP_FRAGS))
in_delayed_cksum(m);
/*
 * Build every fragment after the first.  off is the byte offset into
 * the original packet (header included) at which this fragment's
 * payload starts.
 */
m0 = m;
mhlen = sizeof (struct ip);
for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
MGETHDR(m, M_DONTWAIT, MT_HEADER);
if (m == NULL) {
error = ENOBUFS;
OSAddAtomic(1, &ipstat.ips_odropped);
goto sendorfree;
}
m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
/* Leave max_linkhdr bytes of room in front of the new header. */
m->m_data += max_linkhdr;
mhip = mtod(m, struct ip *);
*mhip = *ip;
/*
 * The original header carries options: let ip_optcopy() decide which
 * ones go into this fragment and rebuild the header length from its
 * return value.
 */
if (hlen > sizeof (struct ip)) {
mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
}
m->m_len = mhlen;
/* Fragment offset in 8-byte units, added to the original ip_off minus IP_MF. */
mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
if (ip->ip_off & IP_MF)
mhip->ip_off |= IP_MF;
/* The last fragment takes the remainder; all others get IP_MF. */
if (off + len >= (u_short)ip->ip_len)
len = (u_short)ip->ip_len - off;
else
mhip->ip_off |= IP_MF;
mhip->ip_len = htons((u_short)(len + mhlen));
/* Payload is bytes [off, off + len) of the original, obtained via m_copy(). */
m->m_next = m_copy(m0, off, len);
if (m->m_next == NULL) {
/* This header mbuf is not linked into the chain yet; free it here. */
(void) m_free(m);
error = ENOBUFS;
OSAddAtomic(1, &ipstat.ips_odropped);
goto sendorfree;
}
m->m_pkthdr.len = mhlen + len;
m->m_pkthdr.rcvif = NULL;
m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
M_COPY_CLASSIFIER(m, m0);
M_COPY_PFTAG(m, m0);
#if CONFIG_MACF_NET
mac_netinet_fragment(m0, m);
#endif
#if BYTE_ORDER != BIG_ENDIAN
HTONS(mhip->ip_off);
#endif
mhip->ip_sum = 0;
if (sw_csum & CSUM_DELAY_IP) {
mhip->ip_sum = ip_cksum_hdr_out(m, mhlen);
m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IP;
}
/* Append to the m_nextpkt chain. */
*mnext = m;
mnext = &m->m_nextpkt;
nfrags++;
}
OSAddAtomic(nfrags, &ipstat.ips_ofragments);
/* m is the last fragment built above (or the original if the loop never ran). */
m->m_flags |= M_LASTFRAG;
m0->m_flags |= M_FIRSTFRAG | M_FRAG;
/* The first fragment's csum_data is set to the fragment count. */
m0->m_pkthdr.csum_data = nfrags;
/*
 * Turn the original packet into the first fragment: adjust it by
 * (hlen + firstlen - ip_len) so that it holds hlen + firstlen bytes,
 * then fix up its header.
 * NOTE(review): relies on m_adj() trimming from the tail for a negative
 * count -- confirm against the mbuf API.
 */
m = m0;
m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
m->m_pkthdr.len = hlen + firstlen;
ip->ip_len = htons((u_short)m->m_pkthdr.len);
ip->ip_off |= IP_MF;
#if BYTE_ORDER != BIG_ENDIAN
HTONS(ip->ip_off);
#endif
ip->ip_sum = 0;
if (sw_csum & CSUM_DELAY_IP) {
ip->ip_sum = ip_cksum_hdr_out(m, hlen);
m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IP;
}
sendorfree:
/* On failure, release the chain headed by the original packet. */
if (error)
m_freem_list(m0);
return (error);
}
/*
 * Forward the byte count of a software-computed transport checksum to
 * the statistics routine of the owning protocol.  Only TCP and UDP are
 * handled; any other protocol number is ignored.
 */
static void
ip_out_cksum_stats(int proto, u_int32_t len)
{
	if (proto == IPPROTO_TCP) {
		tcp_out_cksum_stats(len);
	} else if (proto == IPPROTO_UDP) {
		udp_out_cksum_stats(len);
	}
	/* nothing to record for any other protocol */
}
/*
 * Compute, in software, the delayed checksums of the IPv4 packet in m
 * whose IP header starts hoff bytes into the packet.
 *
 * Only flags present in both csum_flags and m->m_pkthdr.csum_flags are
 * considered, and of those only CSUM_DELAY_IP (IP header checksum) and
 * CSUM_DELAY_DATA (transport checksum).  Each checksum that is computed
 * is written into the packet and its flag is cleared from the mbuf.
 *
 * Returns the subset of (CSUM_DELAY_IP | CSUM_DELAY_DATA) that was
 * handled, or 0 when there was nothing to do.  Panics when the packet is
 * too short for the IP header or for the transport checksum field.
 */
uint32_t
in_finalize_cksum(struct mbuf *m, uint32_t hoff, uint32_t csum_flags)
{
/* Scratch copy of the IP header; hlen is verified against its size below. */
unsigned char buf[15 << 2] __attribute__((aligned(8)));
struct ip *ip;
uint32_t offset, _hlen, mlen, hlen, len, sw_csum;
uint16_t csum, ip_len;
_CASSERT(sizeof (csum) == sizeof (uint16_t));
VERIFY(m->m_flags & M_PKTHDR);
sw_csum = (csum_flags & m->m_pkthdr.csum_flags);
if ((sw_csum &= (CSUM_DELAY_IP | CSUM_DELAY_DATA)) == 0)
goto done;
mlen = m->m_pkthdr.len;
if (mlen < (hoff + sizeof (*ip))) {
panic("%s: mbuf %p pkt len (%u) < hoff+ip_hdr "
"(%u+%u)\n", __func__, m, mlen, hoff,
(uint32_t)sizeof (*ip));
}
/*
 * Work on a private copy of the basic header when the header checksum
 * is to be computed (the copy gets modified below), when the header is
 * not fully inside the first mbuf, or when it is not suitably aligned.
 * _hlen records how many header bytes were copied (0 = in place).
 */
if ((sw_csum & CSUM_DELAY_IP) || (hoff + sizeof (*ip)) > m->m_len ||
!IP_HDR_ALIGNED_P(mtod(m, caddr_t) + hoff)) {
m_copydata(m, hoff, sizeof (*ip), (caddr_t)buf);
ip = (struct ip *)(void *)buf;
_hlen = sizeof (*ip);
} else {
ip = (struct ip *)(void *)(m->m_data + hoff);
_hlen = 0;
}
hlen = IP_VHL_HL(ip->ip_vhl) << 2;
if (mlen < (hoff + hlen)) {
panic("%s: mbuf %p pkt too short (%d) for IP header (%u), "
"hoff %u", __func__, m, mlen, hlen, hoff);
}
/*
 * ip_len is accepted in either byte order: try it as stored, then
 * byte-swapped; if neither matches the actual length, log and use the
 * actual length (mlen - hoff).
 */
ip_len = ip->ip_len;
if (ip_len != (mlen - hoff)) {
ip_len = OSSwapInt16(ip_len);
if (ip_len != (mlen - hoff)) {
printf("%s: mbuf 0x%llx proto %d IP len %d (%x) "
"[swapped %d (%x)] doesn't match actual packet "
"length; %d is used instead\n", __func__,
(uint64_t)VM_KERNEL_ADDRPERM(m), ip->ip_p,
ip->ip_len, ip->ip_len, ip_len, ip_len,
(mlen - hoff));
ip_len = mlen - hoff;
}
}
/* Length of everything after the IP header. */
len = ip_len - hlen;
if (sw_csum & CSUM_DELAY_DATA) {
uint16_t ulpoff;
/*
 * With both CSUM_PARTIAL and CSUM_DATA_VALID set, csum_data is
 * rebuilt from the tx_stuff/tx_start pair before being used as the
 * checksum field offset.
 */
if ((m->m_pkthdr.csum_flags & (CSUM_PARTIAL|CSUM_DATA_VALID)) ==
(CSUM_PARTIAL|CSUM_DATA_VALID)) {
m->m_pkthdr.csum_data = (m->m_pkthdr.csum_tx_stuff -
m->m_pkthdr.csum_tx_start);
}
/* Low 16 bits of csum_data: offset of the checksum field past the IP header. */
ulpoff = (m->m_pkthdr.csum_data & 0xffff);
offset = hoff + hlen;
if (mlen < (ulpoff + sizeof (csum))) {
panic("%s: mbuf %p pkt len (%u) proto %d invalid ULP "
"cksum offset (%u) cksum flags 0x%x\n", __func__,
m, mlen, ip->ip_p, ulpoff, m->m_pkthdr.csum_flags);
}
csum = inet_cksum(m, 0, offset, len);
ip_out_cksum_stats(ip->ip_p, len);
/* A computed value of 0 is stored as 0xffff for UDP. */
if (csum == 0 && (m->m_pkthdr.csum_flags & CSUM_UDP))
csum = 0xffff;
/*
 * Store the result: m_copyback() when the field is not inside the
 * first mbuf, a direct 16-bit store when the header is aligned,
 * bcopy() otherwise.
 */
offset += ulpoff;
if (offset + sizeof (csum) > m->m_len) {
m_copyback(m, offset, sizeof (csum), &csum);
} else if (IP_HDR_ALIGNED_P(mtod(m, char *) + hoff)) {
*(uint16_t *)(void *)(mtod(m, char *) + offset) = csum;
} else {
bcopy(&csum, (mtod(m, char *) + offset), sizeof (csum));
}
m->m_pkthdr.csum_flags &=
~(CSUM_DELAY_DATA | CSUM_DATA_VALID | CSUM_PARTIAL);
}
if (sw_csum & CSUM_DELAY_IP) {
/* CSUM_DELAY_IP always takes the copy path above. */
VERIFY(_hlen == sizeof (*ip));
/* Header longer than the basic header: copy all hlen bytes of it. */
if (_hlen != hlen) {
VERIFY(hlen <= sizeof (buf));
m_copydata(m, hoff, hlen, (caddr_t)buf);
ip = (struct ip *)(void *)buf;
_hlen = hlen;
}
/*
 * Checksum the private copy with ip_len passed through htons() and a
 * zeroed ip_sum; only the resulting checksum is written back to the
 * packet.
 */
ip->ip_len = htons(ip_len);
ip->ip_sum = 0;
csum = in_cksum_hdr_opt(ip);
ipstat.ips_snd_swcsum++;
ipstat.ips_snd_swcsum_bytes += hlen;
offset = hoff + offsetof(struct ip, ip_sum);
if (offset + sizeof (csum) > m->m_len) {
m_copyback(m, offset, sizeof (csum), &csum);
} else if (IP_HDR_ALIGNED_P(mtod(m, char *) + hoff)) {
*(uint16_t *)(void *)(mtod(m, char *) + offset) = csum;
} else {
bcopy(&csum, (mtod(m, char *) + offset), sizeof (csum));
}
m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IP;
}
done:
return (sw_csum);
}
/*
 * Insert the IP options held in opt between the IP header and the
 * payload of packet m.
 *
 * opt is read as a struct ipoption: a first-hop destination address
 * (ipopt_dst) followed by the option bytes (ipopt_list).  A non-zero
 * ipopt_dst replaces the packet's ip_dst.
 *
 * Returns the head of the resulting packet, which is a newly allocated
 * mbuf when the options do not fit in front of the existing header.
 * *phlen is written only when options were actually inserted.  When the
 * resulting packet would exceed IP_MAXPACKET, or the new mbuf cannot be
 * allocated, m is returned without options; in the allocation-failure
 * case ip_dst may already have been overwritten.
 */
static struct mbuf *
ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen)
{
struct ipoption *p = mtod(opt, struct ipoption *);
struct mbuf *n;
struct ip *ip = mtod(m, struct ip *);
unsigned optlen;
/* Option bytes only; the leading first-hop address is not counted. */
optlen = opt->m_len - sizeof (p->ipopt_dst);
if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
return (m);
if (p->ipopt_dst.s_addr)
ip->ip_dst = p->ipopt_dst;
/*
 * Either the data is in external storage or there are fewer than
 * optlen bytes of room before m_data: put header + options in a fresh
 * header mbuf and strip the old header from m.
 */
if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
MGETHDR(n, M_DONTWAIT, MT_HEADER);
if (n == NULL)
return (m);
n->m_pkthdr.rcvif = 0;
#if CONFIG_MACF_NET
mac_mbuf_label_copy(m, n);
#endif
n->m_pkthdr.len = m->m_pkthdr.len + optlen;
m->m_len -= sizeof (struct ip);
m->m_data += sizeof (struct ip);
n->m_next = m;
m = n;
m->m_len = optlen + sizeof (struct ip);
m->m_data += max_linkhdr;
/* ip still points at the old header bytes inside the original mbuf. */
(void) memcpy(mtod(m, void *), ip, sizeof (struct ip));
} else {
/* Enough room: slide the basic header back by optlen bytes. */
m->m_data -= optlen;
m->m_len += optlen;
m->m_pkthdr.len += optlen;
ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof (struct ip));
}
/* Options go right after the relocated basic header. */
ip = mtod(m, struct ip *);
bcopy(p->ipopt_list, ip + 1, optlen);
*phlen = sizeof (struct ip) + optlen;
ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
ip->ip_len += optlen;
return (m);
}
/*
 * Copy IP options from header ip into header jp, keeping only NOP
 * options and options for which IPOPT_COPIED() is true.  Both headers
 * are expected to be followed directly by their option area.
 *
 * The copied options are padded with IPOPT_EOL up to a multiple of 4
 * bytes.  Returns the resulting option length in bytes (header not
 * included).
 *
 * NOTE(review): outside DIAGNOSTIC builds an option length byte of 0 is
 * not rejected here; the loop would then neither consume input nor
 * terminate.  Presumably callers only pass pre-validated options --
 * confirm.
 */
static int
ip_optcopy(struct ip *ip, struct ip *jp)
{
u_char *cp, *dp;
int opt, optlen, cnt;
cp = (u_char *)(ip + 1);
dp = (u_char *)(jp + 1);
/* Number of option bytes in the source header. */
cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
for (; cnt > 0; cnt -= optlen, cp += optlen) {
opt = cp[0];
if (opt == IPOPT_EOL)
break;
if (opt == IPOPT_NOP) {
/* NOPs are always carried over. */
*dp++ = IPOPT_NOP;
optlen = 1;
continue;
}
#if DIAGNOSTIC
if (cnt < IPOPT_OLEN + sizeof (*cp)) {
panic("malformed IPv4 option passed to ip_optcopy");
}
#endif
optlen = cp[IPOPT_OLEN];
#if DIAGNOSTIC
if (optlen < IPOPT_OLEN + sizeof (*cp) || optlen > cnt) {
panic("malformed IPv4 option passed to ip_optcopy");
}
#endif
/* Clamp an over-long option to the bytes that are left. */
if (optlen > cnt)
optlen = cnt;
if (IPOPT_COPIED(opt)) {
bcopy(cp, dp, optlen);
dp += optlen;
}
}
/* Pad the output with EOL up to a 4-byte boundary. */
for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
*dp++ = IPOPT_EOL;
return (optlen);
}
/*
 * Get and set IPPROTO_IP level socket options.
 *
 * so:   socket whose inpcb is operated on.
 * sopt: option request; sopt_dir selects SOPT_SET or SOPT_GET and
 *       sopt_name selects the option.
 *
 * Returns 0 on success or an errno value: EINVAL for a non-IPPROTO_IP
 * level or an invalid value, ENOPROTOOPT for an unknown option name,
 * EMSGSIZE/ENOBUFS from the IP_OPTIONS path, or whatever the copyin /
 * copyout / helper routine reports.
 */
int
ip_ctloutput(struct socket *so, struct sockopt *sopt)
{
	struct inpcb *inp = sotoinpcb(so);
	int error, optval;

	error = optval = 0;
	if (sopt->sopt_level != IPPROTO_IP)
		return (EINVAL);

	switch (sopt->sopt_dir) {
	case SOPT_SET:
		switch (sopt->sopt_name) {
#ifdef notyet
		case IP_RETOPTS:
#endif
		case IP_OPTIONS: {
			struct mbuf *m;

			/* The option bytes must fit in a single plain mbuf. */
			if (sopt->sopt_valsize > MLEN) {
				error = EMSGSIZE;
				break;
			}
			MGET(m, sopt->sopt_p != kernproc ? M_WAIT : M_DONTWAIT,
			    MT_HEADER);
			if (m == NULL) {
				error = ENOBUFS;
				break;
			}
			m->m_len = sopt->sopt_valsize;
			error = sooptcopyin(sopt, mtod(m, char *),
			    m->m_len, m->m_len);
			if (error) {
				/*
				 * Nothing owns m yet (ip_pcbopts() has not
				 * been called), so it must be released here
				 * or it is leaked.
				 */
				m_freem(m);
				break;
			}
			/* ip_pcbopts() takes over m on every path. */
			return (ip_pcbopts(sopt->sopt_name,
			    &inp->inp_options, m));
		}

		case IP_TOS:
		case IP_TTL:
		case IP_RECVOPTS:
		case IP_RECVRETOPTS:
		case IP_RECVDSTADDR:
		case IP_RECVIF:
		case IP_RECVTTL:
		case IP_RECVPKTINFO:
			/* All of these take a single int argument. */
			error = sooptcopyin(sopt, &optval, sizeof (optval),
			    sizeof (optval));
			if (error)
				break;

			switch (sopt->sopt_name) {
			case IP_TOS:
				inp->inp_ip_tos = optval;
				break;

			case IP_TTL:
				inp->inp_ip_ttl = optval;
				break;
/* Set or clear one inp_flags bit depending on optval. */
#define OPTSET(bit) \
	if (optval) \
		inp->inp_flags |= bit; \
	else \
		inp->inp_flags &= ~bit;

			case IP_RECVOPTS:
				OPTSET(INP_RECVOPTS);
				break;

			case IP_RECVRETOPTS:
				OPTSET(INP_RECVRETOPTS);
				break;

			case IP_RECVDSTADDR:
				OPTSET(INP_RECVDSTADDR);
				break;

			case IP_RECVIF:
				OPTSET(INP_RECVIF);
				break;

			case IP_RECVTTL:
				OPTSET(INP_RECVTTL);
				break;

			case IP_RECVPKTINFO:
				OPTSET(INP_PKTINFO);
				break;
			}
			break;
#undef OPTSET

#if CONFIG_FORCE_OUT_IFP
		case IP_FORCE_OUT_IFP: {
			char ifname[IFNAMSIZ];
			unsigned int ifscope;

			/* This option is settable only for IPv4 */
			if (!(inp->inp_vflag & INP_IPV4)) {
				error = EINVAL;
				break;
			}

			/* Verify interface name parameter is sane */
			if (sopt->sopt_valsize > sizeof (ifname)) {
				error = EINVAL;
				break;
			}

			/* Copy the interface name */
			if (sopt->sopt_valsize != 0) {
				error = sooptcopyin(sopt, ifname,
				    sizeof (ifname), sopt->sopt_valsize);
				if (error)
					break;
			}

			if (sopt->sopt_valsize == 0 || ifname[0] == '\0') {
				/* An empty name removes the binding. */
				ifscope = IFSCOPE_NONE;
			} else {
				ifnet_t ifp;

				/* The name must be NUL terminated. */
				if (ifname[sopt->sopt_valsize - 1] != '\0') {
					error = EINVAL;
					break;
				}

				if (ifnet_find_by_name(ifname, &ifp) != 0) {
					error = ENXIO;
					break;
				}

				/* Only the index is needed; drop the ifnet. */
				ifscope = ifp->if_index;
				ifnet_release(ifp);
			}
			error = inp_bindif(inp, ifscope, NULL);
		}
		break;
#endif
		/* Multicast options are handled by inp_setmoptions(). */
		case IP_MULTICAST_IF:
		case IP_MULTICAST_IFINDEX:
		case IP_MULTICAST_VIF:
		case IP_MULTICAST_TTL:
		case IP_MULTICAST_LOOP:
		case IP_ADD_MEMBERSHIP:
		case IP_DROP_MEMBERSHIP:
		case IP_ADD_SOURCE_MEMBERSHIP:
		case IP_DROP_SOURCE_MEMBERSHIP:
		case IP_BLOCK_SOURCE:
		case IP_UNBLOCK_SOURCE:
		case IP_MSFILTER:
		case MCAST_JOIN_GROUP:
		case MCAST_LEAVE_GROUP:
		case MCAST_JOIN_SOURCE_GROUP:
		case MCAST_LEAVE_SOURCE_GROUP:
		case MCAST_BLOCK_SOURCE:
		case MCAST_UNBLOCK_SOURCE:
			error = inp_setmoptions(inp, sopt);
			break;

		case IP_PORTRANGE:
			error = sooptcopyin(sopt, &optval, sizeof (optval),
			    sizeof (optval));
			if (error)
				break;

			switch (optval) {
			case IP_PORTRANGE_DEFAULT:
				inp->inp_flags &= ~(INP_LOWPORT);
				inp->inp_flags &= ~(INP_HIGHPORT);
				break;

			case IP_PORTRANGE_HIGH:
				inp->inp_flags &= ~(INP_LOWPORT);
				inp->inp_flags |= INP_HIGHPORT;
				break;

			case IP_PORTRANGE_LOW:
				inp->inp_flags &= ~(INP_HIGHPORT);
				inp->inp_flags |= INP_LOWPORT;
				break;

			default:
				error = EINVAL;
				break;
			}
			break;

#if IPSEC
		case IP_IPSEC_POLICY: {
			caddr_t req = NULL;
			size_t len = 0;
			int priv;
			struct mbuf *m;
			int optname;

			if ((error = soopt_getm(sopt, &m)) != 0)
				break;
			if ((error = soopt_mcopyin(sopt, m)) != 0)
				break;
			priv = (proc_suser(sopt->sopt_p) == 0);
			if (m) {
				req = mtod(m, caddr_t);
				len = m->m_len;
			}
			optname = sopt->sopt_name;
			error = ipsec4_set_policy(inp, optname, req, len, priv);
			m_freem(m);
			break;
		}
#endif

#if TRAFFIC_MGT
		case IP_TRAFFIC_MGT_BACKGROUND: {
			unsigned background = 0;

			error = sooptcopyin(sopt, &background,
			    sizeof (background), sizeof (background));
			if (error)
				break;

			if (background) {
				socket_set_traffic_mgt_flags_locked(so,
				    TRAFFIC_MGT_SO_BACKGROUND);
			} else {
				socket_clear_traffic_mgt_flags_locked(so,
				    TRAFFIC_MGT_SO_BACKGROUND);
			}

			break;
		}
#endif

		case IP_BOUND_IF:
			/* This option is settable only for IPv4 */
			if (!(inp->inp_vflag & INP_IPV4)) {
				error = EINVAL;
				break;
			}

			error = sooptcopyin(sopt, &optval, sizeof (optval),
			    sizeof (optval));

			if (error)
				break;

			error = inp_bindif(inp, optval, NULL);
			break;

		case IP_NO_IFT_CELLULAR:
			/* This option is settable only for IPv4 */
			if (!(inp->inp_vflag & INP_IPV4)) {
				error = EINVAL;
				break;
			}

			error = sooptcopyin(sopt, &optval, sizeof (optval),
			    sizeof (optval));

			if (error)
				break;

			/* Once set, the restriction cannot be cleared. */
			if (!optval && INP_NO_CELLULAR(inp)) {
				error = EINVAL;
				break;
			}

			error = so_set_restrictions(so,
			    SO_RESTRICT_DENY_CELLULAR);
			break;

		case IP_OUT_IF:
			/* This option is not settable */
			error = EINVAL;
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		break;

	case SOPT_GET:
		switch (sopt->sopt_name) {
		case IP_OPTIONS:
		case IP_RETOPTS:
			if (inp->inp_options) {
				error = sooptcopyout(sopt,
				    mtod(inp->inp_options, char *),
				    inp->inp_options->m_len);
			} else {
				sopt->sopt_valsize = 0;
			}
			break;

		case IP_TOS:
		case IP_TTL:
		case IP_RECVOPTS:
		case IP_RECVRETOPTS:
		case IP_RECVDSTADDR:
		case IP_RECVIF:
		case IP_RECVTTL:
		case IP_PORTRANGE:
		case IP_RECVPKTINFO:
			/* All of these are reported as a single int. */
			switch (sopt->sopt_name) {

			case IP_TOS:
				optval = inp->inp_ip_tos;
				break;

			case IP_TTL:
				optval = inp->inp_ip_ttl;
				break;

/* 1 when the given inp_flags bit is set, 0 otherwise. */
#define OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)

			case IP_RECVOPTS:
				optval = OPTBIT(INP_RECVOPTS);
				break;

			case IP_RECVRETOPTS:
				optval = OPTBIT(INP_RECVRETOPTS);
				break;

			case IP_RECVDSTADDR:
				optval = OPTBIT(INP_RECVDSTADDR);
				break;

			case IP_RECVIF:
				optval = OPTBIT(INP_RECVIF);
				break;

			case IP_RECVTTL:
				optval = OPTBIT(INP_RECVTTL);
				break;

			case IP_PORTRANGE:
				if (inp->inp_flags & INP_HIGHPORT)
					optval = IP_PORTRANGE_HIGH;
				else if (inp->inp_flags & INP_LOWPORT)
					optval = IP_PORTRANGE_LOW;
				else
					optval = 0;
				break;

			case IP_RECVPKTINFO:
				optval = OPTBIT(INP_PKTINFO);
				break;
			}
			error = sooptcopyout(sopt, &optval, sizeof (optval));
			break;

		case IP_MULTICAST_IF:
		case IP_MULTICAST_IFINDEX:
		case IP_MULTICAST_VIF:
		case IP_MULTICAST_TTL:
		case IP_MULTICAST_LOOP:
		case IP_MSFILTER:
			error = inp_getmoptions(inp, sopt);
			break;

#if IPSEC
		case IP_IPSEC_POLICY: {
			/* Reading the policy back is a no-op that succeeds. */
			error = 0;
			break;
		}
#endif

#if TRAFFIC_MGT
		case IP_TRAFFIC_MGT_BACKGROUND: {
			unsigned background = (so->so_traffic_mgt_flags &
			    TRAFFIC_MGT_SO_BACKGROUND) ? 1 : 0;
			return (sooptcopyout(sopt, &background,
			    sizeof (background)));
			break;
		}
#endif

		case IP_BOUND_IF:
			/* optval stays 0 when the socket is not bound. */
			if (inp->inp_flags & INP_BOUND_IF)
				optval = inp->inp_boundifp->if_index;
			error = sooptcopyout(sopt, &optval, sizeof (optval));
			break;

		case IP_NO_IFT_CELLULAR:
			optval = INP_NO_CELLULAR(inp) ? 1 : 0;
			error = sooptcopyout(sopt, &optval, sizeof (optval));
			break;

		case IP_OUT_IF:
			/* Index of inp_last_outifp, or 0 if there is none. */
			optval = (inp->inp_last_outifp != NULL) ?
			    inp->inp_last_outifp->if_index : 0;
			error = sooptcopyout(sopt, &optval, sizeof (optval));
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		break;
	}
	return (error);
}
/*
 * Install a new set of IP options, supplied in mbuf m, as the options of
 * a protocol control block (*pcbopt).
 *
 * Any previous options in *pcbopt are released first.  A NULL or empty m
 * simply clears the options.  Otherwise the option bytes are shifted up
 * by sizeof (struct in_addr) so that the mbuf starts with a first-hop
 * destination slot (zero when no source route is present).  For LSRR and
 * SSRR options the first address of the route is moved into that slot
 * and removed from the option itself.
 *
 * m is consumed on every path: it is either stored in *pcbopt or freed.
 * Returns 0 on success, EINVAL when the options are malformed or do not
 * fit.
 */
static int
ip_pcbopts(int optname, struct mbuf **pcbopt, struct mbuf *m)
{
#pragma unused(optname)
	int cnt, optlen;
	u_char *cp;
	u_char opt;

	/* turn off any old options */
	if (*pcbopt)
		(void) m_free(*pcbopt);
	*pcbopt = 0;
	if (m == (struct mbuf *)0 || m->m_len == 0) {
		/* Only turning off any previous options */
		if (m)
			(void) m_free(m);
		return (0);
	}

	/* Options must come in whole 32-bit words. */
	if (m->m_len % sizeof (int32_t))
		goto bad;

	/* There must be room left for the first-hop slot. */
	if (m->m_data + m->m_len + sizeof (struct in_addr) >= &m->m_dat[MLEN])
		goto bad;

	/*
	 * Shift the options up to make room for the first-hop address and
	 * clear that slot.  cnt is the number of option bytes; cp points at
	 * their new location.
	 */
	cnt = m->m_len;
	m->m_len += sizeof (struct in_addr);
	cp = mtod(m, u_char *) + sizeof (struct in_addr);
	ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
	bzero(mtod(m, caddr_t), sizeof (struct in_addr));

	for (; cnt > 0; cnt -= optlen, cp += optlen) {
		opt = cp[IPOPT_OPTVAL];
		if (opt == IPOPT_EOL)
			break;
		if (opt == IPOPT_NOP)
			optlen = 1;
		else {
			if (cnt < IPOPT_OLEN + sizeof (*cp))
				goto bad;
			optlen = cp[IPOPT_OLEN];
			if (optlen < IPOPT_OLEN + sizeof (*cp) || optlen > cnt)
				goto bad;
		}
		switch (opt) {

		default:
			break;

		case IPOPT_LSRR:
		case IPOPT_SSRR:
			/*
			 * The user supplies the route as
			 *	hop1, hop2, ..., dest
			 * hop1 becomes the first-hop address stored in front
			 * of the options and is removed from the option.
			 * The option must hold at least one address.
			 */
			if (optlen < IPOPT_MINOFF - 1 + sizeof (struct in_addr))
				goto bad;
			m->m_len -= sizeof (struct in_addr);
			cnt -= sizeof (struct in_addr);
			optlen -= sizeof (struct in_addr);
			cp[IPOPT_OLEN] = optlen;
			/* Move the first hop before the start of the options. */
			bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
			    sizeof (struct in_addr));
			/*
			 * Close the gap left by the removed address.  cnt has
			 * already been reduced, so the bytes that remain
			 * after the removed address number exactly
			 * cnt - (IPOPT_MINOFF - 1); copying any more would
			 * read and write past the end of the option data.
			 */
			ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
			    sizeof (struct in_addr)),
			    (caddr_t)&cp[IPOPT_OFFSET+1],
			    (unsigned)cnt - (IPOPT_MINOFF - 1));
			break;
		}
	}
	if (m->m_len > MAX_IPOPTLEN + sizeof (struct in_addr))
		goto bad;
	*pcbopt = m;
	return (0);

bad:
	(void) m_free(m);
	return (EINVAL);
}
/*
 * One-time setup of the zone from which ip_moptions structures are
 * allocated.  The "ifa_debug" boot argument selects whether the larger
 * debug variant of the structure is used.  Panics if the zone cannot be
 * created.
 */
void
ip_moptions_init(void)
{
	PE_parse_boot_argn("ifa_debug", &imo_debug, sizeof (imo_debug));

	if (imo_debug == 0)
		imo_size = sizeof (struct ip_moptions);
	else
		imo_size = sizeof (struct ip_moptions_dbg);

	imo_zone = zinit(imo_size, IMO_ZONE_MAX * imo_size, 0,
	    IMO_ZONE_NAME);
	if (imo_zone == NULL) {
		panic("%s: failed allocating %s", __func__, IMO_ZONE_NAME);
	}

	zone_change(imo_zone, Z_EXPAND, TRUE);
}
/*
 * Take one reference on imo.  With locked == 0 the imo lock is acquired
 * and released around the update; otherwise the caller must already hold
 * it.  Panics if the reference count wraps to zero.  When a trace hook
 * is installed it is invoked with TRUE.
 */
void
imo_addref(struct ip_moptions *imo, int locked)
{
	if (locked) {
		IMO_LOCK_ASSERT_HELD(imo);
	} else {
		IMO_LOCK(imo);
	}

	imo->imo_refcnt++;
	if (imo->imo_refcnt == 0) {
		panic("%s: imo %p wraparound refcnt\n", __func__, imo);
	} else if (imo->imo_trace != NULL) {
		(*imo->imo_trace)(imo, TRUE);
	}

	if (!locked) {
		IMO_UNLOCK(imo);
	}
}
/*
 * Drop one reference on imo.  When the count reaches zero every group
 * membership is left, the filter and membership arrays are freed, the
 * lock is destroyed and the structure is returned to imo_zone.
 *
 * Panics if the reference count is already zero, or if the structure
 * being released does not carry IFD_ALLOC.
 */
void
imo_remref(struct ip_moptions *imo)
{
int i;
IMO_LOCK(imo);
if (imo->imo_refcnt == 0) {
panic("%s: imo %p negative refcnt", __func__, imo);
} else if (imo->imo_trace != NULL) {
(*imo->imo_trace)(imo, FALSE);
}
--imo->imo_refcnt;
/* Other references remain: nothing more to do. */
if (imo->imo_refcnt > 0) {
IMO_UNLOCK(imo);
return;
}
/* Last reference: leave every group, with its source filter if any. */
for (i = 0; i < imo->imo_num_memberships; ++i) {
struct in_mfilter *imf;
imf = imo->imo_mfilters ? &imo->imo_mfilters[i] : NULL;
if (imf != NULL)
imf_leave(imf);
(void) in_leavegroup(imo->imo_membership[i], imf);
if (imf != NULL)
imf_purge(imf);
/* Drop the reference held through the membership array. */
INM_REMREF(imo->imo_membership[i]);
imo->imo_membership[i] = NULL;
}
imo->imo_num_memberships = 0;
if (imo->imo_mfilters != NULL) {
FREE(imo->imo_mfilters, M_INMFILTER);
imo->imo_mfilters = NULL;
}
if (imo->imo_membership != NULL) {
FREE(imo->imo_membership, M_IPMOPTS);
imo->imo_membership = NULL;
}
IMO_UNLOCK(imo);
lck_mtx_destroy(&imo->imo_lock, ifa_mtx_grp);
/* Only structures flagged IFD_ALLOC may go back to the zone. */
if (!(imo->imo_debug & IFD_ALLOC)) {
panic("%s: imo %p cannot be freed", __func__, imo);
}
zfree(imo_zone, imo);
}
/*
 * Record a call trace for a reference hold (refhold != 0) or release
 * (refhold == 0) on imo.  imo must be the debug variant of the
 * structure (IFD_DEBUG set), otherwise this panics.  The trace goes
 * into the slot selected by the matching 16-bit counter modulo
 * IMO_TRACE_HIST_SIZE.
 */
static void
imo_trace(struct ip_moptions *imo, int refhold)
{
	struct ip_moptions_dbg *dbg = (struct ip_moptions_dbg *)imo;
	ctrace_t *history;
	u_int16_t *counter;
	u_int32_t slot;

	if (!(imo->imo_debug & IFD_DEBUG)) {
		panic("%s: imo %p has no debug structure", __func__, imo);
	}

	counter = refhold ? &dbg->imo_refhold_cnt : &dbg->imo_refrele_cnt;
	history = refhold ? dbg->imo_refhold : dbg->imo_refrele;

	slot = atomic_add_16_ov(counter, 1) % IMO_TRACE_HIST_SIZE;
	ctrace_record(&history[slot]);
}
/*
 * Allocate and initialise an ip_moptions structure from imo_zone.
 * how == M_WAITOK uses zalloc(); any other value uses
 * zalloc_noblock().  The returned structure is zeroed, has its lock
 * initialised, is flagged IFD_ALLOC (plus IFD_DEBUG and a trace hook
 * when imo_debug is set) and has IMO_ADDREF() applied once.  Returns
 * NULL when the allocation fails.
 */
struct ip_moptions *
ip_allocmoptions(int how)
{
	struct ip_moptions *imo;

	if (how == M_WAITOK)
		imo = zalloc(imo_zone);
	else
		imo = zalloc_noblock(imo_zone);

	if (imo == NULL)
		return (NULL);

	bzero(imo, imo_size);
	lck_mtx_init(&imo->imo_lock, ifa_mtx_grp, ifa_mtx_attr);
	imo->imo_debug |= IFD_ALLOC;
	if (imo_debug != 0) {
		imo->imo_debug |= IFD_DEBUG;
		imo->imo_trace = imo_trace;
	}
	IMO_ADDREF(imo);

	return (imo);
}
/*
 * Hand a copy of outgoing packet m to the loopback interface (lo_ifp)
 * through dlil_output().
 *
 * srcifp:  interface to record as the packet's source interface; when
 *          NULL it is looked up from the packet's source address.
 * origifp: stored as the copy's rcvif and recorded as its destination
 *          interface.
 * dst:     destination address passed on to dlil_output().
 * hlen:    IP header length in bytes.
 *
 * Silently does nothing when there is no loopback interface or the copy
 * cannot be made.  The original packet m is not modified here.
 */
static void
ip_mloopback(struct ifnet *srcifp, struct ifnet *origifp, struct mbuf *m,
struct sockaddr_in *dst, int hlen)
{
struct mbuf *copym;
struct ip *ip;
if (lo_ifp == NULL)
return;
/*
 * Copy the packet; if the copy's first mbuf uses external storage or
 * holds fewer than hlen bytes, pull the header up first.
 * NOTE(review): presumably this keeps the header edits below out of
 * storage shared with the original -- confirm.
 */
copym = m_copym_mode(m, 0, M_COPYALL, M_DONTWAIT, M_COPYM_COPY_HDR);
if (copym != NULL && ((copym->m_flags & M_EXT) || copym->m_len < hlen))
copym = m_pullup(copym, hlen);
if (copym == NULL)
return;
/* Pass ip_len/ip_off through HTONS() and fill in the header checksum. */
ip = mtod(copym, struct ip *);
#if BYTE_ORDER != BIG_ENDIAN
HTONS(ip->ip_len);
HTONS(ip->ip_off);
#endif
ip->ip_sum = 0;
ip->ip_sum = ip_cksum_hdr_out(copym, hlen);
/*
 * With hwcksum_rx set, flag the data checksum as already valid.
 * Otherwise, if a delayed data checksum is still pending, compute it
 * now; ip_len is converted back with NTOHS() around in_delayed_cksum().
 */
if (hwcksum_rx) {
copym->m_pkthdr.csum_flags &= ~CSUM_PARTIAL;
copym->m_pkthdr.csum_flags |=
CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
copym->m_pkthdr.csum_data = 0xffff;
} else if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
#if BYTE_ORDER != BIG_ENDIAN
NTOHS(ip->ip_len);
#endif
in_delayed_cksum(copym);
#if BYTE_ORDER != BIG_ENDIAN
HTONS(ip->ip_len);
#endif
}
copym->m_pkthdr.rcvif = origifp;
/*
 * No source interface given: search the in_ifaddr hash bucket for the
 * address equal to ip_src and use its interface.
 */
if (srcifp == NULL) {
struct in_ifaddr *ia;
lck_rw_lock_shared(in_ifaddr_rwlock);
TAILQ_FOREACH(ia, INADDR_HASH(ip->ip_src.s_addr), ia_hash) {
IFA_LOCK_SPIN(&ia->ia_ifa);
if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_src.s_addr) {
srcifp = ia->ia_ifp;
IFA_UNLOCK(&ia->ia_ifa);
break;
}
IFA_UNLOCK(&ia->ia_ifa);
}
lck_rw_done(in_ifaddr_rwlock);
}
/* Record source (if known) and destination interface on the copy. */
if (srcifp != NULL)
ip_setsrcifaddr_info(copym, srcifp->if_index, NULL);
ip_setdstifaddr_info(copym, origifp->if_index, NULL);
dlil_output(lo_ifp, PF_INET, copym, NULL, SA(dst), 0, NULL);
}
/*
 * Given the source address of an outgoing packet, find the interface
 * address (ifaddr) it belongs to, taking the cached route in ro and the
 * interface scope ifscope into account.
 *
 * ip:      packet header; ip_src must not be INADDR_ANY (VERIFY'd).
 * ro:      cached route.  It may be released here (ro->ro_rt set to NULL
 *          by ROUTE_RELEASE) when it is found not to agree with the
 *          chosen address or is no longer RTF_UP.  When a usable route
 *          remains, the selection is cached in ro->ro_srcia and
 *          ROF_SRCIF_SELECTED is set.
 * ifscope: interface index to restrict the lookup to, or IFSCOPE_NONE.
 *
 * Returns the selected ifaddr with a reference held for the caller, or
 * NULL when none was found.  ro->ro_rt, when non-NULL, is locked on
 * entry to the body and unlocked before return.
 */
static struct ifaddr *
in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope)
{
	struct ifaddr *ifa = NULL;
	struct in_addr src = ip->ip_src;
	struct in_addr dst = ip->ip_dst;
	struct ifnet *rt_ifp;
	char s_src[MAX_IPv4_STR_LEN], s_dst[MAX_IPv4_STR_LEN];

	VERIFY(src.s_addr != INADDR_ANY);

	if (ip_select_srcif_debug) {
		(void) inet_ntop(AF_INET, &src.s_addr, s_src, sizeof (s_src));
		(void) inet_ntop(AF_INET, &dst.s_addr, s_dst, sizeof (s_dst));
	}

	if (ro->ro_rt != NULL)
		RT_LOCK(ro->ro_rt);

	rt_ifp = (ro->ro_rt != NULL) ? ro->ro_rt->rt_ifp : NULL;

	/*
	 * Scoped lookup: either the caller gave a scope, or one is derived
	 * from the cached route's interface.
	 */
	if (ifscope != IFSCOPE_NONE || ro->ro_rt != NULL) {
		unsigned int scope = ifscope;

		/*
		 * No explicit scope: use the route's interface, falling
		 * back to the primary interface when the route is unusable.
		 */
		if (scope == IFSCOPE_NONE) {
			scope = rt_ifp->if_index;
			if (scope != get_primary_ifscope(AF_INET) &&
			    ROUTE_UNUSABLE(ro))
				scope = get_primary_ifscope(AF_INET);
		}

		ifa = (struct ifaddr *)ifa_foraddr_scoped(src.s_addr, scope);

		if (ifa == NULL && ip->ip_p != IPPROTO_UDP &&
		    ip->ip_p != IPPROTO_TCP && ipforwarding) {
			/*
			 * Forwarding is on and the protocol is neither UDP
			 * nor TCP: if the address exists on any interface,
			 * drop the scope so the unscoped lookup below runs.
			 */
			ifa = (struct ifaddr *)ifa_foraddr(src.s_addr);
			if (ifa != NULL) {
				IFA_REMREF(ifa);
				ifa = NULL;
				ifscope = IFSCOPE_NONE;
			}
		}

		if (ip_select_srcif_debug && ifa != NULL) {
			if (ro->ro_rt != NULL) {
				printf("%s->%s ifscope %d->%d ifa_if %s "
				    "ro_if %s\n", s_src, s_dst, ifscope,
				    scope, if_name(ifa->ifa_ifp),
				    if_name(rt_ifp));
			} else {
				printf("%s->%s ifscope %d->%d ifa_if %s\n",
				    s_src, s_dst, ifscope, scope,
				    if_name(ifa->ifa_ifp));
			}
		}
	}

	/* Unscoped lookup when nothing was found and no scope applies. */
	if (ifa == NULL && ifscope == IFSCOPE_NONE) {
		ifa = (struct ifaddr *)ifa_foraddr(src.s_addr);

		/*
		 * No cached route: look up the route to the destination and
		 * see whether it leaves through a different interface than
		 * the one owning the source address.
		 */
		if (ifa != NULL && ro->ro_rt == NULL) {
			struct rtentry *rt;
			struct sockaddr_in sin;
			struct ifaddr *oifa = NULL;
			/*
			 * RTF_GATEWAY state of the looked-up route, captured
			 * while the route is locked and referenced.  rt
			 * itself must not be dereferenced once
			 * rtfree_locked() has dropped our reference and
			 * rnh_lock has been released.
			 */
			int via_gateway = 0;

			bzero(&sin, sizeof (sin));
			sin.sin_family = AF_INET;
			sin.sin_len = sizeof (sin);
			sin.sin_addr = dst;

			lck_mtx_lock(rnh_lock);
			if ((rt = rt_lookup(TRUE, SA(&sin), NULL,
			    rt_tables[AF_INET], IFSCOPE_NONE)) != NULL) {
				RT_LOCK(rt);
				via_gateway =
				    ((rt->rt_flags & RTF_GATEWAY) != 0);
				/*
				 * The route uses a different interface:
				 * tentatively switch to the route's ifaddr
				 * and remember the original one.
				 */
				if (ifa->ifa_ifp != rt->rt_ifp) {
					oifa = ifa;
					ifa = rt->rt_ifa;
					IFA_ADDREF(ifa);
					RT_UNLOCK(rt);
				} else {
					RT_UNLOCK(rt);
				}
				rtfree_locked(rt);
			}
			lck_mtx_unlock(rnh_lock);

			if (oifa != NULL) {
				struct ifaddr *iifa;

				/*
				 * Is the source address also configured on
				 * the route's interface?
				 */
				iifa = (struct ifaddr *)ifa_foraddr_scoped(
				    src.s_addr, ifa->ifa_ifp->if_index);

				if (iifa != NULL) {
					/* Yes: use that ifaddr. */
					IFA_REMREF(oifa);
					IFA_REMREF(ifa);
					ifa = iifa;
				} else if (!ipforwarding || via_gateway) {
					/*
					 * Not forwarding, or the route goes
					 * through a gateway: keep the
					 * original ifaddr.
					 */
					IFA_REMREF(ifa);
					ifa = oifa;
				} else {
					/* Keep the route's ifaddr. */
					IFA_REMREF(oifa);
				}
			}
		} else if (ifa != NULL && ro->ro_rt != NULL &&
		    !(ro->ro_rt->rt_flags & RTF_GATEWAY) &&
		    ifa->ifa_ifp != ro->ro_rt->rt_ifp && ipforwarding) {
			/*
			 * Forwarding over a direct (non-gateway) cached
			 * route on another interface: use the route's
			 * ifaddr.
			 */
			IFA_REMREF(ifa);
			ifa = ro->ro_rt->rt_ifa;
			IFA_ADDREF(ifa);
		}

		if (ip_select_srcif_debug && ifa != NULL) {
			printf("%s->%s ifscope %d ifa_if %s\n",
			    s_src, s_dst, ifscope, if_name(ifa->ifa_ifp));
		}
	}

	if (ro->ro_rt != NULL)
		RT_LOCK_ASSERT_HELD(ro->ro_rt);

	/*
	 * Drop the cached route when no address was found, when the
	 * address's interface differs from the route's (loopback routes
	 * excepted), or when the route is no longer up.
	 */
	if (ro->ro_rt != NULL &&
	    (ifa == NULL || (ifa->ifa_ifp != rt_ifp && rt_ifp != lo_ifp) ||
	    !(ro->ro_rt->rt_flags & RTF_UP))) {
		if (ip_select_srcif_debug) {
			if (ifa != NULL) {
				printf("%s->%s ifscope %d ro_if %s != "
				    "ifa_if %s (cached route cleared)\n",
				    s_src, s_dst, ifscope, if_name(rt_ifp),
				    if_name(ifa->ifa_ifp));
			} else {
				printf("%s->%s ifscope %d ro_if %s "
				    "(no ifa_if found)\n",
				    s_src, s_dst, ifscope, if_name(rt_ifp));
			}
		}

		RT_UNLOCK(ro->ro_rt);
		ROUTE_RELEASE(ro);

		/*
		 * Link-local destination with a non-link-local source:
		 * report no source interface.
		 */
		if (IN_LINKLOCAL(ntohl(dst.s_addr)) &&
		    !IN_LINKLOCAL(ntohl(src.s_addr)) && ifa != NULL) {
			IFA_REMREF(ifa);
			ifa = NULL;
		}
	}

	if (ip_select_srcif_debug && ifa == NULL) {
		printf("%s->%s ifscope %d (neither ro_if/ifa_if found)\n",
		    s_src, s_dst, ifscope);
	}

	/*
	 * Cache the selection in the route.  For a link-local destination
	 * this is done only once the route's gateway is an AF_LINK address
	 * with a non-zero address length.
	 */
	if (ro->ro_rt != NULL && (!IN_LINKLOCAL(ntohl(dst.s_addr)) ||
	    (ro->ro_rt->rt_gateway->sa_family == AF_LINK &&
	    SDL(ro->ro_rt->rt_gateway)->sdl_alen != 0))) {
		/* The route keeps its own reference on ifa. */
		if (ifa != NULL)
			IFA_ADDREF(ifa);
		if (ro->ro_srcia != NULL)
			IFA_REMREF(ro->ro_srcia);
		ro->ro_srcia = ifa;
		ro->ro_flags |= ROF_SRCIF_SELECTED;
		RT_GENID_SYNC(ro->ro_rt);
	}

	if (ro->ro_rt != NULL)
		RT_UNLOCK(ro->ro_rt);

	return (ifa);
}
/*
 * Decide, for outgoing packet m on interface ifp, which checksums must
 * be computed in software and which are left to the interface.
 *
 * hlen:    IP header length in bytes.
 * ip_len:  total IP packet length, compared against ifp->if_mtu.
 * sw_csum: out; receives the checksum flags still to be handled in
 *          software by the caller.  A delayed data checksum is computed
 *          here and never left in *sw_csum.
 *
 * m->m_pkthdr.csum_flags is rewritten to reflect what remains for the
 * interface (or cleared entirely when hwcksum_tx is off).
 */
void
ip_output_checksum(struct ifnet *ifp, struct mbuf *m, int hlen, int ip_len,
uint32_t *sw_csum)
{
int tso = TSO_IPV4_OK(ifp, m);
uint32_t hwcap = ifp->if_hwassist;
m->m_pkthdr.csum_flags |= CSUM_IP;
/*
 * With hwcksum_tx off every delayed checksum is done in software;
 * otherwise software does whatever the interface does not advertise.
 */
if (!hwcksum_tx) {
*sw_csum = (CSUM_DELAY_DATA | CSUM_DELAY_IP) &
m->m_pkthdr.csum_flags;
} else {
*sw_csum = m->m_pkthdr.csum_flags &
~IF_HWASSIST_CSUM_FLAGS(hwcap);
}
if (hlen != sizeof (struct ip)) {
/* Header carries options: do both delayed checksums in software. */
*sw_csum |= ((CSUM_DELAY_DATA | CSUM_DELAY_IP) &
m->m_pkthdr.csum_flags);
} else if (!(*sw_csum & CSUM_DELAY_DATA) && (hwcap & CSUM_PARTIAL)) {
/*
 * Interface advertises CSUM_PARTIAL.  For a non-TSO TCP packet that
 * fits the MTU, set up the start/stuff offsets (start = end of the IP
 * header, stuff = start + offset taken from csum_data) and leave only
 * the IP header checksum to software.  Anything else gets its data
 * checksum done in software.
 */
if (hwcksum_tx && !tso &&
(m->m_pkthdr.csum_flags & CSUM_TCP) &&
ip_len <= ifp->if_mtu) {
uint16_t start = sizeof (struct ip);
uint16_t ulpoff = m->m_pkthdr.csum_data & 0xffff;
m->m_pkthdr.csum_flags |=
(CSUM_DATA_VALID | CSUM_PARTIAL);
m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
m->m_pkthdr.csum_tx_start = start;
*sw_csum = CSUM_DELAY_IP;
} else {
*sw_csum |= (CSUM_DELAY_DATA & m->m_pkthdr.csum_flags);
}
}
/* Compute a software data checksum right away. */
if (*sw_csum & CSUM_DELAY_DATA) {
in_delayed_cksum(m);
*sw_csum &= ~CSUM_DELAY_DATA;
}
/*
 * Keep only the offload flags the interface supports (plus
 * CSUM_DATA_VALID) and every flag outside IF_HWASSIST_CSUM_MASK; with
 * hwcksum_tx off, clear all flags.
 */
if (hwcksum_tx) {
m->m_pkthdr.csum_flags =
((m->m_pkthdr.csum_flags &
(IF_HWASSIST_CSUM_FLAGS(hwcap) | CSUM_DATA_VALID)) |
(m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_MASK));
} else {
m->m_pkthdr.csum_flags = 0;
}
}
int
ip_gre_output(struct mbuf *m)
{
struct route ro;
int error;
bzero(&ro, sizeof (ro));
error = ip_output(m, NULL, &ro, 0, NULL, NULL);
ROUTE_RELEASE(&ro);
return (error);
}