#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/sysctl.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <kern/locks.h>
#include <kern/cpu_number.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_pcb.h>
#if INET6
#include <netinet6/in6_pcb.h>
#endif
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#if TCPDEBUG
#include <netinet/tcp_debug.h>
#endif
#include <sys/kdebug.h>
#define DBG_FNC_TCP_FAST NETDBG_CODE(DBG_NETTCP, (5 << 8))
#define DBG_FNC_TCP_SLOW NETDBG_CODE(DBG_NETTCP, (5 << 8) | 1)
/*
 * Sysctl handler for timer values that are kept internally in clock ticks
 * but are presented to the sysctl caller in milliseconds.
 *
 * oidp->oid_arg1 points at the int that holds the tick count.  The current
 * value is converted to milliseconds and handed to sysctl_handle_int(); if
 * a new value was supplied (req->newptr set) it is converted back to ticks
 * and stored.
 *
 * Returns 0 on success, the error from sysctl_handle_int() if the transfer
 * failed, or EINVAL when the new value converts to less than one tick or
 * to more than an int can hold.
 *
 * Both conversions use 64-bit intermediates: in plain int arithmetic
 * "tt * 1000" and "s * hz" overflow for large values, which is undefined
 * behaviour and would store or report garbage.
 */
static int
sysctl_msec_to_ticks SYSCTL_HANDLER_ARGS
{
	/* Largest value representable in the (32-bit) int the knob lives in. */
	const int64_t int_limit = 0x7fffffffLL;
	int error, s, tt;
	int64_t msec64, ticks64;

	tt = *(int *)oidp->oid_arg1;

	/* ticks -> milliseconds, clamped so the reported value never wraps. */
	msec64 = (int64_t)tt * 1000 / hz;
	if (msec64 > int_limit)
		msec64 = int_limit;
	s = (int)msec64;

	error = sysctl_handle_int(oidp, &s, 0, req);
	if (error || !req->newptr)
		return (error);

	/* milliseconds -> ticks; reject anything below one tick or too big. */
	ticks64 = (int64_t)s * hz / 1000;
	if (ticks64 < 1 || ticks64 > int_limit)
		return (EINVAL);

	*(int *)oidp->oid_arg1 = (int)ticks64;
	return (0);
}
/*
 * Tunable TCP timer values.
 *
 * tcp_keepinit, tcp_keepidle, tcp_keepintvl, tcp_delacktime and tcp_msl are
 * all exported read/write through sysctl_msec_to_ticks(), i.e. they are
 * stored here in clock ticks and converted to/from milliseconds at the
 * sysctl boundary.
 */
/* NOTE(review): not referenced in this part of the file; presumably the connection-establishment timeout — confirm. */
int tcp_keepinit;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
&tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "");
/* NOTE(review): presumably the idle time consumed by TCP_KEEPIDLE() — confirm against that macro. */
int tcp_keepidle;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
&tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "");
/*
 * Value loaded into the keepalive timer after a probe is sent; also used to
 * re-arm the 2MSL timer in tcp_timers() and to derive tcp_maxidle.
 */
int tcp_keepintvl;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
&tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "");
int tcp_delacktime;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime,
CTLTYPE_INT|CTLFLAG_RW, &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
"Time before a delayed ACK is sent");
int tcp_msl;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
&tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");
/* When non-zero, the keepalive timer treats every connection as if SO_KEEPALIVE were set. */
static int always_keepalive = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
&always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
/* Multiplied by tcp_keepintvl in tcp_slowtimo() to produce tcp_maxidle. */
static int tcp_keepcnt = TCPTV_KEEPCNT;
/* Idle limit checked by the persist timer before it drops a connection. */
int tcp_maxpersistidle;
/* Recomputed on every tcp_slowtimo() pass as tcp_keepcnt * tcp_keepintvl. */
int tcp_maxidle;
/*
 * Time-wait wheel: an array of PCB lists, one per slot.  cur_tw_slot is the
 * slot serviced by the current tcp_slowtimo() pass; it is advanced (and
 * wrapped at N_TIME_WAIT_SLOTS) at the end of each pass.
 */
struct inpcbhead time_wait_slots[N_TIME_WAIT_SLOTS];
int cur_tw_slot = 0;
/* NOTE(review): not referenced anywhere in this part of the file. */
u_long *delack_bitmask;
void add_to_time_wait_locked(tp)
struct tcpcb *tp;
{
int tw_slot;
#if 0
lck_mtx_assert(tp->t_inpcb->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
#endif
LIST_REMOVE(tp->t_inpcb, inp_list);
if (tp->t_timer[TCPT_2MSL] == 0)
tp->t_timer[TCPT_2MSL] = 1;
tp->t_rcvtime += tp->t_timer[TCPT_2MSL] & (N_TIME_WAIT_SLOTS - 1);
tw_slot = (tp->t_timer[TCPT_2MSL] & (N_TIME_WAIT_SLOTS - 1)) + cur_tw_slot;
if (tw_slot >= N_TIME_WAIT_SLOTS)
tw_slot -= N_TIME_WAIT_SLOTS;
LIST_INSERT_HEAD(&time_wait_slots[tw_slot], tp->t_inpcb, inp_list);
}
void add_to_time_wait(tp)
struct tcpcb *tp;
{
struct inpcbinfo *pcbinfo = &tcbinfo;
if (!lck_rw_try_lock_exclusive(pcbinfo->mtx)) {
tcp_unlock(tp->t_inpcb->inp_socket, 0, 0);
lck_rw_lock_exclusive(pcbinfo->mtx);
tcp_lock(tp->t_inpcb->inp_socket, 0, 0);
}
add_to_time_wait_locked(tp);
lck_rw_done(pcbinfo->mtx);
}
/*
 * Fast timeout routine: flush pending delayed ACKs.
 *
 * Walks the TCP PCB list under the shared pcbinfo lock.  For every
 * connection flagged TF_DELACK the flag is converted to TF_ACKNOW and
 * tcp_output() is called, with the socket locked for the duration.
 * Does nothing (beyond tracing) when tcp_delack_enabled is zero.
 *
 * Every exit path emits a DBG_FUNC_END trace event so that it pairs with
 * the DBG_FUNC_START emitted on entry.
 */
void
tcp_fasttimo()
{
	struct inpcb *inp, *inpnxt;
	register struct tcpcb *tp;
	struct inpcbinfo *pcbinfo = &tcbinfo;
	/* NOTE(review): delack_checked is only ever reported, never incremented. */
	int delack_checked = 0, delack_done = 0;

	KERNEL_DEBUG(DBG_FNC_TCP_FAST | DBG_FUNC_START, 0,0,0,0,0);

	if (tcp_delack_enabled == 0) {
		/* Close the trace interval opened above before bailing out. */
		KERNEL_DEBUG(DBG_FNC_TCP_FAST | DBG_FUNC_END, delack_checked, delack_done, tcpstat.tcps_delack,0,0);
		return;
	}

	lck_rw_lock_shared(pcbinfo->mtx);

	for (inp = tcb.lh_first; inp != NULL; inp = inpnxt) {
		/* Fetch the successor first; the loop body may block on locks. */
		inpnxt = inp->inp_list.le_next;

		/* Cheap unlocked pre-check; the flag is re-tested under the lock. */
		if ((tp = (struct tcpcb *)inp->inp_ppcb) && (tp->t_flags & TF_DELACK)) {
			/* Take a want-reference; skip PCBs that are being torn down. */
			if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
				continue;

			tcp_lock(inp->inp_socket, 1, 0);

			/* Drop the want-reference now that the socket is locked. */
			if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
				tcp_unlock(inp->inp_socket, 1, 0);
				continue;
			}

			if (tp->t_flags & TF_DELACK) {
				delack_done++;
				tp->t_flags &= ~TF_DELACK;
				tp->t_flags |= TF_ACKNOW;
				tcpstat.tcps_delack++;
				(void) tcp_output(tp);
			}
			tcp_unlock(inp->inp_socket, 1, 0);
		}
	}

	KERNEL_DEBUG(DBG_FNC_TCP_FAST | DBG_FUNC_END, delack_checked, delack_done, tcpstat.tcps_delack,0,0);
	lck_rw_done(pcbinfo->mtx);
}
void
tcp_slowtimo()
{
struct inpcb *inp, *inpnxt;
struct tcpcb *tp;
struct socket *so;
int i;
#if TCPDEBUG
int ostate;
#endif
#if KDEBUG
static int tws_checked;
#endif
struct inpcbinfo *pcbinfo = &tcbinfo;
KERNEL_DEBUG(DBG_FNC_TCP_SLOW | DBG_FUNC_START, 0,0,0,0,0);
tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
lck_rw_lock_shared(pcbinfo->mtx);
for (inp = tcb.lh_first; inp != NULL; inp = inpnxt) {
inpnxt = inp->inp_list.le_next;
if (in_pcb_checkstate(inp, WNT_ACQUIRE,0) == WNT_STOPUSING)
continue;
so = inp->inp_socket;
tcp_lock(so, 1, 0);
if ((in_pcb_checkstate(inp, WNT_RELEASE,1) == WNT_STOPUSING) && so->so_usecount == 1) {
tcp_unlock(so, 1, 0);
continue;
}
tp = intotcpcb(inp);
if (tp == 0 || tp->t_state == TCPS_LISTEN) {
tcp_unlock(so, 1, 0);
continue;
}
if (tp->t_state == TCP_NSTATES) {
tcp_unlock(so, 1, 0);
continue;
}
for (i = 0; i < TCPT_NTIMERS; i++) {
if (tp->t_timer[i] && --tp->t_timer[i] == 0) {
#if TCPDEBUG
ostate = tp->t_state;
#endif
tp = tcp_timers(tp, i);
if (tp == NULL)
goto tpgone;
#if TCPDEBUG
if (tp->t_inpcb->inp_socket->so_options
& SO_DEBUG)
tcp_trace(TA_USER, ostate, tp,
(void *)0,
(struct tcphdr *)0,
PRU_SLOWTIMO);
#endif
}
}
tp->t_rcvtime++;
tp->t_starttime++;
if (tp->t_rtttime)
tp->t_rtttime++;
tpgone:
tcp_unlock(so, 1, 0);
}
#if KDEBUG
tws_checked = 0;
#endif
KERNEL_DEBUG(DBG_FNC_TCP_SLOW | DBG_FUNC_NONE, tws_checked,0,0,0,0);
for (inp = time_wait_slots[cur_tw_slot].lh_first; inp; inp = inpnxt)
{
inpnxt = inp->inp_list.le_next;
#if KDEBUG
tws_checked++;
#endif
if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
continue;
tcp_lock(inp->inp_socket, 1, 0);
if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING)
goto twunlock;
tp = intotcpcb(inp);
if (tp == NULL) {
#if TEMPDEBUG
printf("tcp_slowtimo: tp is null in time-wait slot!\n");
#endif
goto twunlock;
}
if (tp->t_timer[TCPT_2MSL] >= N_TIME_WAIT_SLOTS) {
tp->t_timer[TCPT_2MSL] -= N_TIME_WAIT_SLOTS;
tp->t_rcvtime += N_TIME_WAIT_SLOTS;
}
else
tp->t_timer[TCPT_2MSL] = 0;
if (tp->t_timer[TCPT_2MSL] == 0)
tp = tcp_timers(tp, TCPT_2MSL);
twunlock:
tcp_unlock(inp->inp_socket, 1, 0);
}
if (lck_rw_lock_shared_to_exclusive(pcbinfo->mtx) != 0)
lck_rw_lock_exclusive(pcbinfo->mtx);
for (inp = tcb.lh_first; inp != NULL; inp = inpnxt) {
inpnxt = inp->inp_list.le_next;
if (inp->inp_socket == &tcbinfo.nat_dummy_socket)
continue;
if (inp->inp_wantcnt != WNT_STOPUSING)
continue;
so = inp->inp_socket;
if (!lck_mtx_try_lock(inp->inpcb_mtx)) {
#if TEMPDEBUG
printf("tcp_slowtimo so=%x STOPUSING but locked...\n", so);
#endif
continue;
}
if (so->so_usecount == 0)
in_pcbdispose(inp);
else {
tp = intotcpcb(inp);
if ((so->so_usecount == 1) && (tp->t_state == TCPS_CLOSED) &&
(so->so_head != NULL) && (so->so_state & SS_INCOMP)) {
so->so_usecount--;
in_pcbdispose(inp);
} else
lck_mtx_unlock(inp->inpcb_mtx);
}
}
for (inp = time_wait_slots[cur_tw_slot].lh_first; inp; inp = inpnxt)
{
inpnxt = inp->inp_list.le_next;
if (inp->inp_wantcnt != WNT_STOPUSING)
continue;
so = inp->inp_socket;
if (!lck_mtx_try_lock(inp->inpcb_mtx))
continue;
if (so->so_usecount == 0)
in_pcbdispose(inp);
else {
tp = intotcpcb(inp);
if ((so->so_usecount == 1) && (tp->t_state == TCPS_CLOSED) &&
(so->so_head != NULL) && (so->so_state & SS_INCOMP)) {
so->so_usecount--;
in_pcbdispose(inp);
} else
lck_mtx_unlock(inp->inpcb_mtx);
}
}
tcp_now++;
if (++cur_tw_slot >= N_TIME_WAIT_SLOTS)
cur_tw_slot = 0;
lck_rw_done(pcbinfo->mtx);
KERNEL_DEBUG(DBG_FNC_TCP_SLOW | DBG_FUNC_END, tws_checked, cur_tw_slot,0,0,0);
}
/*
 * Stop every timer of a connection by zeroing all TCPT_NTIMERS entries of
 * its t_timer array.
 */
void
tcp_canceltimers(tp)
	struct tcpcb *tp;
{
	register int slot = TCPT_NTIMERS;

	while (slot-- > 0)
		tp->t_timer[slot] = 0;
}
/*
 * Retransmit back-off multipliers, indexed by t_rxtshift (the number of
 * consecutive retransmit timeouts).  tcp_timers() multiplies
 * TCP_REXMTVAL(tp) by the selected entry.
 *
 * tcp_syn_backoff is used while the connection is in SYN_SENT and stays at
 * 1 for the first five entries; tcp_backoff is used in every other state
 * and doubles up to a cap of 64.
 */
int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
{ 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };
int tcp_backoff[TCP_MAXRXTSHIFT + 1] =
{ 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
/*
 * Sum of all tcp_backoff[] entries (1+2+4+8+16+32 + 7*64 = 511); the
 * persist timer compares idle time against TCP_REXMTVAL(tp) times this.
 * Must be kept in step with the table above.
 */
static int tcp_totbackoff = 511;
/*
 * TCP timer processing: perform the action for one expired timer.
 *
 *   tp     connection whose timer expired
 *   timer  index of the timer (TCPT_2MSL, TCPT_REXMT, TCPT_PERSIST or
 *          TCPT_KEEP); any other value is ignored
 *
 * Returns the tcpcb, or whatever tcp_close()/tcp_drop() returned if the
 * action closed or dropped the connection (tcp_slowtimo() treats a NULL
 * return as "connection gone").
 *
 * Debug tracing of timer events is done by the caller after this function
 * returns; nothing is traced here.
 */
struct tcpcb *
tcp_timers(tp, timer)
	register struct tcpcb *tp;
	int timer;
{
	register int rexmt;
	struct socket *so_tmp;
	struct tcptemp *t_template;
#if INET6
	int isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV4) == 0;
#endif

	/* Remember the socket now: tp may be gone after tcp_drop(). */
	so_tmp = tp->t_inpcb->inp_socket;

	switch (timer) {

	/*
	 * 2MSL timer.  A connection that is not in TIME_WAIT and has been
	 * idle for no more than tcp_maxidle is re-armed for another
	 * tcp_keepintvl and re-queued on the time-wait wheel; anything else
	 * is closed.
	 */
	case TCPT_2MSL:
		if (tp->t_state != TCPS_TIME_WAIT &&
		    tp->t_rcvtime <= tcp_maxidle) {
			tp->t_timer[TCPT_2MSL] = (unsigned long)tcp_keepintvl;
			add_to_time_wait_locked(tp);
		}
		else {
			tp = tcp_close(tp);
			return(tp);
		}
		break;

	/*
	 * Retransmission timer.  Give up once the shift count exceeds
	 * TCP_MAXRXTSHIFT; otherwise back off the timer, collapse the
	 * congestion window and retransmit from snd_una.
	 */
	case TCPT_REXMT:
		if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
			tp->t_rxtshift = TCP_MAXRXTSHIFT;
			tcpstat.tcps_timeoutdrop++;
			tp = tcp_drop(tp, tp->t_softerror ?
			    tp->t_softerror : ETIMEDOUT);
			postevent(so_tmp, 0, EV_TIMEOUT);
			break;
		}

		if (tp->t_rxtshift == 1) {
			/*
			 * First timeout: save the congestion state and set
			 * the window (half the smoothed RTT from now) tracked
			 * in t_badrxtwin.
			 */
			tp->snd_cwnd_prev = tp->snd_cwnd;
			tp->snd_ssthresh_prev = tp->snd_ssthresh;
			tp->t_badrxtwin = tcp_now + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
		}
		tcpstat.tcps_rexmttimeo++;

		/* SYN retransmits use their own, gentler back-off table. */
		if (tp->t_state == TCPS_SYN_SENT)
			rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift];
		else
			rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
		TCPT_RANGESET(tp->t_rxtcur, rexmt,
			tp->t_rttmin, TCPTV_REXMTMAX);
		tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;

		/* Third SYN retransmit: stop requesting TCP options. */
		if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3))
			tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_REQ_CC);

		/*
		 * After more than a quarter of the allowed retransmits,
		 * notify the routing layer via in_losing()/in6_losing() and
		 * fold the smoothed RTT into the variance before zeroing it.
		 */
		if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
#if INET6
			if (isipv6)
				in6_losing(tp->t_inpcb);
			else
#endif
			in_losing(tp->t_inpcb);
			tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
			tp->t_srtt = 0;
		}

		/* Resend starting at the oldest unacknowledged byte. */
		tp->snd_nxt = tp->snd_una;
		tp->snd_recover = tp->snd_max;
		tp->t_flags |= TF_ACKNOW;
		/* Stop any RTT measurement in progress. */
		tp->t_rtttime = 0;

		/*
		 * Congestion response: cwnd drops to one segment and
		 * ssthresh to half the current window (at least two
		 * segments).
		 */
		{
			u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
			if (win < 2)
				win = 2;
			tp->snd_cwnd = tp->t_maxseg;
			tp->snd_ssthresh = win * tp->t_maxseg;
			tp->t_dupacks = 0;
		}
		(void) tcp_output(tp);
		break;

	/*
	 * Persist timer.  Drop the connection if the back-off is maxed out
	 * and the peer has been silent for too long; otherwise restart the
	 * persist timer and force output.
	 */
	case TCPT_PERSIST:
		tcpstat.tcps_persisttimeo++;
		if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
		    (tp->t_rcvtime >= tcp_maxpersistidle ||
		    tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
			tcpstat.tcps_persistdrop++;
			tp = tcp_drop(tp, ETIMEDOUT);
			postevent(so_tmp, 0, EV_TIMEOUT);
			break;
		}
		tcp_setpersist(tp);
		tp->t_force = 1;
		(void) tcp_output(tp);
		tp->t_force = 0;
		break;

	/*
	 * Keepalive timer.  Connections that never reached ESTABLISHED are
	 * dropped.  With keepalives in effect (SO_KEEPALIVE or
	 * always_keepalive) and the state at or before CLOSING, either drop
	 * the connection for being idle too long or send a probe and re-arm
	 * for tcp_keepintvl.  Otherwise just re-arm for TCP_KEEPIDLE(tp).
	 */
	case TCPT_KEEP:
		tcpstat.tcps_keeptimeo++;
		if (tp->t_state < TCPS_ESTABLISHED)
			goto dropit;
		if ((always_keepalive ||
		    tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) &&
		    tp->t_state <= TCPS_CLOSING) {
			if (tp->t_rcvtime >= TCP_KEEPIDLE(tp) + (unsigned long)tcp_maxidle)
				goto dropit;
			/*
			 * Probe with sequence number snd_una - 1, i.e. one
			 * below the oldest unacknowledged byte.
			 */
			tcpstat.tcps_keepprobe++;
			t_template = tcp_maketemplate(tp);
			if (t_template) {
				tcp_respond(tp, t_template->tt_ipgen,
				    &t_template->tt_t, (struct mbuf *)NULL,
				    tp->rcv_nxt, tp->snd_una - 1, 0);
				(void) m_free(dtom(t_template));
			}
			tp->t_timer[TCPT_KEEP] = tcp_keepintvl;
		} else
			tp->t_timer[TCPT_KEEP] = TCP_KEEPIDLE(tp);
		break;

	dropit:
		tcpstat.tcps_keepdrops++;
		tp = tcp_drop(tp, ETIMEDOUT);
		postevent(so_tmp, 0, EV_TIMEOUT);
		break;

	default:
		/* Unknown timer index: nothing to do. */
		break;
	}
	return (tp);
}