#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/protosw.h>
#include <sys/socketvar.h>
#include <sys/syslog.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#if INET6
#include <netinet/ip6.h>
#endif
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_cc.h>
#include <netinet/tcpip.h>
#include <netinet/tcp_seq.h>
#include <kern/task.h>
#include <libkern/OSAtomic.h>
static int tcp_cubic_init(struct tcpcb *tp);
static int tcp_cubic_cleanup(struct tcpcb *tp);
static void tcp_cubic_cwnd_init_or_reset(struct tcpcb *tp);
static void tcp_cubic_congestion_avd(struct tcpcb *tp, struct tcphdr *th);
static void tcp_cubic_ack_rcvd(struct tcpcb *tp, struct tcphdr *th);
static void tcp_cubic_pre_fr(struct tcpcb *tp);
static void tcp_cubic_post_fr(struct tcpcb *tp, struct tcphdr *th);
static void tcp_cubic_after_timeout(struct tcpcb *tp);
static int tcp_cubic_delay_ack(struct tcpcb *tp, struct tcphdr *th);
static void tcp_cubic_switch_cc(struct tcpcb *tp, u_int16_t old_index);
static uint32_t tcp_cubic_update(struct tcpcb *tp, u_int32_t rtt);
static uint32_t tcp_cubic_tcpwin(struct tcpcb *tp, struct tcphdr *th);
static inline void tcp_cubic_clear_state(struct tcpcb *tp);
extern float cbrtf(float x);
/*
 * Function-pointer table registering CUBIC with the TCP congestion
 * control framework.  Note that cwnd_init and after_idle share one
 * implementation: both reset per-connection CUBIC state and
 * re-initialize the congestion window.
 */
struct tcp_cc_algo tcp_cc_cubic = {
	.name = "cubic",
	.init = tcp_cubic_init,
	.cleanup = tcp_cubic_cleanup,
	.cwnd_init = tcp_cubic_cwnd_init_or_reset,
	.congestion_avd = tcp_cubic_congestion_avd,
	.ack_rcvd = tcp_cubic_ack_rcvd,
	.pre_fr = tcp_cubic_pre_fr,
	.post_fr = tcp_cubic_post_fr,
	.after_idle = tcp_cubic_cwnd_init_or_reset,
	.after_timeout = tcp_cubic_after_timeout,
	.delay_ack = tcp_cubic_delay_ack,
	.switch_to = tcp_cubic_switch_cc
};
/* Multiplicative back-off on loss: window is reduced by 20% (see pre_fr). */
const float tcp_cubic_backoff = 0.2;
/* CUBIC scaling constant "C" in W(t) = C*(t - K)^3 + W_max (used by tcp_cubic_update). */
const float tcp_cubic_coeff = 0.4;
/* Weight applied to W_max when a loss occurs below the previous W_max. */
const float tcp_cubic_fast_convergence_factor = 0.875;

/* sysctl: always allow the Reno-friendly window estimate to take over when larger. */
static int tcp_cubic_tcp_friendliness = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, cubic_tcp_friendliness,
    CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_cubic_tcp_friendliness, 0,
    "Enable TCP friendliness");

/* sysctl: shrink remembered W_max on repeated losses to release bandwidth early. */
static int tcp_cubic_fast_convergence = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, cubic_fast_convergence,
    CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_cubic_fast_convergence, 0,
    "Enable fast convergence");

/* sysctl: when non-zero, also acts as a floor on the rtt added to elapsed time
 * in tcp_cubic_update — NOTE(review): value doubles as flag and floor; confirm units. */
static int tcp_cubic_use_minrtt = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, cubic_use_minrtt,
    CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_cubic_use_minrtt, 0,
    "use a min of 5 sec rtt");
/*
 * Attach CUBIC to a connection: bump the per-algorithm socket count
 * and zero the per-connection CUBIC state.  The framework must have
 * allocated t_ccstate before this is called.
 */
static int tcp_cubic_init(struct tcpcb *tp)
{
	/* Per-connection state is allocated by the CC framework. */
	VERIFY(tp->t_ccstate != NULL);

	/* One more socket is now using CUBIC. */
	OSIncrementAtomic((volatile SInt32 *)&tcp_cc_cubic.num_sockets);

	tcp_cubic_clear_state(tp);
	return (0);
}
/*
 * Detach CUBIC from a connection.  Only the per-algorithm socket
 * count is maintained here; t_ccstate itself is freed by the caller.
 */
static int tcp_cubic_cleanup(struct tcpcb *tp)
{
#pragma unused(tp)
	OSDecrementAtomic((volatile SInt32 *)&tcp_cc_cubic.num_sockets);
	return (0);
}
/*
 * Initialize the congestion window when the connection starts, or
 * reset CUBIC state after the connection has been idle.
 */
static void tcp_cubic_cwnd_init_or_reset(struct tcpcb *tp)
{
	VERIFY(tp->t_ccstate != NULL);
	tcp_cubic_clear_state(tp);
	tcp_cc_cwnd_init_or_reset(tp);
	/*
	 * If the connection has sent little data so far and ssthresh is
	 * low (presumably inherited, e.g. from cached route metrics —
	 * confirm), raise it to the maximum window so the connection
	 * gets a full slow start instead of entering congestion
	 * avoidance immediately.
	 */
	if (tp->t_inpcb->inp_stat->txbytes <= TCP_CC_CWND_INIT_BYTES
	    && tp->snd_ssthresh < (TCP_MAXWIN << TCP_MAX_WINSHIFT))
		tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
	/* Seed W_max from the (possibly raised) slow-start threshold. */
	tp->t_ccstate->cub_last_max = tp->snd_ssthresh;

	tcp_cc_after_idle_stretchack(tp);
}
/*
 * Compute the CUBIC target congestion window for the current time:
 *   W(t) = C * (t - K)^3 + W_max        (RFC 8312)
 * where t is the time elapsed since the start of the current epoch
 * (advanced by one rtt so the target looks one round-trip ahead),
 * K is the time at which the curve reaches W_max, and C is
 * tcp_cubic_coeff.
 */
static uint32_t
tcp_cubic_update(struct tcpcb *tp, u_int32_t rtt)
{
	float K, var;
	u_int32_t elapsed_time, win;

	win = min(tp->snd_cwnd, tp->snd_wnd);
	/* No recorded W_max yet: fall back to the slow-start threshold. */
	if (tp->t_ccstate->cub_last_max == 0)
		tp->t_ccstate->cub_last_max = tp->snd_ssthresh;
	if (tp->t_ccstate->cub_epoch_start == 0) {
		/*
		 * Beginning of a new epoch (first update after a loss
		 * event): fix the curve's origin point and the period K.
		 */
		tp->t_ccstate->cub_epoch_start = tcp_now;
		/* 0 is the "unset" sentinel; avoid storing it as a timestamp. */
		if (tp->t_ccstate->cub_epoch_start == 0)
			tp->t_ccstate->cub_epoch_start = 1;
		if (win < tp->t_ccstate->cub_last_max) {
			/* Floating point: must be running in kernel task context. */
			VERIFY(current_task() == kernel_task);
			/* K = cbrt((W_max - win) / MSS / C), in seconds. */
			K = (tp->t_ccstate->cub_last_max - win)
			    / tp->t_maxseg / tcp_cubic_coeff;
			K = cbrtf(K);
			tp->t_ccstate->cub_epoch_period = K * TCP_RETRANSHZ;
			/* Concave approach toward the previous W_max. */
			tp->t_ccstate->cub_origin_point =
			    tp->t_ccstate->cub_last_max;
		} else {
			/* Already at/above W_max: probe convexly from here. */
			tp->t_ccstate->cub_epoch_period = 0;
			tp->t_ccstate->cub_origin_point = win;
		}
		tp->t_ccstate->cub_target_win = 0;
	}
	VERIFY(tp->t_ccstate->cub_origin_point > 0);
	/* Time since epoch start, plus one rtt to anticipate the next window. */
	elapsed_time = timer_diff(tcp_now, 0,
	    tp->t_ccstate->cub_epoch_start, 0);
	if (tcp_cubic_use_minrtt)
		/* Sysctl value also serves as a floor on the rtt added here. */
		elapsed_time += max(tcp_cubic_use_minrtt, rtt);
	else
		elapsed_time += rtt;
	/* var = C * MSS * ((t - K) / HZ)^3; negative before K, positive after. */
	var = (elapsed_time - tp->t_ccstate->cub_epoch_period) / TCP_RETRANSHZ;
	var = var * var * var * (tcp_cubic_coeff * tp->t_maxseg);
	tp->t_ccstate->cub_target_win = tp->t_ccstate->cub_origin_point + var;
	return (tp->t_ccstate->cub_target_win);
}
/*
 * Decide whether the Reno-friendly ("TCP mode") window estimate should
 * be allowed to drive cwnd: enabled for non-realtime sources whose
 * smoothed W_max deviation exceeds two segments (i.e. the loss point is
 * fluctuating, suggesting competition with loss-based flows).
 *
 * Fix: the mean-deviation comparison previously read `tp->t_maxseg`
 * instead of `(_tp_)->t_maxseg`, capturing the caller's local `tp` —
 * it only worked because the sole call site passes a variable named
 * `tp`.  Use the macro parameter throughout for hygiene.
 */
#define TCP_CUBIC_ENABLE_TCPMODE(_tp_) \
	((!soissrcrealtime((_tp_)->t_inpcb->inp_socket) && \
	(_tp_)->t_ccstate->cub_mean_dev > ((_tp_)->t_maxseg << 1)) ? 1 : 0)
/*
 * Estimate the congestion window a standard Reno-style AIMD flow would
 * have reached by now: grow by one MSS for every window's worth of
 * acknowledged bytes.  Used for TCP friendliness (RFC 8312 4.2).
 */
static uint32_t
tcp_cubic_tcpwin(struct tcpcb *tp, struct tcphdr *th)
{
	if (tp->t_ccstate->cub_tcp_win == 0) {
		/* First estimate in this epoch: seed from the current window. */
		tp->t_ccstate->cub_tcp_win = min(tp->snd_cwnd, tp->snd_wnd);
		tp->t_ccstate->cub_tcp_bytes_acked = 0;
		return (tp->t_ccstate->cub_tcp_win);
	}

	/* Accumulate acked bytes; one full window of them buys one MSS. */
	tp->t_ccstate->cub_tcp_bytes_acked += BYTES_ACKED(th, tp);
	if (tp->t_ccstate->cub_tcp_bytes_acked >=
	    tp->t_ccstate->cub_tcp_win) {
		tp->t_ccstate->cub_tcp_bytes_acked -=
		    tp->t_ccstate->cub_tcp_win;
		tp->t_ccstate->cub_tcp_win += tp->t_maxseg;
	}
	return (tp->t_ccstate->cub_tcp_win);
}
/*
 * Congestion avoidance, called on each ACK once cwnd >= ssthresh.
 * Computes both the cubic target window and the Reno-equivalent
 * window, then grows cwnd toward whichever policy applies.
 */
static void
tcp_cubic_congestion_avd(struct tcpcb *tp, struct tcphdr *th)
{
	u_int32_t cubic_target_win, tcp_win, rtt;

	/* Byte counting (RFC 3465): accumulate acknowledged bytes. */
	tp->t_bytes_acked += BYTES_ACKED(th, tp);

	rtt = get_base_rtt(tp);
	/* Target from the cubic curve for the current elapsed time. */
	cubic_target_win = tcp_cubic_update(tp, rtt);
	/* Window a standard AIMD flow would have reached. */
	tcp_win = tcp_cubic_tcpwin(tp, th);

	if (tp->snd_cwnd < tcp_win &&
	    (tcp_cubic_tcp_friendliness == 1 ||
	    TCP_CUBIC_ENABLE_TCPMODE(tp))) {
		/*
		 * "TCP mode": the cubic curve is growing slower than
		 * standard TCP would, so follow the Reno estimate to stay
		 * competitive — but only after a full cwnd of bytes has
		 * been acknowledged.
		 */
		if (tp->t_bytes_acked >= tp->snd_cwnd) {
			tp->t_bytes_acked -= tp->snd_cwnd;
			tp->snd_cwnd = min(tcp_win, TCP_MAXWIN << tp->snd_scale);
		}
	} else {
		if (cubic_target_win > tp->snd_cwnd) {
			u_int64_t incr_win;
			/*
			 * incr_win is the number of acked bytes that
			 * corresponds to one MSS of progress toward the
			 * cubic target: the closer the target, the more
			 * bytes required per step, so growth slows near
			 * the plateau.
			 */
			incr_win = tp->snd_cwnd * tp->t_maxseg;
			incr_win /= (cubic_target_win - tp->snd_cwnd);
			if (incr_win > 0 &&
			    tp->t_bytes_acked >= incr_win) {
				tp->t_bytes_acked -= incr_win;
				/* Grow by one MSS, clamped to the maximum window. */
				tp->snd_cwnd =
				    min((tp->snd_cwnd + tp->t_maxseg),
				    TCP_MAXWIN << tp->snd_scale);
			}
		}
	}
}
/*
 * ACK handler: congestion avoidance once cwnd reaches ssthresh,
 * otherwise slow start with Appropriate Byte Counting (RFC 3465).
 */
static void
tcp_cubic_ack_rcvd(struct tcpcb *tp, struct tcphdr *th)
{
	uint32_t bytes_acked, abc_limit;

	if (tp->snd_cwnd >= tp->snd_ssthresh) {
		tcp_cubic_congestion_avd(tp, th);
		return;
	}

	/*
	 * Slow start: grow by the acknowledged byte count, capped at
	 * 1 MSS per ACK — or 2 MSS when lim2 is enabled and we are not
	 * retransmitting (snd_nxt == snd_max).
	 */
	bytes_acked = BYTES_ACKED(th, tp);
	abc_limit = (tcp_do_rfc3465_lim2 &&
	    tp->snd_nxt == tp->snd_max) ?
	    2 * tp->t_maxseg : tp->t_maxseg;

	tp->snd_cwnd += min(bytes_acked, abc_limit);
	/* Never exceed the maximum scaled window. */
	tp->snd_cwnd = min(tp->snd_cwnd,
	    TCP_MAXWIN << tp->snd_scale);
}
/*
 * Entering fast recovery (a loss was detected): remember the window at
 * which loss occurred (W_max), update its long-term statistics, and cut
 * ssthresh by the CUBIC back-off factor.
 */
static void
tcp_cubic_pre_fr(struct tcpcb *tp)
{
	uint32_t win, avg;
	int32_t dev;

	/* Force a new cubic epoch on the next tcp_cubic_update() call. */
	tp->t_ccstate->cub_epoch_start = 0;
	tp->t_ccstate->cub_tcp_win = 0;
	tp->t_ccstate->cub_target_win = 0;
	tp->t_ccstate->cub_tcp_bytes_acked = 0;

	win = min(tp->snd_cwnd, tp->snd_wnd);
	/*
	 * Fast convergence (RFC 8312 4.6): losing below the previous
	 * W_max suggests a new flow is competing — remember a reduced
	 * W_max to release bandwidth to it sooner.
	 */
	if (win < tp->t_ccstate->cub_last_max &&
	    tcp_cubic_fast_convergence == 1)
		tp->t_ccstate->cub_last_max = win *
		    tcp_cubic_fast_convergence_factor;
	else
		tp->t_ccstate->cub_last_max = win;
	if (tp->t_ccstate->cub_last_max == 0) {
		/* Window collapsed to zero: use outstanding data instead. */
		tp->t_ccstate->cub_last_max = tp->snd_max - tp->snd_una;
	}

	/* EWMA of W_max with weight 1/64: avg = avg*63/64 + last_max/64. */
	if (tp->t_ccstate->cub_avg_lastmax == 0) {
		tp->t_ccstate->cub_avg_lastmax = tp->t_ccstate->cub_last_max;
	} else {
		avg = tp->t_ccstate->cub_avg_lastmax;
		avg = (avg << 6) - avg;
		tp->t_ccstate->cub_avg_lastmax =
		    (avg + tp->t_ccstate->cub_last_max) >> 6;
	}

	/* Smoothed mean deviation of W_max (weight 1/16), used to detect
	 * fluctuating loss points in TCP_CUBIC_ENABLE_TCPMODE(). */
	dev = tp->t_ccstate->cub_avg_lastmax - tp->t_ccstate->cub_last_max;
	if (dev < 0)
		dev = -dev;
	if (tp->t_ccstate->cub_mean_dev == 0) {
		tp->t_ccstate->cub_mean_dev = dev;
	} else {
		dev = dev + ((tp->t_ccstate->cub_mean_dev << 4)
		    - tp->t_ccstate->cub_mean_dev);
		tp->t_ccstate->cub_mean_dev = dev >> 4;
	}

	/* Back off: ssthresh = (1 - backoff) * win, rounded down to whole
	 * segments and floored at 2 MSS. */
	win = win - (win * tcp_cubic_backoff);
	win = (win / tp->t_maxseg);
	if (win < 2)
		win = 2;
	tp->snd_ssthresh = win * tp->t_maxseg;
	tcp_cc_resize_sndbuf(tp);
}
static void
tcp_cubic_post_fr(struct tcpcb *tp, struct tcphdr *th)
{
uint32_t flight_size = 0;
if (SEQ_LEQ(th->th_ack, tp->snd_max))
flight_size = tp->snd_max - th->th_ack;
if (flight_size < tp->snd_ssthresh)
tp->snd_cwnd = max(flight_size, tp->t_maxseg)
+ tp->t_maxseg;
else
tp->snd_cwnd = tp->snd_ssthresh;
tp->t_ccstate->cub_tcp_win = 0;
tp->t_ccstate->cub_target_win = 0;
tp->t_ccstate->cub_tcp_bytes_acked = 0;
}
/*
 * Retransmission timeout: treat it as a loss event (unless fast
 * recovery already backed off ssthresh), then restart from one
 * segment per RFC 5681.
 */
static void
tcp_cubic_after_timeout(struct tcpcb *tp)
{
	VERIFY(tp->t_ccstate != NULL);
	/*
	 * If we are already in fast recovery, ssthresh was reduced when
	 * recovery began — avoid backing off a second time.
	 */
	if (!IN_FASTRECOVERY(tp)) {
		tcp_cubic_clear_state(tp);
		tcp_cubic_pre_fr(tp);
	}
	/* Collapse the window to a single segment after an RTO. */
	tp->snd_cwnd = tp->t_maxseg;
}
/*
 * Delayed-ACK decision: CUBIC has no algorithm-specific policy and
 * defers entirely to the generic congestion-control helper.
 */
static int
tcp_cubic_delay_ack(struct tcpcb *tp, struct tcphdr *th)
{
	int should_delay = tcp_cc_delay_ack(tp, th);

	return (should_delay);
}
/*
 * Called when a connection switches to CUBIC from another congestion
 * control algorithm: reset window state, restart RFC 3465 byte
 * counting, and account for the new CUBIC socket.
 */
static void
tcp_cubic_switch_cc(struct tcpcb *tp, uint16_t old_cc_index)
{
#pragma unused(old_cc_index)
	tcp_cubic_cwnd_init_or_reset(tp);
	/* Start counting acknowledged bytes from scratch. */
	tp->t_bytes_acked = 0;
	OSIncrementAtomic((volatile SInt32 *)&tcp_cc_cubic.num_sockets);
}
/*
 * Reset per-epoch CUBIC state.  cub_avg_lastmax and cub_mean_dev are
 * NOT cleared here — they appear to carry long-term W_max history
 * across resets (cub_mean_dev feeds TCP_CUBIC_ENABLE_TCPMODE);
 * NOTE(review): confirm this persistence is intentional.
 */
static inline void tcp_cubic_clear_state(struct tcpcb *tp)
{
	tp->t_ccstate->cub_last_max = 0;
	tp->t_ccstate->cub_epoch_start = 0;
	tp->t_ccstate->cub_origin_point = 0;
	tp->t_ccstate->cub_tcp_win = 0;
	tp->t_ccstate->cub_tcp_bytes_acked = 0;
	tp->t_ccstate->cub_epoch_period = 0;
	tp->t_ccstate->cub_target_win = 0;
}