Blame examples/tcp_cubic-better-follow-cubic-curve-converted.patch

Packit c71e3f
The original patch changes the initialization of 'cubictcp' instance of
Packit c71e3f
struct tcp_congestion_ops ('cubictcp.cwnd_event' field). Kpatch
Packit c71e3f
intentionally rejects to process such changes.
Packit c71e3f
Packit c71e3f
This modification of the patch uses Kpatch load/unload hooks to set
Packit c71e3f
'cubictcp.cwnd_event' when the binary patch is loaded and reset it to NULL
Packit c71e3f
when the patch is unloaded.
Packit c71e3f
Packit c71e3f
It is still needed to check if changing that field could be problematic
Packit c71e3f
due to concurrency issues, etc.
Packit c71e3f
Packit c71e3f
'cwnd_event' callback is used only via tcp_ca_event() function.
Packit c71e3f
Packit c71e3f
include/net/tcp.h:
Packit c71e3f
Packit c71e3f
static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
Packit c71e3f
{
Packit c71e3f
	const struct inet_connection_sock *icsk = inet_csk(sk);
Packit c71e3f
Packit c71e3f
	if (icsk->icsk_ca_ops->cwnd_event)
Packit c71e3f
		icsk->icsk_ca_ops->cwnd_event(sk, event);
Packit c71e3f
}
Packit c71e3f
Packit c71e3f
In turn, tcp_ca_event() is called in a number of places in
Packit c71e3f
net/ipv4/tcp_output.c and net/ipv4/tcp_input.c.
Packit c71e3f
Packit c71e3f
One problem with this modification of the patch is that it may not be safe
Packit c71e3f
to unload it. If it is possible for tcp_ca_event() to run concurrently with
Packit c71e3f
the unloading of the patch, it may happen that 'icsk->icsk_ca_ops->cwnd_event'
Packit c71e3f
is the address of bictcp_cwnd_event() when tcp_ca_event() checks it but is
Packit c71e3f
set to NULL right after. So 'icsk->icsk_ca_ops->cwnd_event(sk, event)' would
Packit c71e3f
result in a kernel oops.
Packit c71e3f
Packit c71e3f
Whether such scenario is possible or not, it should be analyzed. If it is,
Packit c71e3f
then at least, the body of tcp_ca_event() should be made atomic w.r.t.
Packit c71e3f
changing 'cwnd_event' in the patch somehow. Perhaps, RCU could be suitable
Packit c71e3f
for that: a read-side critical section for the body of tcp_ca_event() with
Packit c71e3f
a single read of icsk->icsk_ca_ops->cwnd_event pointer with rcu_dereference().
Packit c71e3f
The pointer could be set by the patch with rcu_assign_pointer().
Packit c71e3f
Packit c71e3f
An alternative suggested by Josh Poimboeuf would be to patch the functions
Packit c71e3f
that call 'cwnd_event' callback (tcp_ca_event() in this case) so that they
Packit c71e3f
call bictcp_cwnd_event() directly when they detect the cubictcp struct [1].
Packit c71e3f
Packit c71e3f
Note that tcp_ca_event() is inlined in a number of places, so the binary
Packit c71e3f
patch will provide replacements for all of the corresponding functions
Packit c71e3f
rather than for just one. It is still needed to check if replacing these
Packit c71e3f
functions in runtime is safe.
Packit c71e3f
Packit c71e3f
References:
Packit c71e3f
[1] https://www.redhat.com/archives/kpatch/2015-September/msg00005.html
Packit c71e3f
Packit c71e3f
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
Packit c71e3f
index 894b7ce..9bff8a0 100644
Packit c71e3f
--- a/net/ipv4/tcp_cubic.c
Packit c71e3f
+++ b/net/ipv4/tcp_cubic.c
Packit c71e3f
@@ -153,6 +153,27 @@ static void bictcp_init(struct sock *sk)
Packit c71e3f
 		tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
Packit c71e3f
 }
Packit c71e3f
Packit c71e3f
+static void bictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event)
Packit c71e3f
+{
Packit c71e3f
+	if (event == CA_EVENT_TX_START) {
Packit c71e3f
+		struct bictcp *ca = inet_csk_ca(sk);
Packit c71e3f
+		u32 now = tcp_time_stamp;
Packit c71e3f
+		s32 delta;
Packit c71e3f
+
Packit c71e3f
+		delta = now - tcp_sk(sk)->lsndtime;
Packit c71e3f
+
Packit c71e3f
+		/* We were application limited (idle) for a while.
Packit c71e3f
+		 * Shift epoch_start to keep cwnd growth to cubic curve.
Packit c71e3f
+		 */
Packit c71e3f
+		if (ca->epoch_start && delta > 0) {
Packit c71e3f
+			ca->epoch_start += delta;
Packit c71e3f
+			if (after(ca->epoch_start, now))
Packit c71e3f
+				ca->epoch_start = now;
Packit c71e3f
+		}
Packit c71e3f
+		return;
Packit c71e3f
+	}
Packit c71e3f
+}
Packit c71e3f
+
Packit c71e3f
 /* calculate the cubic root of x using a table lookup followed by one
Packit c71e3f
  * Newton-Raphson iteration.
Packit c71e3f
  * Avg err ~= 0.195%
Packit c71e3f
@@ -444,6 +465,20 @@ static struct tcp_congestion_ops cubictcp __read_mostly = {
Packit c71e3f
 	.name		= "cubic",
Packit c71e3f
 };
Packit c71e3f
Packit c71e3f
+void kpatch_load_cubictcp_cwnd_event(void)
Packit c71e3f
+{
Packit c71e3f
+	cubictcp.cwnd_event = bictcp_cwnd_event;
Packit c71e3f
+}
Packit c71e3f
+
Packit c71e3f
+void kpatch_unload_cubictcp_cwnd_event(void)
Packit c71e3f
+{
Packit c71e3f
+	cubictcp.cwnd_event = NULL;
Packit c71e3f
+}
Packit c71e3f
+
Packit c71e3f
+#include "kpatch-macros.h"
Packit c71e3f
+KPATCH_LOAD_HOOK(kpatch_load_cubictcp_cwnd_event);
Packit c71e3f
+KPATCH_UNLOAD_HOOK(kpatch_unload_cubictcp_cwnd_event);
Packit c71e3f
+
Packit c71e3f
 static int __init cubictcp_register(void)
Packit c71e3f
 {
Packit c71e3f
 	BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);