diff --git a/Makefile b/Makefile index 5b04041..19bd163 100644 --- a/Makefile +++ b/Makefile @@ -55,7 +55,7 @@ WFLAGS += -Wmissing-declarations -Wold-style-definition -Wformat=2 CFLAGS := $(WFLAGS) $(CCOPTS) -I../include -I../include/uapi $(DEFINES) $(CFLAGS) YACCFLAGS = -d -t -v -SUBDIRS=lib ip tc bridge misc netem genl tipc devlink rdma man +SUBDIRS=lib ip tc bridge misc netem genl tipc devlink rdma dcb man vdpa LIBNETLINK=../lib/libutil.a ../lib/libnetlink.a LDLIBS += $(LIBNETLINK) diff --git a/README b/README index bc82187..fa0c786 100644 --- a/README +++ b/README @@ -28,17 +28,12 @@ The makefile will automatically build a config.mk file which contains definitions of libraries that may or may not be available on the system such as: ATM, ELF, MNL, and SELINUX. -3. To make documentation, cd to doc/ directory , then - look at start of Makefile and set correct values for - PAGESIZE=a4 , ie: a4 , letter ... (string) - PAGESPERPAGE=2 , ie: 1 , 2 ... (numeric) - and make there. It assumes, that latex, dvips and psnup - are in your path. - -4. This package includes matching sanitized kernel headers because - the build environment may not have up to date versions. See Makefile - if you have special requirements and need to point at different - kernel include files. +3. include/uapi + +This package includes matching sanitized kernel headers because +the build environment may not have up to date versions. See Makefile +if you have special requirements and need to point at different +kernel include files. 
Stephen Hemminger stephen@networkplumber.org diff --git a/bash-completion/devlink b/bash-completion/devlink index f710c88..361be9f 100644 --- a/bash-completion/devlink +++ b/bash-completion/devlink @@ -319,6 +319,57 @@ _devlink_port_split() esac } +# Completion for devlink port param set +_devlink_port_param_set() +{ + case $cword in + 7) + COMPREPLY=( $( compgen -W "value" -- "$cur" ) ) + return + ;; + 8) + # String argument + return + ;; + 9) + COMPREPLY=( $( compgen -W "cmode" -- "$cur" ) ) + return + ;; + 10) + COMPREPLY=( $( compgen -W "runtime driverinit permanent" -- \ + "$cur" ) ) + return + ;; + esac +} + +# Completion for devlink port param +_devlink_port_param() +{ + case "$cword" in + 3) + COMPREPLY=( $( compgen -W "show set" -- "$cur" ) ) + return + ;; + 4) + _devlink_direct_complete "port" + return + ;; + 5) + COMPREPLY=( $( compgen -W "name" -- "$cur" ) ) + return + ;; + 6) + _devlink_direct_complete "param_name" + return + ;; + esac + + if [[ "${words[3]}" == "set" ]]; then + _devlink_port_param_set + fi +} + # Completion for devlink port _devlink_port() { @@ -331,6 +382,10 @@ _devlink_port() _devlink_port_split return ;; + param) + _devlink_port_param + return + ;; show|unsplit) if [[ $cword -eq 3 ]]; then _devlink_direct_complete "port" @@ -635,7 +690,7 @@ _devlink_health_reporter() _devlink_health() { case $command in - show|recover|diagnose|set) + show|recover|diagnose|set|test) _devlink_health_reporter 0 if [[ $command == "set" ]]; then case $cword in diff --git a/bridge/bridge.c b/bridge/bridge.c index 453d689..f7bfe0b 100644 --- a/bridge/bridge.c +++ b/bridge/bridge.c @@ -77,20 +77,14 @@ static int do_cmd(const char *argv0, int argc, char **argv) return -1; } -static int batch(const char *name) +static int br_batch_cmd(int argc, char *argv[], void *data) { - char *line = NULL; - size_t len = 0; - int ret = EXIT_SUCCESS; + return do_cmd(argv[0], argc, argv); +} - if (name && strcmp(name, "-") != 0) { - if (freopen(name, "r", stdin) == NULL) { 
- fprintf(stderr, - "Cannot open file \"%s\" for reading: %s\n", - name, strerror(errno)); - return EXIT_FAILURE; - } - } +static int batch(const char *name) +{ + int ret; if (rtnl_open(&rth, 0) < 0) { fprintf(stderr, "Cannot open rtnetlink\n"); @@ -99,25 +93,7 @@ static int batch(const char *name) rtnl_set_strict_dump(&rth); - cmdlineno = 0; - while (getcmdline(&line, &len, stdin) != -1) { - char *largv[100]; - int largc; - - largc = makeargs(line, largv, 100); - if (largc == 0) - continue; /* blank line */ - - if (do_cmd(largv[0], largc, largv)) { - fprintf(stderr, "Command failed %s:%d\n", - name, cmdlineno); - ret = EXIT_FAILURE; - if (!force) - break; - } - } - if (line) - free(line); + ret = do_batch(name, force, br_batch_cmd, NULL); rtnl_close(&rth); return ret; @@ -141,7 +117,7 @@ main(int argc, char **argv) if (matches(opt, "-help") == 0) { usage(); } else if (matches(opt, "-Version") == 0) { - printf("bridge utility, 0.0\n"); + printf("bridge utility, %s\n", version); exit(0); } else if (matches(opt, "-stats") == 0 || matches(opt, "-statistics") == 0) { diff --git a/bridge/link.c b/bridge/link.c index 3bc7af2..d88c469 100644 --- a/bridge/link.c +++ b/bridge/link.c @@ -78,14 +78,6 @@ static void print_portstate(__u8 state) "state (%d) ", state); } -static void print_onoff(FILE *fp, const char *flag, __u8 val) -{ - if (is_json_context()) - print_bool(PRINT_JSON, flag, NULL, val); - else - fprintf(fp, "%s %s ", flag, val ? 
"on" : "off"); -} - static void print_hwmode(__u16 mode) { if (mode >= ARRAY_SIZE(hw_mode)) @@ -123,38 +115,38 @@ static void print_protinfo(FILE *fp, struct rtattr *attr) fprintf(fp, "%s ", _SL_); if (prtb[IFLA_BRPORT_MODE]) - print_onoff(fp, "hairpin", - rta_getattr_u8(prtb[IFLA_BRPORT_MODE])); + print_on_off(PRINT_ANY, "hairpin", "hairpin %s ", + rta_getattr_u8(prtb[IFLA_BRPORT_MODE])); if (prtb[IFLA_BRPORT_GUARD]) - print_onoff(fp, "guard", - rta_getattr_u8(prtb[IFLA_BRPORT_GUARD])); + print_on_off(PRINT_ANY, "guard", "guard %s ", + rta_getattr_u8(prtb[IFLA_BRPORT_GUARD])); if (prtb[IFLA_BRPORT_PROTECT]) - print_onoff(fp, "root_block", - rta_getattr_u8(prtb[IFLA_BRPORT_PROTECT])); + print_on_off(PRINT_ANY, "root_block", "root_block %s ", + rta_getattr_u8(prtb[IFLA_BRPORT_PROTECT])); if (prtb[IFLA_BRPORT_FAST_LEAVE]) - print_onoff(fp, "fastleave", - rta_getattr_u8(prtb[IFLA_BRPORT_FAST_LEAVE])); + print_on_off(PRINT_ANY, "fastleave", "fastleave %s ", + rta_getattr_u8(prtb[IFLA_BRPORT_FAST_LEAVE])); if (prtb[IFLA_BRPORT_LEARNING]) - print_onoff(fp, "learning", - rta_getattr_u8(prtb[IFLA_BRPORT_LEARNING])); + print_on_off(PRINT_ANY, "learning", "learning %s ", + rta_getattr_u8(prtb[IFLA_BRPORT_LEARNING])); if (prtb[IFLA_BRPORT_LEARNING_SYNC]) - print_onoff(fp, "learning_sync", - rta_getattr_u8(prtb[IFLA_BRPORT_LEARNING_SYNC])); + print_on_off(PRINT_ANY, "learning_sync", "learning_sync %s ", + rta_getattr_u8(prtb[IFLA_BRPORT_LEARNING_SYNC])); if (prtb[IFLA_BRPORT_UNICAST_FLOOD]) - print_onoff(fp, "flood", - rta_getattr_u8(prtb[IFLA_BRPORT_UNICAST_FLOOD])); + print_on_off(PRINT_ANY, "flood", "flood %s ", + rta_getattr_u8(prtb[IFLA_BRPORT_UNICAST_FLOOD])); if (prtb[IFLA_BRPORT_MCAST_FLOOD]) - print_onoff(fp, "mcast_flood", - rta_getattr_u8(prtb[IFLA_BRPORT_MCAST_FLOOD])); + print_on_off(PRINT_ANY, "mcast_flood", "mcast_flood %s ", + rta_getattr_u8(prtb[IFLA_BRPORT_MCAST_FLOOD])); if (prtb[IFLA_BRPORT_MCAST_TO_UCAST]) - print_onoff(fp, "mcast_to_unicast", - 
rta_getattr_u8(prtb[IFLA_BRPORT_MCAST_TO_UCAST])); + print_on_off(PRINT_ANY, "mcast_to_unicast", "mcast_to_unicast %s ", + rta_getattr_u8(prtb[IFLA_BRPORT_MCAST_TO_UCAST])); if (prtb[IFLA_BRPORT_NEIGH_SUPPRESS]) - print_onoff(fp, "neigh_suppress", - rta_getattr_u8(prtb[IFLA_BRPORT_NEIGH_SUPPRESS])); + print_on_off(PRINT_ANY, "neigh_suppress", "neigh_suppress %s ", + rta_getattr_u8(prtb[IFLA_BRPORT_NEIGH_SUPPRESS])); if (prtb[IFLA_BRPORT_VLAN_TUNNEL]) - print_onoff(fp, "vlan_tunnel", - rta_getattr_u8(prtb[IFLA_BRPORT_VLAN_TUNNEL])); + print_on_off(PRINT_ANY, "vlan_tunnel", "vlan_tunnel %s ", + rta_getattr_u8(prtb[IFLA_BRPORT_VLAN_TUNNEL])); if (prtb[IFLA_BRPORT_BACKUP_PORT]) { int ifidx; @@ -166,8 +158,8 @@ static void print_protinfo(FILE *fp, struct rtattr *attr) } if (prtb[IFLA_BRPORT_ISOLATED]) - print_onoff(fp, "isolated", - rta_getattr_u8(prtb[IFLA_BRPORT_ISOLATED])); + print_on_off(PRINT_ANY, "isolated", "isolated %s ", + rta_getattr_u8(prtb[IFLA_BRPORT_ISOLATED])); } else print_portstate(rta_getattr_u8(attr)); } @@ -275,22 +267,6 @@ static void usage(void) exit(-1); } -static bool on_off(char *arg, __s8 *attr, char *val) -{ - if (strcmp(val, "on") == 0) - *attr = 1; - else if (strcmp(val, "off") == 0) - *attr = 0; - else { - fprintf(stderr, - "Error: argument of \"%s\" must be \"on\" or \"off\"\n", - arg); - return false; - } - - return true; -} - static int brlink_modify(int argc, char **argv) { struct { @@ -323,6 +299,7 @@ static int brlink_modify(int argc, char **argv) __s16 mode = -1; __u16 flags = 0; struct rtattr *nest; + int ret; while (argc > 0) { if (strcmp(*argv, "dev") == 0) { @@ -330,40 +307,49 @@ static int brlink_modify(int argc, char **argv) d = *argv; } else if (strcmp(*argv, "guard") == 0) { NEXT_ARG(); - if (!on_off("guard", &bpdu_guard, *argv)) - return -1; + bpdu_guard = parse_on_off("guard", *argv, &ret); + if (ret) + return ret; } else if (strcmp(*argv, "hairpin") == 0) { NEXT_ARG(); - if (!on_off("hairpin", &hairpin, *argv)) - return 
-1; + hairpin = parse_on_off("hairpin", *argv, &ret); + if (ret) + return ret; } else if (strcmp(*argv, "fastleave") == 0) { NEXT_ARG(); - if (!on_off("fastleave", &fast_leave, *argv)) - return -1; + fast_leave = parse_on_off("fastleave", *argv, &ret); + if (ret) + return ret; } else if (strcmp(*argv, "root_block") == 0) { NEXT_ARG(); - if (!on_off("root_block", &root_block, *argv)) - return -1; + root_block = parse_on_off("root_block", *argv, &ret); + if (ret) + return ret; } else if (strcmp(*argv, "learning") == 0) { NEXT_ARG(); - if (!on_off("learning", &learning, *argv)) - return -1; + learning = parse_on_off("learning", *argv, &ret); + if (ret) + return ret; } else if (strcmp(*argv, "learning_sync") == 0) { NEXT_ARG(); - if (!on_off("learning_sync", &learning_sync, *argv)) - return -1; + learning_sync = parse_on_off("learning_sync", *argv, &ret); + if (ret) + return ret; } else if (strcmp(*argv, "flood") == 0) { NEXT_ARG(); - if (!on_off("flood", &flood, *argv)) - return -1; + flood = parse_on_off("flood", *argv, &ret); + if (ret) + return ret; } else if (strcmp(*argv, "mcast_flood") == 0) { NEXT_ARG(); - if (!on_off("mcast_flood", &mcast_flood, *argv)) - return -1; + mcast_flood = parse_on_off("mcast_flood", *argv, &ret); + if (ret) + return ret; } else if (strcmp(*argv, "mcast_to_unicast") == 0) { NEXT_ARG(); - if (!on_off("mcast_to_unicast", &mcast_to_unicast, *argv)) - return -1; + mcast_to_unicast = parse_on_off("mcast_to_unicast", *argv, &ret); + if (ret) + return ret; } else if (strcmp(*argv, "cost") == 0) { NEXT_ARG(); cost = atoi(*argv); @@ -404,18 +390,19 @@ static int brlink_modify(int argc, char **argv) flags |= BRIDGE_FLAGS_MASTER; } else if (strcmp(*argv, "neigh_suppress") == 0) { NEXT_ARG(); - if (!on_off("neigh_suppress", &neigh_suppress, - *argv)) - return -1; + neigh_suppress = parse_on_off("neigh_suppress", *argv, &ret); + if (ret) + return ret; } else if (strcmp(*argv, "vlan_tunnel") == 0) { NEXT_ARG(); - if (!on_off("vlan_tunnel", 
&vlan_tunnel, - *argv)) - return -1; + vlan_tunnel = parse_on_off("vlan_tunnel", *argv, &ret); + if (ret) + return ret; } else if (strcmp(*argv, "isolated") == 0) { NEXT_ARG(); - if (!on_off("isolated", &isolated, *argv)) - return -1; + isolated = parse_on_off("isolated", *argv, &ret); + if (ret) + return ret; } else if (strcmp(*argv, "backup_port") == 0) { NEXT_ARG(); backup_port_idx = ll_name_to_index(*argv); diff --git a/bridge/mdb.c b/bridge/mdb.c index 928ae56..ef89258 100644 --- a/bridge/mdb.c +++ b/bridge/mdb.c @@ -31,7 +31,7 @@ static unsigned int filter_index, filter_vlan; static void usage(void) { fprintf(stderr, - "Usage: bridge mdb { add | del } dev DEV port PORT grp GROUP [permanent | temp] [vid VID]\n" + "Usage: bridge mdb { add | del } dev DEV port PORT grp GROUP [src SOURCE] [permanent | temp] [vid VID]\n" " bridge mdb {show} [ dev DEV ] [ vid VID ]\n"); exit(-1); } @@ -41,15 +41,20 @@ static bool is_temp_mcast_rtr(__u8 type) return type == MDB_RTR_TYPE_TEMP_QUERY || type == MDB_RTR_TYPE_TEMP; } -static const char *format_timer(__u32 ticks) +static const char *format_timer(__u32 ticks, int align) { struct timeval tv; static char tbuf[32]; __jiffies_to_tv(&tv, ticks); - snprintf(tbuf, sizeof(tbuf), "%4lu.%.2lu", - (unsigned long)tv.tv_sec, - (unsigned long)tv.tv_usec / 10000); + if (align) + snprintf(tbuf, sizeof(tbuf), "%4lu.%.2lu", + (unsigned long)tv.tv_sec, + (unsigned long)tv.tv_usec / 10000); + else + snprintf(tbuf, sizeof(tbuf), "%lu.%.2lu", + (unsigned long)tv.tv_sec, + (unsigned long)tv.tv_usec / 10000); return tbuf; } @@ -65,7 +70,7 @@ static void __print_router_port_stats(FILE *f, struct rtattr *pattr) __u32 timer = rta_getattr_u32(tb[MDBA_ROUTER_PATTR_TIMER]); print_string(PRINT_ANY, "timer", " %s", - format_timer(timer)); + format_timer(timer, 1)); } if (tb[MDBA_ROUTER_PATTR_TYPE]) { @@ -115,20 +120,53 @@ static void br_print_router_ports(FILE *f, struct rtattr *attr, close_json_array(PRINT_JSON, NULL); } +static void 
print_src_entry(struct rtattr *src_attr, int af, const char *sep) +{ + struct rtattr *stb[MDBA_MDB_SRCATTR_MAX + 1]; + SPRINT_BUF(abuf); + const char *addr; + __u32 timer_val; + + parse_rtattr_nested(stb, MDBA_MDB_SRCATTR_MAX, src_attr); + if (!stb[MDBA_MDB_SRCATTR_ADDRESS] || !stb[MDBA_MDB_SRCATTR_TIMER]) + return; + + addr = inet_ntop(af, RTA_DATA(stb[MDBA_MDB_SRCATTR_ADDRESS]), abuf, + sizeof(abuf)); + if (!addr) + return; + timer_val = rta_getattr_u32(stb[MDBA_MDB_SRCATTR_TIMER]); + + open_json_object(NULL); + print_string(PRINT_FP, NULL, "%s", sep); + print_color_string(PRINT_ANY, ifa_family_color(af), + "address", "%s", addr); + print_string(PRINT_ANY, "timer", "/%s", format_timer(timer_val, 0)); + close_json_object(); +} + static void print_mdb_entry(FILE *f, int ifindex, const struct br_mdb_entry *e, struct nlmsghdr *n, struct rtattr **tb) { + const void *grp, *src; + const char *addr; SPRINT_BUF(abuf); const char *dev; - const void *src; int af; if (filter_vlan && e->vid != filter_vlan) return; - af = e->addr.proto == htons(ETH_P_IP) ? AF_INET : AF_INET6; - src = af == AF_INET ? 
(const void *)&e->addr.u.ip4 : - (const void *)&e->addr.u.ip6; + if (!e->addr.proto) { + af = AF_PACKET; + grp = &e->addr.u.mac_addr; + } else if (e->addr.proto == htons(ETH_P_IP)) { + af = AF_INET; + grp = &e->addr.u.ip4; + } else { + af = AF_INET6; + grp = &e->addr.u.ip6; + } dev = ll_index_to_name(ifindex); open_json_object(NULL); @@ -138,16 +176,64 @@ static void print_mdb_entry(FILE *f, int ifindex, const struct br_mdb_entry *e, print_string(PRINT_ANY, "port", " port %s", ll_index_to_name(e->ifindex)); + /* The ETH_ALEN argument is ignored for all cases but AF_PACKET */ + addr = rt_addr_n2a_r(af, ETH_ALEN, grp, abuf, sizeof(abuf)); + if (!addr) + return; + print_color_string(PRINT_ANY, ifa_family_color(af), - "grp", " grp %s", - inet_ntop(af, src, abuf, sizeof(abuf))); + "grp", " grp %s", addr); + if (tb && tb[MDBA_MDB_EATTR_SOURCE]) { + src = (const void *)RTA_DATA(tb[MDBA_MDB_EATTR_SOURCE]); + print_color_string(PRINT_ANY, ifa_family_color(af), + "src", " src %s", + inet_ntop(af, src, abuf, sizeof(abuf))); + } print_string(PRINT_ANY, "state", " %s", (e->state & MDB_PERMANENT) ? "permanent" : "temp"); + if (show_details && tb) { + if (tb[MDBA_MDB_EATTR_GROUP_MODE]) { + __u8 mode = rta_getattr_u8(tb[MDBA_MDB_EATTR_GROUP_MODE]); + + print_string(PRINT_ANY, "filter_mode", " filter_mode %s", + mode == MCAST_INCLUDE ? "include" : + "exclude"); + } + if (tb[MDBA_MDB_EATTR_SRC_LIST]) { + struct rtattr *i, *attr = tb[MDBA_MDB_EATTR_SRC_LIST]; + const char *sep = " "; + int rem; + + open_json_array(PRINT_ANY, is_json_context() ? 
+ "source_list" : + " source_list"); + rem = RTA_PAYLOAD(attr); + for (i = RTA_DATA(attr); RTA_OK(i, rem); + i = RTA_NEXT(i, rem)) { + print_src_entry(i, af, sep); + sep = ","; + } + close_json_array(PRINT_JSON, NULL); + } + if (tb[MDBA_MDB_EATTR_RTPROT]) { + __u8 rtprot = rta_getattr_u8(tb[MDBA_MDB_EATTR_RTPROT]); + SPRINT_BUF(rtb); + + print_string(PRINT_ANY, "protocol", " proto %s ", + rtnl_rtprot_n2a(rtprot, rtb, sizeof(rtb))); + } + } open_json_array(PRINT_JSON, "flags"); if (e->flags & MDB_FLAGS_OFFLOAD) print_string(PRINT_ANY, NULL, " %s", "offload"); + if (e->flags & MDB_FLAGS_FAST_LEAVE) + print_string(PRINT_ANY, NULL, " %s", "fast_leave"); + if (e->flags & MDB_FLAGS_STAR_EXCL) + print_string(PRINT_ANY, NULL, " %s", "added_by_star_ex"); + if (e->flags & MDB_FLAGS_BLOCKED) + print_string(PRINT_ANY, NULL, " %s", "blocked"); close_json_array(PRINT_JSON, NULL); if (e->vid) @@ -157,7 +243,7 @@ static void print_mdb_entry(FILE *f, int ifindex, const struct br_mdb_entry *e, __u32 timer = rta_getattr_u32(tb[MDBA_MDB_EATTR_TIMER]); print_string(PRINT_ANY, "timer", " %s", - format_timer(timer)); + format_timer(timer, 1)); } print_nl(); @@ -175,8 +261,9 @@ static void br_print_mdb_entry(FILE *f, int ifindex, struct rtattr *attr, rem = RTA_PAYLOAD(attr); for (i = RTA_DATA(attr); RTA_OK(i, rem); i = RTA_NEXT(i, rem)) { e = RTA_DATA(i); - parse_rtattr(etb, MDBA_MDB_EATTR_MAX, MDB_RTA(RTA_DATA(i)), - RTA_PAYLOAD(i) - RTA_ALIGN(sizeof(*e))); + parse_rtattr_flags(etb, MDBA_MDB_EATTR_MAX, MDB_RTA(RTA_DATA(i)), + RTA_PAYLOAD(i) - RTA_ALIGN(sizeof(*e)), + NLA_F_NESTED); print_mdb_entry(f, ifindex, e, n, etb); } } @@ -366,6 +453,25 @@ static int mdb_show(int argc, char **argv) return 0; } +static int mdb_parse_grp(const char *grp, struct br_mdb_entry *e) +{ + if (inet_pton(AF_INET, grp, &e->addr.u.ip4)) { + e->addr.proto = htons(ETH_P_IP); + return 0; + } + if (inet_pton(AF_INET6, grp, &e->addr.u.ip6)) { + e->addr.proto = htons(ETH_P_IPV6); + return 0; + } + if 
(ll_addr_a2n((char *)e->addr.u.mac_addr, sizeof(e->addr.u.mac_addr), + grp) == ETH_ALEN) { + e->addr.proto = 0; + return 0; + } + + return -1; +} + static int mdb_modify(int cmd, int flags, int argc, char **argv) { struct { @@ -378,8 +484,8 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv) .n.nlmsg_type = cmd, .bpm.family = PF_BRIDGE, }; + char *d = NULL, *p = NULL, *grp = NULL, *src = NULL; struct br_mdb_entry entry = {}; - char *d = NULL, *p = NULL, *grp = NULL; short vid = 0; while (argc > 0) { @@ -400,6 +506,9 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv) } else if (strcmp(*argv, "vid") == 0) { NEXT_ARG(); vid = atoi(*argv); + } else if (strcmp(*argv, "src") == 0) { + NEXT_ARG(); + src = *argv; } else { if (matches(*argv, "help") == 0) usage(); @@ -420,17 +529,31 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv) if (!entry.ifindex) return nodev(p); - if (!inet_pton(AF_INET, grp, &entry.addr.u.ip4)) { - if (!inet_pton(AF_INET6, grp, &entry.addr.u.ip6)) { - fprintf(stderr, "Invalid address \"%s\"\n", grp); - return -1; - } else - entry.addr.proto = htons(ETH_P_IPV6); - } else - entry.addr.proto = htons(ETH_P_IP); + if (mdb_parse_grp(grp, &entry)) { + fprintf(stderr, "Invalid address \"%s\"\n", grp); + return -1; + } entry.vid = vid; addattr_l(&req.n, sizeof(req), MDBA_SET_ENTRY, &entry, sizeof(entry)); + if (src) { + struct rtattr *nest = addattr_nest(&req.n, sizeof(req), + MDBA_SET_ENTRY_ATTRS); + struct in6_addr src_ip6; + __be32 src_ip4; + + nest->rta_type |= NLA_F_NESTED; + if (!inet_pton(AF_INET, src, &src_ip4)) { + if (!inet_pton(AF_INET6, src, &src_ip6)) { + fprintf(stderr, "Invalid source address \"%s\"\n", src); + return -1; + } + addattr_l(&req.n, sizeof(req), MDBE_ATTR_SOURCE, &src_ip6, sizeof(src_ip6)); + } else { + addattr32(&req.n, sizeof(req), MDBE_ATTR_SOURCE, src_ip4); + } + addattr_nest_end(&req.n, nest); + } if (rtnl_talk(&rth, &req.n, NULL) < 0) return -1; diff --git a/configure 
b/configure index 307912a..2c363d3 100755 --- a/configure +++ b/configure @@ -2,6 +2,11 @@ # SPDX-License-Identifier: GPL-2.0 # This is not an autoconf generated configure # +# Influential LIBBPF environment variables: +# LIBBPF_FORCE={on,off} on: require link against libbpf; +# off: disable libbpf probing +# LIBBPF_DIR Path to libbpf DESTDIR to use + INCLUDE=${1:-"$PWD/include"} # Output file which is input to Makefile @@ -240,6 +245,111 @@ check_elf() fi } +have_libbpf_basic() +{ + cat >$TMPDIR/libbpf_test.c < +int main(int argc, char **argv) { + bpf_program__set_autoload(NULL, false); + bpf_map__ifindex(NULL); + bpf_map__set_pin_path(NULL, NULL); + bpf_object__open_file(NULL, NULL); + return 0; +} +EOF + + $CC -o $TMPDIR/libbpf_test $TMPDIR/libbpf_test.c $LIBBPF_CFLAGS $LIBBPF_LDLIBS >/dev/null 2>&1 + local ret=$? + + rm -f $TMPDIR/libbpf_test.c $TMPDIR/libbpf_test + return $ret +} + +have_libbpf_sec_name() +{ + cat >$TMPDIR/libbpf_sec_test.c < +int main(int argc, char **argv) { + void *ptr; + bpf_program__section_name(NULL); + return 0; +} +EOF + + $CC -o $TMPDIR/libbpf_sec_test $TMPDIR/libbpf_sec_test.c $LIBBPF_CFLAGS $LIBBPF_LDLIBS >/dev/null 2>&1 + local ret=$? + + rm -f $TMPDIR/libbpf_sec_test.c $TMPDIR/libbpf_sec_test + return $ret +} + +check_force_libbpf_on() +{ + # if set LIBBPF_FORCE=on but no libbpf support, just exist the config + # process to make sure we don't build without libbpf. + if [ "$LIBBPF_FORCE" = on ]; then + echo " LIBBPF_FORCE=on set, but couldn't find a usable libbpf" + exit 1 + fi +} + +check_libbpf() +{ + # if set LIBBPF_FORCE=off, disable libbpf entirely + if [ "$LIBBPF_FORCE" = off ]; then + echo "no" + return + fi + + if ! 
${PKG_CONFIG} libbpf --exists && [ -z "$LIBBPF_DIR" ] ; then + echo "no" + check_force_libbpf_on + return + fi + + if [ $(uname -m) = x86_64 ]; then + local LIBBPF_LIBDIR="${LIBBPF_DIR}/usr/lib64" + else + local LIBBPF_LIBDIR="${LIBBPF_DIR}/usr/lib" + fi + + if [ -n "$LIBBPF_DIR" ]; then + LIBBPF_CFLAGS="-I${LIBBPF_DIR}/usr/include" + LIBBPF_LDLIBS="${LIBBPF_LIBDIR}/libbpf.a -lz -lelf" + LIBBPF_VERSION=$(PKG_CONFIG_LIBDIR=${LIBBPF_LIBDIR}/pkgconfig ${PKG_CONFIG} libbpf --modversion) + else + LIBBPF_CFLAGS=$(${PKG_CONFIG} libbpf --cflags) + LIBBPF_LDLIBS=$(${PKG_CONFIG} libbpf --libs) + LIBBPF_VERSION=$(${PKG_CONFIG} libbpf --modversion) + fi + + if ! have_libbpf_basic; then + echo "no" + echo " libbpf version $LIBBPF_VERSION is too low, please update it to at least 0.1.0" + check_force_libbpf_on + return + else + echo "HAVE_LIBBPF:=y" >> $CONFIG + echo 'CFLAGS += -DHAVE_LIBBPF ' $LIBBPF_CFLAGS >> $CONFIG + echo "CFLAGS += -DLIBBPF_VERSION=\\\"$LIBBPF_VERSION\\\"" >> $CONFIG + echo 'LDLIBS += ' $LIBBPF_LDLIBS >> $CONFIG + + if [ -z "$LIBBPF_DIR" ]; then + echo "CFLAGS += -DLIBBPF_DYNAMIC" >> $CONFIG + fi + fi + + # bpf_program__title() is deprecated since libbpf 0.2.0, use + # bpf_program__section_name() instead if we support + if have_libbpf_sec_name; then + echo "HAVE_LIBBPF_SECTION_NAME:=y" >> $CONFIG + echo 'CFLAGS += -DHAVE_LIBBPF_SECTION_NAME ' >> $CONFIG + fi + + echo "yes" + echo " libbpf version $LIBBPF_VERSION" +} + check_selinux() # SELinux is a compile time option in the ss utility { @@ -385,6 +495,9 @@ check_setns echo -n "SELinux support: " check_selinux +echo -n "libbpf support: " +check_libbpf + echo -n "ELF support: " check_elf diff --git a/dcb/.gitignore b/dcb/.gitignore new file mode 100644 index 0000000..3f26856 --- /dev/null +++ b/dcb/.gitignore @@ -0,0 +1 @@ +dcb diff --git a/dcb/Makefile b/dcb/Makefile new file mode 100644 index 0000000..3a2e5d4 --- /dev/null +++ b/dcb/Makefile @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: GPL-2.0 +include 
../config.mk + +TARGETS := + +ifeq ($(HAVE_MNL),y) + +DCBOBJ = dcb.o \ + dcb_app.o \ + dcb_buffer.o \ + dcb_dcbx.o \ + dcb_ets.o \ + dcb_maxrate.o \ + dcb_pfc.o +TARGETS += dcb +LDLIBS += -lm + +endif + +all: $(TARGETS) $(LIBS) + +dcb: $(DCBOBJ) $(LIBNETLINK) + $(QUIET_LINK)$(CC) $^ $(LDFLAGS) $(LDLIBS) -o $@ + +install: all + for i in $(TARGETS); \ + do install -m 0755 $$i $(DESTDIR)$(SBINDIR); \ + done + +clean: + rm -f $(DCBOBJ) $(TARGETS) diff --git a/dcb/dcb.c b/dcb/dcb.c new file mode 100644 index 0000000..64a9ef0 --- /dev/null +++ b/dcb/dcb.c @@ -0,0 +1,610 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include +#include +#include +#include +#include + +#include "dcb.h" +#include "mnl_utils.h" +#include "namespace.h" +#include "utils.h" +#include "version.h" + +static int dcb_init(struct dcb *dcb) +{ + dcb->buf = malloc(MNL_SOCKET_BUFFER_SIZE); + if (dcb->buf == NULL) { + perror("Netlink buffer allocation"); + return -1; + } + + dcb->nl = mnlu_socket_open(NETLINK_ROUTE); + if (dcb->nl == NULL) { + perror("Open netlink socket"); + goto err_socket_open; + } + + new_json_obj_plain(dcb->json_output); + return 0; + +err_socket_open: + free(dcb->buf); + return -1; +} + +static void dcb_fini(struct dcb *dcb) +{ + delete_json_obj_plain(); + mnl_socket_close(dcb->nl); + free(dcb->buf); +} + +static struct dcb *dcb_alloc(void) +{ + struct dcb *dcb; + + dcb = calloc(1, sizeof(*dcb)); + if (!dcb) + return NULL; + return dcb; +} + +static void dcb_free(struct dcb *dcb) +{ + free(dcb); +} + +struct dcb_get_attribute { + struct dcb *dcb; + int attr; + void *payload; + __u16 payload_len; +}; + +static int dcb_get_attribute_attr_ieee_cb(const struct nlattr *attr, void *data) +{ + struct dcb_get_attribute *ga = data; + + if (mnl_attr_get_type(attr) != ga->attr) + return MNL_CB_OK; + + ga->payload = mnl_attr_get_payload(attr); + ga->payload_len = mnl_attr_get_payload_len(attr); + return MNL_CB_STOP; +} + +static int dcb_get_attribute_attr_cb(const struct nlattr *attr, void 
*data) +{ + if (mnl_attr_get_type(attr) != DCB_ATTR_IEEE) + return MNL_CB_OK; + + return mnl_attr_parse_nested(attr, dcb_get_attribute_attr_ieee_cb, data); +} + +static int dcb_get_attribute_cb(const struct nlmsghdr *nlh, void *data) +{ + return mnl_attr_parse(nlh, sizeof(struct dcbmsg), dcb_get_attribute_attr_cb, data); +} + +static int dcb_get_attribute_bare_cb(const struct nlmsghdr *nlh, void *data) +{ + /* Bare attributes (e.g. DCB_ATTR_DCBX) are not wrapped inside an IEEE + * container, so this does not have to go through unpacking in + * dcb_get_attribute_attr_cb(). + */ + return mnl_attr_parse(nlh, sizeof(struct dcbmsg), + dcb_get_attribute_attr_ieee_cb, data); +} + +struct dcb_set_attribute_response { + int response_attr; +}; + +static int dcb_set_attribute_attr_cb(const struct nlattr *attr, void *data) +{ + struct dcb_set_attribute_response *resp = data; + uint16_t len; + uint8_t err; + + if (mnl_attr_get_type(attr) != resp->response_attr) + return MNL_CB_OK; + + len = mnl_attr_get_payload_len(attr); + if (len != 1) { + fprintf(stderr, "Response attribute expected to have size 1, not %d\n", len); + return MNL_CB_ERROR; + } + + err = mnl_attr_get_u8(attr); + if (err) { + fprintf(stderr, "Error when attempting to set attribute: %s\n", + strerror(err)); + return MNL_CB_ERROR; + } + + return MNL_CB_STOP; +} + +static int dcb_set_attribute_cb(const struct nlmsghdr *nlh, void *data) +{ + return mnl_attr_parse(nlh, sizeof(struct dcbmsg), dcb_set_attribute_attr_cb, data); +} + +static int dcb_talk(struct dcb *dcb, struct nlmsghdr *nlh, mnl_cb_t cb, void *data) +{ + int ret; + + ret = mnl_socket_sendto(dcb->nl, nlh, nlh->nlmsg_len); + if (ret < 0) { + perror("mnl_socket_sendto"); + return -1; + } + + return mnlu_socket_recv_run(dcb->nl, nlh->nlmsg_seq, dcb->buf, MNL_SOCKET_BUFFER_SIZE, + cb, data); +} + +static struct nlmsghdr *dcb_prepare(struct dcb *dcb, const char *dev, + uint32_t nlmsg_type, uint8_t dcb_cmd) +{ + struct dcbmsg dcbm = { + .cmd = dcb_cmd, + }; + 
struct nlmsghdr *nlh; + + nlh = mnlu_msg_prepare(dcb->buf, nlmsg_type, NLM_F_REQUEST, &dcbm, sizeof(dcbm)); + mnl_attr_put_strz(nlh, DCB_ATTR_IFNAME, dev); + return nlh; +} + +static int __dcb_get_attribute(struct dcb *dcb, int command, + const char *dev, int attr, + void **payload_p, __u16 *payload_len_p, + int (*get_attribute_cb)(const struct nlmsghdr *nlh, + void *data)) +{ + struct dcb_get_attribute ga; + struct nlmsghdr *nlh; + int ret; + + nlh = dcb_prepare(dcb, dev, RTM_GETDCB, command); + + ga = (struct dcb_get_attribute) { + .dcb = dcb, + .attr = attr, + .payload = NULL, + }; + ret = dcb_talk(dcb, nlh, get_attribute_cb, &ga); + if (ret) { + perror("Attribute read"); + return ret; + } + if (ga.payload == NULL) { + perror("Attribute not found"); + return -ENOENT; + } + + *payload_p = ga.payload; + *payload_len_p = ga.payload_len; + return 0; +} + +int dcb_get_attribute_va(struct dcb *dcb, const char *dev, int attr, + void **payload_p, __u16 *payload_len_p) +{ + return __dcb_get_attribute(dcb, DCB_CMD_IEEE_GET, dev, attr, + payload_p, payload_len_p, + dcb_get_attribute_cb); +} + +int dcb_get_attribute_bare(struct dcb *dcb, int cmd, const char *dev, int attr, + void **payload_p, __u16 *payload_len_p) +{ + return __dcb_get_attribute(dcb, cmd, dev, attr, + payload_p, payload_len_p, + dcb_get_attribute_bare_cb); +} + +int dcb_get_attribute(struct dcb *dcb, const char *dev, int attr, void *data, size_t data_len) +{ + __u16 payload_len; + void *payload; + int ret; + + ret = dcb_get_attribute_va(dcb, dev, attr, &payload, &payload_len); + if (ret) + return ret; + + if (payload_len != data_len) { + fprintf(stderr, "Wrong len %d, expected %zd\n", payload_len, data_len); + return -EINVAL; + } + + memcpy(data, payload, data_len); + return 0; +} + +static int __dcb_set_attribute(struct dcb *dcb, int command, const char *dev, + int (*cb)(struct dcb *, struct nlmsghdr *, void *), + void *data, int response_attr) +{ + struct dcb_set_attribute_response resp = { + 
.response_attr = response_attr, + }; + struct nlmsghdr *nlh; + int ret; + + nlh = dcb_prepare(dcb, dev, RTM_SETDCB, command); + + ret = cb(dcb, nlh, data); + if (ret) + return ret; + + ret = dcb_talk(dcb, nlh, dcb_set_attribute_cb, &resp); + if (ret) { + perror("Attribute write"); + return ret; + } + return 0; +} + +struct dcb_set_attribute_ieee_cb { + int (*cb)(struct dcb *dcb, struct nlmsghdr *nlh, void *data); + void *data; +}; + +static int dcb_set_attribute_ieee_cb(struct dcb *dcb, struct nlmsghdr *nlh, void *data) +{ + struct dcb_set_attribute_ieee_cb *ieee_data = data; + struct nlattr *nest; + int ret; + + nest = mnl_attr_nest_start(nlh, DCB_ATTR_IEEE); + ret = ieee_data->cb(dcb, nlh, ieee_data->data); + if (ret) + return ret; + mnl_attr_nest_end(nlh, nest); + + return 0; +} + +int dcb_set_attribute_va(struct dcb *dcb, int command, const char *dev, + int (*cb)(struct dcb *dcb, struct nlmsghdr *nlh, void *data), + void *data) +{ + struct dcb_set_attribute_ieee_cb ieee_data = { + .cb = cb, + .data = data, + }; + + return __dcb_set_attribute(dcb, command, dev, + &dcb_set_attribute_ieee_cb, &ieee_data, + DCB_ATTR_IEEE); +} + +struct dcb_set_attribute { + int attr; + const void *data; + size_t data_len; +}; + +static int dcb_set_attribute_put(struct dcb *dcb, struct nlmsghdr *nlh, void *data) +{ + struct dcb_set_attribute *dsa = data; + + mnl_attr_put(nlh, dsa->attr, dsa->data_len, dsa->data); + return 0; +} + +int dcb_set_attribute(struct dcb *dcb, const char *dev, int attr, const void *data, size_t data_len) +{ + struct dcb_set_attribute dsa = { + .attr = attr, + .data = data, + .data_len = data_len, + }; + + return dcb_set_attribute_va(dcb, DCB_CMD_IEEE_SET, dev, + &dcb_set_attribute_put, &dsa); +} + +int dcb_set_attribute_bare(struct dcb *dcb, int command, const char *dev, + int attr, const void *data, size_t data_len, + int response_attr) +{ + struct dcb_set_attribute dsa = { + .attr = attr, + .data = data, + .data_len = data_len, + }; + + return 
__dcb_set_attribute(dcb, command, dev, + &dcb_set_attribute_put, &dsa, response_attr); +} + +void dcb_print_array_u8(const __u8 *array, size_t size) +{ + SPRINT_BUF(b); + size_t i; + + for (i = 0; i < size; i++) { + snprintf(b, sizeof(b), "%zd:%%d ", i); + print_uint(PRINT_ANY, NULL, b, array[i]); + } +} + +void dcb_print_array_u64(const __u64 *array, size_t size) +{ + SPRINT_BUF(b); + size_t i; + + for (i = 0; i < size; i++) { + snprintf(b, sizeof(b), "%zd:%%" PRIu64 " ", i); + print_u64(PRINT_ANY, NULL, b, array[i]); + } +} + +void dcb_print_array_on_off(const __u8 *array, size_t size) +{ + SPRINT_BUF(b); + size_t i; + + for (i = 0; i < size; i++) { + snprintf(b, sizeof(b), "%zd:%%s ", i); + print_on_off(PRINT_ANY, NULL, b, array[i]); + } +} + +void dcb_print_array_kw(const __u8 *array, size_t array_size, + const char *const kw[], size_t kw_size) +{ + SPRINT_BUF(b); + size_t i; + + for (i = 0; i < array_size; i++) { + __u8 emt = array[i]; + + snprintf(b, sizeof(b), "%zd:%%s ", i); + if (emt < kw_size && kw[emt]) + print_string(PRINT_ANY, NULL, b, kw[emt]); + else + print_string(PRINT_ANY, NULL, b, "???"); + } +} + +void dcb_print_named_array(const char *json_name, const char *fp_name, + const __u8 *array, size_t size, + void (*print_array)(const __u8 *, size_t)) +{ + open_json_array(PRINT_JSON, json_name); + print_string(PRINT_FP, NULL, "%s ", fp_name); + print_array(array, size); + close_json_array(PRINT_JSON, json_name); +} + +int dcb_parse_mapping(const char *what_key, __u32 key, __u32 max_key, + const char *what_value, __u64 value, __u64 max_value, + void (*set_array)(__u32 index, __u64 value, void *data), + void *set_array_data) +{ + bool is_all = key == (__u32) -1; + + if (!is_all && key > max_key) { + fprintf(stderr, "In %s:%s mapping, %s is expected to be 0..%d\n", + what_key, what_value, what_key, max_key); + return -EINVAL; + } + + if (value > max_value) { + fprintf(stderr, "In %s:%s mapping, %s is expected to be 0..%llu\n", + what_key, what_value, 
what_value, max_value); + return -EINVAL; + } + + if (is_all) { + for (key = 0; key <= max_key; key++) + set_array(key, value, set_array_data); + } else { + set_array(key, value, set_array_data); + } + + return 0; +} + +void dcb_set_u8(__u32 key, __u64 value, void *data) +{ + __u8 *array = data; + + array[key] = value; +} + +void dcb_set_u32(__u32 key, __u64 value, void *data) +{ + __u32 *array = data; + + array[key] = value; +} + +void dcb_set_u64(__u32 key, __u64 value, void *data) +{ + __u64 *array = data; + + array[key] = value; +} + +int dcb_cmd_parse_dev(struct dcb *dcb, int argc, char **argv, + int (*and_then)(struct dcb *dcb, const char *dev, + int argc, char **argv), + void (*help)(void)) +{ + const char *dev; + + if (!argc || matches(*argv, "help") == 0) { + help(); + return 0; + } else if (matches(*argv, "dev") == 0) { + NEXT_ARG(); + dev = *argv; + if (check_ifname(dev)) { + invarg("not a valid ifname", *argv); + return -EINVAL; + } + NEXT_ARG_FWD(); + return and_then(dcb, dev, argc, argv); + } else { + fprintf(stderr, "Expected `dev DEV', not `%s'", *argv); + help(); + return -EINVAL; + } +} + +static void dcb_help(void) +{ + fprintf(stderr, + "Usage: dcb [ OPTIONS ] OBJECT { COMMAND | help }\n" + " dcb [ -f | --force ] { -b | --batch } filename [ -n | --netns ] netnsname\n" + "where OBJECT := { app | buffer | dcbx | ets | maxrate | pfc }\n" + " OPTIONS := [ -V | --Version | -i | --iec | -j | --json\n" + " | -N | --Numeric | -p | --pretty\n" + " | -s | --statistics | -v | --verbose]\n"); +} + +static int dcb_cmd(struct dcb *dcb, int argc, char **argv) +{ + if (!argc || matches(*argv, "help") == 0) { + dcb_help(); + return 0; + } else if (matches(*argv, "app") == 0) { + return dcb_cmd_app(dcb, argc - 1, argv + 1); + } else if (matches(*argv, "buffer") == 0) { + return dcb_cmd_buffer(dcb, argc - 1, argv + 1); + } else if (matches(*argv, "dcbx") == 0) { + return dcb_cmd_dcbx(dcb, argc - 1, argv + 1); + } else if (matches(*argv, "ets") == 0) { + return 
dcb_cmd_ets(dcb, argc - 1, argv + 1); + } else if (matches(*argv, "maxrate") == 0) { + return dcb_cmd_maxrate(dcb, argc - 1, argv + 1); + } else if (matches(*argv, "pfc") == 0) { + return dcb_cmd_pfc(dcb, argc - 1, argv + 1); + } + + fprintf(stderr, "Object \"%s\" is unknown\n", *argv); + return -ENOENT; +} + +static int dcb_batch_cmd(int argc, char *argv[], void *data) +{ + struct dcb *dcb = data; + + return dcb_cmd(dcb, argc, argv); +} + +static int dcb_batch(struct dcb *dcb, const char *name, bool force) +{ + return do_batch(name, force, dcb_batch_cmd, dcb); +} + +int main(int argc, char **argv) +{ + static const struct option long_options[] = { + { "Version", no_argument, NULL, 'V' }, + { "force", no_argument, NULL, 'f' }, + { "batch", required_argument, NULL, 'b' }, + { "iec", no_argument, NULL, 'i' }, + { "json", no_argument, NULL, 'j' }, + { "Numeric", no_argument, NULL, 'N' }, + { "pretty", no_argument, NULL, 'p' }, + { "statistics", no_argument, NULL, 's' }, + { "netns", required_argument, NULL, 'n' }, + { "help", no_argument, NULL, 'h' }, + { NULL, 0, NULL, 0 } + }; + const char *batch_file = NULL; + bool force = false; + struct dcb *dcb; + int opt; + int err; + int ret; + + dcb = dcb_alloc(); + if (!dcb) { + fprintf(stderr, "Failed to allocate memory for dcb\n"); + return EXIT_FAILURE; + } + + while ((opt = getopt_long(argc, argv, "b:fhijn:psvNV", + long_options, NULL)) >= 0) { + + switch (opt) { + case 'V': + printf("dcb utility, iproute2-%s\n", version); + ret = EXIT_SUCCESS; + goto dcb_free; + case 'f': + force = true; + break; + case 'b': + batch_file = optarg; + break; + case 'j': + dcb->json_output = true; + break; + case 'N': + dcb->numeric = true; + break; + case 'p': + pretty = true; + break; + case 's': + dcb->stats = true; + break; + case 'n': + if (netns_switch(optarg)) { + ret = EXIT_FAILURE; + goto dcb_free; + } + break; + case 'i': + dcb->use_iec = true; + break; + case 'h': + dcb_help(); + return 0; + default: + fprintf(stderr, "Unknown 
option.\n"); + dcb_help(); + ret = EXIT_FAILURE; + goto dcb_free; + } + } + + argc -= optind; + argv += optind; + + err = dcb_init(dcb); + if (err) { + ret = EXIT_FAILURE; + goto dcb_free; + } + + if (batch_file) + err = dcb_batch(dcb, batch_file, force); + else + err = dcb_cmd(dcb, argc, argv); + + if (err) { + ret = EXIT_FAILURE; + goto dcb_fini; + } + + ret = EXIT_SUCCESS; + +dcb_fini: + dcb_fini(dcb); +dcb_free: + dcb_free(dcb); + + return ret; +} diff --git a/dcb/dcb.h b/dcb/dcb.h new file mode 100644 index 0000000..244c3d3 --- /dev/null +++ b/dcb/dcb.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __DCB_H__ +#define __DCB_H__ 1 + +#include +#include +#include + +/* dcb.c */ + +struct dcb { + char *buf; + struct mnl_socket *nl; + bool json_output; + bool stats; + bool use_iec; + bool numeric; +}; + +int dcb_parse_mapping(const char *what_key, __u32 key, __u32 max_key, + const char *what_value, __u64 value, __u64 max_value, + void (*set_array)(__u32 index, __u64 value, void *data), + void *set_array_data); +int dcb_cmd_parse_dev(struct dcb *dcb, int argc, char **argv, + int (*and_then)(struct dcb *dcb, const char *dev, + int argc, char **argv), + void (*help)(void)); + +void dcb_set_u8(__u32 key, __u64 value, void *data); +void dcb_set_u32(__u32 key, __u64 value, void *data); +void dcb_set_u64(__u32 key, __u64 value, void *data); + +int dcb_get_attribute(struct dcb *dcb, const char *dev, int attr, + void *data, size_t data_len); +int dcb_set_attribute(struct dcb *dcb, const char *dev, int attr, + const void *data, size_t data_len); +int dcb_get_attribute_va(struct dcb *dcb, const char *dev, int attr, + void **payload_p, __u16 *payload_len_p); +int dcb_set_attribute_va(struct dcb *dcb, int command, const char *dev, + int (*cb)(struct dcb *dcb, struct nlmsghdr *nlh, void *data), + void *data); +int dcb_get_attribute_bare(struct dcb *dcb, int cmd, const char *dev, int attr, + void **payload_p, __u16 *payload_len_p); +int 
dcb_set_attribute_bare(struct dcb *dcb, int command, const char *dev, + int attr, const void *data, size_t data_len, + int response_attr); + +void dcb_print_named_array(const char *json_name, const char *fp_name, + const __u8 *array, size_t size, + void (*print_array)(const __u8 *, size_t)); +void dcb_print_array_u8(const __u8 *array, size_t size); +void dcb_print_array_u64(const __u64 *array, size_t size); +void dcb_print_array_on_off(const __u8 *array, size_t size); +void dcb_print_array_kw(const __u8 *array, size_t array_size, + const char *const kw[], size_t kw_size); + +/* dcb_app.c */ + +int dcb_cmd_app(struct dcb *dcb, int argc, char **argv); + +/* dcb_buffer.c */ + +int dcb_cmd_buffer(struct dcb *dcb, int argc, char **argv); + +/* dcb_dcbx.c */ + +int dcb_cmd_dcbx(struct dcb *dcb, int argc, char **argv); + +/* dcb_ets.c */ + +int dcb_cmd_ets(struct dcb *dcb, int argc, char **argv); + +/* dcb_maxrate.c */ + +int dcb_cmd_maxrate(struct dcb *dcb, int argc, char **argv); + +/* dcb_pfc.c */ + +int dcb_cmd_pfc(struct dcb *dcb, int argc, char **argv); + +#endif /* __DCB_H__ */ diff --git a/dcb/dcb_app.c b/dcb/dcb_app.c new file mode 100644 index 0000000..c4816bc --- /dev/null +++ b/dcb/dcb_app.c @@ -0,0 +1,795 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include +#include +#include +#include +#include + +#include "dcb.h" +#include "utils.h" +#include "rt_names.h" + +static void dcb_app_help_add(void) +{ + fprintf(stderr, + "Usage: dcb app { add | del | replace } dev STRING\n" + " [ default-prio PRIO ]\n" + " [ ethtype-prio ET:PRIO ]\n" + " [ stream-port-prio PORT:PRIO ]\n" + " [ dgram-port-prio PORT:PRIO ]\n" + " [ port-prio PORT:PRIO ]\n" + " [ dscp-prio INTEGER:PRIO ]\n" + "\n" + " where PRIO := { 0 .. 7 }\n" + " ET := { 0x600 .. 0xffff }\n" + " PORT := { 1 .. 65535 }\n" + " DSCP := { 0 .. 
63 }\n" + "\n" + ); +} + +static void dcb_app_help_show_flush(void) +{ + fprintf(stderr, + "Usage: dcb app { show | flush } dev STRING\n" + " [ default-prio ]\n" + " [ ethtype-prio ]\n" + " [ stream-port-prio ]\n" + " [ dgram-port-prio ]\n" + " [ port-prio ]\n" + " [ dscp-prio ]\n" + "\n" + ); +} + +static void dcb_app_help(void) +{ + fprintf(stderr, + "Usage: dcb app help\n" + "\n" + ); + dcb_app_help_show_flush(); + dcb_app_help_add(); +} + +struct dcb_app_table { + struct dcb_app *apps; + size_t n_apps; +}; + +static void dcb_app_table_fini(struct dcb_app_table *tab) +{ + free(tab->apps); +} + +static int dcb_app_table_push(struct dcb_app_table *tab, struct dcb_app *app) +{ + struct dcb_app *apps = realloc(tab->apps, (tab->n_apps + 1) * sizeof(*tab->apps)); + + if (apps == NULL) { + perror("Cannot allocate APP table"); + return -ENOMEM; + } + + tab->apps = apps; + tab->apps[tab->n_apps++] = *app; + return 0; +} + +static void dcb_app_table_remove_existing(struct dcb_app_table *a, + const struct dcb_app_table *b) +{ + size_t ia, ja; + size_t ib; + + for (ia = 0, ja = 0; ia < a->n_apps; ia++) { + struct dcb_app *aa = &a->apps[ia]; + bool found = false; + + for (ib = 0; ib < b->n_apps; ib++) { + const struct dcb_app *ab = &b->apps[ib]; + + if (aa->selector == ab->selector && + aa->protocol == ab->protocol && + aa->priority == ab->priority) { + found = true; + break; + } + } + + if (!found) + a->apps[ja++] = *aa; + } + + a->n_apps = ja; +} + +static void dcb_app_table_remove_replaced(struct dcb_app_table *a, + const struct dcb_app_table *b) +{ + size_t ia, ja; + size_t ib; + + for (ia = 0, ja = 0; ia < a->n_apps; ia++) { + struct dcb_app *aa = &a->apps[ia]; + bool present = false; + bool found = false; + + for (ib = 0; ib < b->n_apps; ib++) { + const struct dcb_app *ab = &b->apps[ib]; + + if (aa->selector == ab->selector && + aa->protocol == ab->protocol) + present = true; + else + continue; + + if (aa->priority == ab->priority) { + found = true; + break; + } + } + 
+ /* Entries that remain in A will be removed, so keep in the + * table only APP entries whose sel/pid is mentioned in B, + * but that do not have the full sel/pid/prio match. + */ + if (present && !found) + a->apps[ja++] = *aa; + } + + a->n_apps = ja; +} + +static int dcb_app_table_copy(struct dcb_app_table *a, + const struct dcb_app_table *b) +{ + size_t i; + int ret; + + for (i = 0; i < b->n_apps; i++) { + ret = dcb_app_table_push(a, &b->apps[i]); + if (ret != 0) + return ret; + } + return 0; +} + +static int dcb_app_cmp(const struct dcb_app *a, const struct dcb_app *b) +{ + if (a->protocol < b->protocol) + return -1; + if (a->protocol > b->protocol) + return 1; + return a->priority - b->priority; +} + +static int dcb_app_cmp_cb(const void *a, const void *b) +{ + return dcb_app_cmp(a, b); +} + +static void dcb_app_table_sort(struct dcb_app_table *tab) +{ + qsort(tab->apps, tab->n_apps, sizeof(*tab->apps), dcb_app_cmp_cb); +} + +struct dcb_app_parse_mapping { + __u8 selector; + struct dcb_app_table *tab; + int err; +}; + +static void dcb_app_parse_mapping_cb(__u32 key, __u64 value, void *data) +{ + struct dcb_app_parse_mapping *pm = data; + struct dcb_app app = { + .selector = pm->selector, + .priority = value, + .protocol = key, + }; + + if (pm->err) + return; + + pm->err = dcb_app_table_push(pm->tab, &app); +} + +static int dcb_app_parse_mapping_ethtype_prio(__u32 key, char *value, void *data) +{ + __u8 prio; + + if (key < 0x600) { + fprintf(stderr, "Protocol IDs < 0x600 are reserved for EtherType\n"); + return -EINVAL; + } + + if (get_u8(&prio, value, 0)) + return -EINVAL; + + return dcb_parse_mapping("ETHTYPE", key, 0xffff, + "PRIO", prio, IEEE_8021QAZ_MAX_TCS - 1, + dcb_app_parse_mapping_cb, data); +} + +static int dcb_app_parse_dscp(__u32 *key, const char *arg) +{ + if (parse_mapping_num_all(key, arg) == 0) + return 0; + + if (rtnl_dsfield_a2n(key, arg) != 0) + return -1; + + if (*key & 0x03) { + fprintf(stderr, "The values `%s' uses non-DSCP bits.\n", 
arg); + return -1; + } + + /* Unshift the value to convert it from dsfield to DSCP. */ + *key >>= 2; + return 0; +} + +static int dcb_app_parse_mapping_dscp_prio(__u32 key, char *value, void *data) +{ + __u8 prio; + + if (get_u8(&prio, value, 0)) + return -EINVAL; + + return dcb_parse_mapping("DSCP", key, 63, + "PRIO", prio, IEEE_8021QAZ_MAX_TCS - 1, + dcb_app_parse_mapping_cb, data); +} + +static int dcb_app_parse_mapping_port_prio(__u32 key, char *value, void *data) +{ + __u8 prio; + + if (key == 0) { + fprintf(stderr, "Port ID of 0 is invalid\n"); + return -EINVAL; + } + + if (get_u8(&prio, value, 0)) + return -EINVAL; + + return dcb_parse_mapping("PORT", key, 0xffff, + "PRIO", prio, IEEE_8021QAZ_MAX_TCS - 1, + dcb_app_parse_mapping_cb, data); +} + +static int dcb_app_parse_default_prio(int *argcp, char ***argvp, struct dcb_app_table *tab) +{ + int argc = *argcp; + char **argv = *argvp; + int ret = 0; + + while (argc > 0) { + struct dcb_app app; + __u8 prio; + + if (get_u8(&prio, *argv, 0)) { + ret = 1; + break; + } + + app = (struct dcb_app){ + .selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE, + .protocol = 0, + .priority = prio, + }; + ret = dcb_app_table_push(tab, &app); + if (ret != 0) + break; + + argc--, argv++; + } + + *argcp = argc; + *argvp = argv; + return ret; +} + +static bool dcb_app_is_ethtype(const struct dcb_app *app) +{ + return app->selector == IEEE_8021QAZ_APP_SEL_ETHERTYPE && + app->protocol != 0; +} + +static bool dcb_app_is_default(const struct dcb_app *app) +{ + return app->selector == IEEE_8021QAZ_APP_SEL_ETHERTYPE && + app->protocol == 0; +} + +static bool dcb_app_is_dscp(const struct dcb_app *app) +{ + return app->selector == IEEE_8021QAZ_APP_SEL_DSCP; +} + +static bool dcb_app_is_stream_port(const struct dcb_app *app) +{ + return app->selector == IEEE_8021QAZ_APP_SEL_STREAM; +} + +static bool dcb_app_is_dgram_port(const struct dcb_app *app) +{ + return app->selector == IEEE_8021QAZ_APP_SEL_DGRAM; +} + +static bool dcb_app_is_port(const 
struct dcb_app *app) +{ + return app->selector == IEEE_8021QAZ_APP_SEL_ANY; +} + +static int dcb_app_print_key_dec(__u16 protocol) +{ + return print_uint(PRINT_ANY, NULL, "%d:", protocol); +} + +static int dcb_app_print_key_hex(__u16 protocol) +{ + return print_uint(PRINT_ANY, NULL, "%x:", protocol); +} + +static int dcb_app_print_key_dscp(__u16 protocol) +{ + const char *name = rtnl_dsfield_get_name(protocol << 2); + + + if (!is_json_context() && name != NULL) + return print_string(PRINT_FP, NULL, "%s:", name); + return print_uint(PRINT_ANY, NULL, "%d:", protocol); +} + +static void dcb_app_print_filtered(const struct dcb_app_table *tab, + bool (*filter)(const struct dcb_app *), + int (*print_key)(__u16 protocol), + const char *json_name, + const char *fp_name) +{ + bool first = true; + size_t i; + + for (i = 0; i < tab->n_apps; i++) { + struct dcb_app *app = &tab->apps[i]; + + if (!filter(app)) + continue; + if (first) { + open_json_array(PRINT_JSON, json_name); + print_string(PRINT_FP, NULL, "%s ", fp_name); + first = false; + } + + open_json_array(PRINT_JSON, NULL); + print_key(app->protocol); + print_uint(PRINT_ANY, NULL, "%d ", app->priority); + close_json_array(PRINT_JSON, NULL); + } + + if (!first) { + close_json_array(PRINT_JSON, json_name); + print_nl(); + } +} + +static void dcb_app_print_ethtype_prio(const struct dcb_app_table *tab) +{ + dcb_app_print_filtered(tab, dcb_app_is_ethtype, dcb_app_print_key_hex, + "ethtype_prio", "ethtype-prio"); +} + +static void dcb_app_print_dscp_prio(const struct dcb *dcb, + const struct dcb_app_table *tab) +{ + dcb_app_print_filtered(tab, dcb_app_is_dscp, + dcb->numeric ? 
dcb_app_print_key_dec + : dcb_app_print_key_dscp, + "dscp_prio", "dscp-prio"); +} + +static void dcb_app_print_stream_port_prio(const struct dcb_app_table *tab) +{ + dcb_app_print_filtered(tab, dcb_app_is_stream_port, dcb_app_print_key_dec, + "stream_port_prio", "stream-port-prio"); +} + +static void dcb_app_print_dgram_port_prio(const struct dcb_app_table *tab) +{ + dcb_app_print_filtered(tab, dcb_app_is_dgram_port, dcb_app_print_key_dec, + "dgram_port_prio", "dgram-port-prio"); +} + +static void dcb_app_print_port_prio(const struct dcb_app_table *tab) +{ + dcb_app_print_filtered(tab, dcb_app_is_port, dcb_app_print_key_dec, + "port_prio", "port-prio"); +} + +static void dcb_app_print_default_prio(const struct dcb_app_table *tab) +{ + bool first = true; + size_t i; + + for (i = 0; i < tab->n_apps; i++) { + if (!dcb_app_is_default(&tab->apps[i])) + continue; + if (first) { + open_json_array(PRINT_JSON, "default_prio"); + print_string(PRINT_FP, NULL, "default-prio ", NULL); + first = false; + } + print_uint(PRINT_ANY, NULL, "%d ", tab->apps[i].priority); + } + + if (!first) { + close_json_array(PRINT_JSON, "default_prio"); + print_nl(); + } +} + +static void dcb_app_print(const struct dcb *dcb, const struct dcb_app_table *tab) +{ + dcb_app_print_ethtype_prio(tab); + dcb_app_print_default_prio(tab); + dcb_app_print_dscp_prio(dcb, tab); + dcb_app_print_stream_port_prio(tab); + dcb_app_print_dgram_port_prio(tab); + dcb_app_print_port_prio(tab); +} + +static int dcb_app_get_table_attr_cb(const struct nlattr *attr, void *data) +{ + struct dcb_app_table *tab = data; + struct dcb_app *app; + int ret; + + if (mnl_attr_get_type(attr) != DCB_ATTR_IEEE_APP) { + fprintf(stderr, "Unknown attribute in DCB_ATTR_IEEE_APP_TABLE: %d\n", + mnl_attr_get_type(attr)); + return MNL_CB_OK; + } + if (mnl_attr_get_payload_len(attr) < sizeof(struct dcb_app)) { + fprintf(stderr, "DCB_ATTR_IEEE_APP payload expected to have size %zd, not %d\n", + sizeof(struct dcb_app), 
mnl_attr_get_payload_len(attr)); + return MNL_CB_OK; + } + + app = mnl_attr_get_payload(attr); + ret = dcb_app_table_push(tab, app); + if (ret != 0) + return MNL_CB_ERROR; + + return MNL_CB_OK; +} + +static int dcb_app_get(struct dcb *dcb, const char *dev, struct dcb_app_table *tab) +{ + uint16_t payload_len; + void *payload; + int ret; + + ret = dcb_get_attribute_va(dcb, dev, DCB_ATTR_IEEE_APP_TABLE, &payload, &payload_len); + if (ret != 0) + return ret; + + ret = mnl_attr_parse_payload(payload, payload_len, dcb_app_get_table_attr_cb, tab); + if (ret != MNL_CB_OK) + return -EINVAL; + + return 0; +} + +struct dcb_app_add_del { + const struct dcb_app_table *tab; + bool (*filter)(const struct dcb_app *app); +}; + +static int dcb_app_add_del_cb(struct dcb *dcb, struct nlmsghdr *nlh, void *data) +{ + struct dcb_app_add_del *add_del = data; + struct nlattr *nest; + size_t i; + + nest = mnl_attr_nest_start(nlh, DCB_ATTR_IEEE_APP_TABLE); + + for (i = 0; i < add_del->tab->n_apps; i++) { + const struct dcb_app *app = &add_del->tab->apps[i]; + + if (add_del->filter == NULL || add_del->filter(app)) + mnl_attr_put(nlh, DCB_ATTR_IEEE_APP, sizeof(*app), app); + } + + mnl_attr_nest_end(nlh, nest); + return 0; +} + +static int dcb_app_add_del(struct dcb *dcb, const char *dev, int command, + const struct dcb_app_table *tab, + bool (*filter)(const struct dcb_app *)) +{ + struct dcb_app_add_del add_del = { + .tab = tab, + .filter = filter, + }; + + if (tab->n_apps == 0) + return 0; + + return dcb_set_attribute_va(dcb, command, dev, dcb_app_add_del_cb, &add_del); +} + +static int dcb_cmd_app_parse_add_del(struct dcb *dcb, const char *dev, + int argc, char **argv, struct dcb_app_table *tab) +{ + struct dcb_app_parse_mapping pm = { + .tab = tab, + }; + int ret; + + if (!argc) { + dcb_app_help_add(); + return 0; + } + + do { + if (matches(*argv, "help") == 0) { + dcb_app_help_add(); + return 0; + } else if (matches(*argv, "ethtype-prio") == 0) { + NEXT_ARG(); + pm.selector = 
IEEE_8021QAZ_APP_SEL_ETHERTYPE; + ret = parse_mapping(&argc, &argv, false, + &dcb_app_parse_mapping_ethtype_prio, + &pm); + } else if (matches(*argv, "default-prio") == 0) { + NEXT_ARG(); + ret = dcb_app_parse_default_prio(&argc, &argv, pm.tab); + if (ret != 0) { + fprintf(stderr, "Invalid default priority %s\n", *argv); + return ret; + } + } else if (matches(*argv, "dscp-prio") == 0) { + NEXT_ARG(); + pm.selector = IEEE_8021QAZ_APP_SEL_DSCP; + ret = parse_mapping_gen(&argc, &argv, + &dcb_app_parse_dscp, + &dcb_app_parse_mapping_dscp_prio, + &pm); + } else if (matches(*argv, "stream-port-prio") == 0) { + NEXT_ARG(); + pm.selector = IEEE_8021QAZ_APP_SEL_STREAM; + ret = parse_mapping(&argc, &argv, false, + &dcb_app_parse_mapping_port_prio, + &pm); + } else if (matches(*argv, "dgram-port-prio") == 0) { + NEXT_ARG(); + pm.selector = IEEE_8021QAZ_APP_SEL_DGRAM; + ret = parse_mapping(&argc, &argv, false, + &dcb_app_parse_mapping_port_prio, + &pm); + } else if (matches(*argv, "port-prio") == 0) { + NEXT_ARG(); + pm.selector = IEEE_8021QAZ_APP_SEL_ANY; + ret = parse_mapping(&argc, &argv, false, + &dcb_app_parse_mapping_port_prio, + &pm); + } else { + fprintf(stderr, "What is \"%s\"?\n", *argv); + dcb_app_help_add(); + return -EINVAL; + } + + if (ret != 0) { + fprintf(stderr, "Invalid mapping %s\n", *argv); + return ret; + } + if (pm.err) + return pm.err; + } while (argc > 0); + + return 0; +} + +static int dcb_cmd_app_add(struct dcb *dcb, const char *dev, int argc, char **argv) +{ + struct dcb_app_table tab = {}; + int ret; + + ret = dcb_cmd_app_parse_add_del(dcb, dev, argc, argv, &tab); + if (ret != 0) + return ret; + + ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_SET, &tab, NULL); + dcb_app_table_fini(&tab); + return ret; +} + +static int dcb_cmd_app_del(struct dcb *dcb, const char *dev, int argc, char **argv) +{ + struct dcb_app_table tab = {}; + int ret; + + ret = dcb_cmd_app_parse_add_del(dcb, dev, argc, argv, &tab); + if (ret != 0) + return ret; + + ret = 
dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_DEL, &tab, NULL); + dcb_app_table_fini(&tab); + return ret; +} + +static int dcb_cmd_app_show(struct dcb *dcb, const char *dev, int argc, char **argv) +{ + struct dcb_app_table tab = {}; + int ret; + + ret = dcb_app_get(dcb, dev, &tab); + if (ret != 0) + return ret; + + dcb_app_table_sort(&tab); + + open_json_object(NULL); + + if (!argc) { + dcb_app_print(dcb, &tab); + goto out; + } + + do { + if (matches(*argv, "help") == 0) { + dcb_app_help_show_flush(); + goto out; + } else if (matches(*argv, "ethtype-prio") == 0) { + dcb_app_print_ethtype_prio(&tab); + } else if (matches(*argv, "dscp-prio") == 0) { + dcb_app_print_dscp_prio(dcb, &tab); + } else if (matches(*argv, "stream-port-prio") == 0) { + dcb_app_print_stream_port_prio(&tab); + } else if (matches(*argv, "dgram-port-prio") == 0) { + dcb_app_print_dgram_port_prio(&tab); + } else if (matches(*argv, "port-prio") == 0) { + dcb_app_print_port_prio(&tab); + } else { + fprintf(stderr, "What is \"%s\"?\n", *argv); + dcb_app_help_show_flush(); + ret = -EINVAL; + goto out; + } + + NEXT_ARG_FWD(); + } while (argc > 0); + +out: + close_json_object(); + dcb_app_table_fini(&tab); + return 0; +} + +static int dcb_cmd_app_flush(struct dcb *dcb, const char *dev, int argc, char **argv) +{ + struct dcb_app_table tab = {}; + int ret; + + ret = dcb_app_get(dcb, dev, &tab); + if (ret != 0) + return ret; + + if (!argc) { + ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_DEL, &tab, NULL); + goto out; + } + + do { + if (matches(*argv, "help") == 0) { + dcb_app_help_show_flush(); + goto out; + } else if (matches(*argv, "ethtype-prio") == 0) { + ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_DEL, &tab, + &dcb_app_is_ethtype); + if (ret != 0) + goto out; + } else if (matches(*argv, "default-prio") == 0) { + ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_DEL, &tab, + &dcb_app_is_default); + if (ret != 0) + goto out; + } else if (matches(*argv, "dscp-prio") == 0) { + ret = dcb_app_add_del(dcb, dev, 
DCB_CMD_IEEE_DEL, &tab, + &dcb_app_is_dscp); + if (ret != 0) + goto out; + } else { + fprintf(stderr, "What is \"%s\"?\n", *argv); + dcb_app_help_show_flush(); + ret = -EINVAL; + goto out; + } + + NEXT_ARG_FWD(); + } while (argc > 0); + +out: + dcb_app_table_fini(&tab); + return ret; +} + +static int dcb_cmd_app_replace(struct dcb *dcb, const char *dev, int argc, char **argv) +{ + struct dcb_app_table orig = {}; + struct dcb_app_table tab = {}; + struct dcb_app_table new = {}; + int ret; + + ret = dcb_app_get(dcb, dev, &orig); + if (ret != 0) + return ret; + + ret = dcb_cmd_app_parse_add_del(dcb, dev, argc, argv, &tab); + if (ret != 0) + goto out; + + /* Attempts to add an existing entry would be rejected, so drop + * these entries from tab. + */ + ret = dcb_app_table_copy(&new, &tab); + if (ret != 0) + goto out; + dcb_app_table_remove_existing(&new, &orig); + + ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_SET, &new, NULL); + if (ret != 0) { + fprintf(stderr, "Could not add new APP entries\n"); + goto out; + } + + /* Remove the obsolete entries. 
*/ + dcb_app_table_remove_replaced(&orig, &tab); + ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_DEL, &orig, NULL); + if (ret != 0) { + fprintf(stderr, "Could not remove replaced APP entries\n"); + goto out; + } + +out: + dcb_app_table_fini(&new); + dcb_app_table_fini(&tab); + dcb_app_table_fini(&orig); + return 0; +} + +int dcb_cmd_app(struct dcb *dcb, int argc, char **argv) +{ + if (!argc || matches(*argv, "help") == 0) { + dcb_app_help(); + return 0; + } else if (matches(*argv, "show") == 0) { + NEXT_ARG_FWD(); + return dcb_cmd_parse_dev(dcb, argc, argv, + dcb_cmd_app_show, dcb_app_help_show_flush); + } else if (matches(*argv, "flush") == 0) { + NEXT_ARG_FWD(); + return dcb_cmd_parse_dev(dcb, argc, argv, + dcb_cmd_app_flush, dcb_app_help_show_flush); + } else if (matches(*argv, "add") == 0) { + NEXT_ARG_FWD(); + return dcb_cmd_parse_dev(dcb, argc, argv, + dcb_cmd_app_add, dcb_app_help_add); + } else if (matches(*argv, "del") == 0) { + NEXT_ARG_FWD(); + return dcb_cmd_parse_dev(dcb, argc, argv, + dcb_cmd_app_del, dcb_app_help_add); + } else if (matches(*argv, "replace") == 0) { + NEXT_ARG_FWD(); + return dcb_cmd_parse_dev(dcb, argc, argv, + dcb_cmd_app_replace, dcb_app_help_add); + } else { + fprintf(stderr, "What is \"%s\"?\n", *argv); + dcb_app_help(); + return -EINVAL; + } +} diff --git a/dcb/dcb_buffer.c b/dcb/dcb_buffer.c new file mode 100644 index 0000000..e6a88a0 --- /dev/null +++ b/dcb/dcb_buffer.c @@ -0,0 +1,235 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include +#include +#include +#include + +#include "dcb.h" +#include "utils.h" + +static void dcb_buffer_help_set(void) +{ + fprintf(stderr, + "Usage: dcb buffer set dev STRING\n" + " [ prio-buffer PRIO-MAP ]\n" + " [ buffer-size SIZE-MAP ]\n" + "\n" + " where PRIO-MAP := [ PRIO-MAP ] PRIO-MAPPING\n" + " PRIO-MAPPING := { all | PRIO }:BUFFER\n" + " SIZE-MAP := [ SIZE-MAP ] SIZE-MAPPING\n" + " SIZE-MAPPING := { all | BUFFER }:INTEGER\n" + " PRIO := { 0 .. 7 }\n" + " BUFFER := { 0 .. 
7 }\n" + "\n" + ); +} + +static void dcb_buffer_help_show(void) +{ + fprintf(stderr, + "Usage: dcb buffer show dev STRING\n" + " [ prio-buffer ] [ buffer-size ] [ total-size ]\n" + "\n" + ); +} + +static void dcb_buffer_help(void) +{ + fprintf(stderr, + "Usage: dcb buffer help\n" + "\n" + ); + dcb_buffer_help_show(); + dcb_buffer_help_set(); +} + +static int dcb_buffer_parse_mapping_prio_buffer(__u32 key, char *value, void *data) +{ + struct dcbnl_buffer *buffer = data; + __u8 buf; + + if (get_u8(&buf, value, 0)) + return -EINVAL; + + return dcb_parse_mapping("PRIO", key, IEEE_8021Q_MAX_PRIORITIES - 1, + "BUFFER", buf, DCBX_MAX_BUFFERS - 1, + dcb_set_u8, buffer->prio2buffer); +} + +static int dcb_buffer_parse_mapping_buffer_size(__u32 key, char *value, void *data) +{ + struct dcbnl_buffer *buffer = data; + unsigned int size; + + if (get_size(&size, value)) { + fprintf(stderr, "%d:%s: Illegal value for buffer size\n", key, value); + return -EINVAL; + } + + return dcb_parse_mapping("BUFFER", key, DCBX_MAX_BUFFERS - 1, + "INTEGER", size, -1, + dcb_set_u32, buffer->buffer_size); +} + +static void dcb_buffer_print_total_size(const struct dcbnl_buffer *buffer) +{ + print_size(PRINT_ANY, "total_size", "total-size %s ", buffer->total_size); +} + +static void dcb_buffer_print_prio_buffer(const struct dcbnl_buffer *buffer) +{ + dcb_print_named_array("prio_buffer", "prio-buffer", + buffer->prio2buffer, ARRAY_SIZE(buffer->prio2buffer), + dcb_print_array_u8); +} + +static void dcb_buffer_print_buffer_size(const struct dcbnl_buffer *buffer) +{ + size_t size = ARRAY_SIZE(buffer->buffer_size); + SPRINT_BUF(b); + size_t i; + + open_json_array(PRINT_JSON, "buffer_size"); + print_string(PRINT_FP, NULL, "buffer-size ", NULL); + + for (i = 0; i < size; i++) { + snprintf(b, sizeof(b), "%zd:%%s ", i); + print_size(PRINT_ANY, NULL, b, buffer->buffer_size[i]); + } + + close_json_array(PRINT_JSON, "buffer_size"); +} + +static void dcb_buffer_print(const struct dcbnl_buffer *buffer) +{ + 
dcb_buffer_print_prio_buffer(buffer); + print_nl(); + + dcb_buffer_print_buffer_size(buffer); + print_nl(); + + dcb_buffer_print_total_size(buffer); + print_nl(); +} + +static int dcb_buffer_get(struct dcb *dcb, const char *dev, struct dcbnl_buffer *buffer) +{ + return dcb_get_attribute(dcb, dev, DCB_ATTR_DCB_BUFFER, buffer, sizeof(*buffer)); +} + +static int dcb_buffer_set(struct dcb *dcb, const char *dev, const struct dcbnl_buffer *buffer) +{ + return dcb_set_attribute(dcb, dev, DCB_ATTR_DCB_BUFFER, buffer, sizeof(*buffer)); +} + +static int dcb_cmd_buffer_set(struct dcb *dcb, const char *dev, int argc, char **argv) +{ + struct dcbnl_buffer buffer; + int ret; + + if (!argc) { + dcb_buffer_help_set(); + return 0; + } + + ret = dcb_buffer_get(dcb, dev, &buffer); + if (ret) + return ret; + + do { + if (matches(*argv, "help") == 0) { + dcb_buffer_help_set(); + return 0; + } else if (matches(*argv, "prio-buffer") == 0) { + NEXT_ARG(); + ret = parse_mapping(&argc, &argv, true, + &dcb_buffer_parse_mapping_prio_buffer, &buffer); + if (ret) { + fprintf(stderr, "Invalid priority mapping %s\n", *argv); + return ret; + } + continue; + } else if (matches(*argv, "buffer-size") == 0) { + NEXT_ARG(); + ret = parse_mapping(&argc, &argv, true, + &dcb_buffer_parse_mapping_buffer_size, &buffer); + if (ret) { + fprintf(stderr, "Invalid buffer size mapping %s\n", *argv); + return ret; + } + continue; + } else { + fprintf(stderr, "What is \"%s\"?\n", *argv); + dcb_buffer_help_set(); + return -EINVAL; + } + + NEXT_ARG_FWD(); + } while (argc > 0); + + return dcb_buffer_set(dcb, dev, &buffer); +} + +static int dcb_cmd_buffer_show(struct dcb *dcb, const char *dev, int argc, char **argv) +{ + struct dcbnl_buffer buffer; + int ret; + + ret = dcb_buffer_get(dcb, dev, &buffer); + if (ret) + return ret; + + open_json_object(NULL); + + if (!argc) { + dcb_buffer_print(&buffer); + goto out; + } + + do { + if (matches(*argv, "help") == 0) { + dcb_buffer_help_show(); + return 0; + } else if 
(matches(*argv, "prio-buffer") == 0) { + dcb_buffer_print_prio_buffer(&buffer); + print_nl(); + } else if (matches(*argv, "buffer-size") == 0) { + dcb_buffer_print_buffer_size(&buffer); + print_nl(); + } else if (matches(*argv, "total-size") == 0) { + dcb_buffer_print_total_size(&buffer); + print_nl(); + } else { + fprintf(stderr, "What is \"%s\"?\n", *argv); + dcb_buffer_help_show(); + return -EINVAL; + } + + NEXT_ARG_FWD(); + } while (argc > 0); + +out: + close_json_object(); + return 0; +} + +int dcb_cmd_buffer(struct dcb *dcb, int argc, char **argv) +{ + if (!argc || matches(*argv, "help") == 0) { + dcb_buffer_help(); + return 0; + } else if (matches(*argv, "show") == 0) { + NEXT_ARG_FWD(); + return dcb_cmd_parse_dev(dcb, argc, argv, + dcb_cmd_buffer_show, dcb_buffer_help_show); + } else if (matches(*argv, "set") == 0) { + NEXT_ARG_FWD(); + return dcb_cmd_parse_dev(dcb, argc, argv, + dcb_cmd_buffer_set, dcb_buffer_help_set); + } else { + fprintf(stderr, "What is \"%s\"?\n", *argv); + dcb_buffer_help(); + return -EINVAL; + } +} diff --git a/dcb/dcb_dcbx.c b/dcb/dcb_dcbx.c new file mode 100644 index 0000000..244b671 --- /dev/null +++ b/dcb/dcb_dcbx.c @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include +#include +#include +#include + +#include "dcb.h" +#include "utils.h" + +static void dcb_dcbx_help_set(void) +{ + fprintf(stderr, + "Usage: dcb dcbx set dev STRING\n" + " [ host | lld-managed ]\n" + " [ cee | ieee ] [ static ]\n" + "\n" + ); +} + +static void dcb_dcbx_help_show(void) +{ + fprintf(stderr, + "Usage: dcb dcbx show dev STRING\n" + "\n" + ); +} + +static void dcb_dcbx_help(void) +{ + fprintf(stderr, + "Usage: dcb dcbx help\n" + "\n" + ); + dcb_dcbx_help_show(); + dcb_dcbx_help_set(); +} + +struct dcb_dcbx_flag { + __u8 value; + const char *key_fp; + const char *key_json; +}; + +static struct dcb_dcbx_flag dcb_dcbx_flags[] = { + {DCB_CAP_DCBX_HOST, "host"}, + {DCB_CAP_DCBX_LLD_MANAGED, "lld-managed", "lld_managed"}, + 
{DCB_CAP_DCBX_VER_CEE, "cee"}, + {DCB_CAP_DCBX_VER_IEEE, "ieee"}, + {DCB_CAP_DCBX_STATIC, "static"}, +}; + +static void dcb_dcbx_print(__u8 dcbx) +{ + int bit; + int i; + + while ((bit = ffs(dcbx))) { + bool found = false; + + bit--; + for (i = 0; i < ARRAY_SIZE(dcb_dcbx_flags); i++) { + struct dcb_dcbx_flag *flag = &dcb_dcbx_flags[i]; + + if (flag->value == 1 << bit) { + print_bool(PRINT_JSON, flag->key_json ?: flag->key_fp, + NULL, true); + print_string(PRINT_FP, NULL, "%s ", flag->key_fp); + found = true; + break; + } + } + + if (!found) + fprintf(stderr, "Unknown DCBX bit %#x.\n", 1 << bit); + + dcbx &= ~(1 << bit); + } + + print_nl(); +} + +static int dcb_dcbx_get(struct dcb *dcb, const char *dev, __u8 *dcbx) +{ + __u16 payload_len; + void *payload; + int err; + + err = dcb_get_attribute_bare(dcb, DCB_CMD_IEEE_GET, dev, DCB_ATTR_DCBX, + &payload, &payload_len); + if (err != 0) + return err; + + if (payload_len != 1) { + fprintf(stderr, "DCB_ATTR_DCBX payload has size %d, expected 1.\n", + payload_len); + return -EINVAL; + } + *dcbx = *(__u8 *) payload; + return 0; +} + +static int dcb_dcbx_set(struct dcb *dcb, const char *dev, __u8 dcbx) +{ + return dcb_set_attribute_bare(dcb, DCB_CMD_SDCBX, dev, DCB_ATTR_DCBX, + &dcbx, 1, DCB_ATTR_DCBX); +} + +static int dcb_cmd_dcbx_set(struct dcb *dcb, const char *dev, int argc, char **argv) +{ + __u8 dcbx = 0; + __u8 i; + + if (!argc) { + dcb_dcbx_help_set(); + return 0; + } + + do { + if (matches(*argv, "help") == 0) { + dcb_dcbx_help_set(); + return 0; + } + + for (i = 0; i < ARRAY_SIZE(dcb_dcbx_flags); i++) { + struct dcb_dcbx_flag *flag = &dcb_dcbx_flags[i]; + + if (matches(*argv, flag->key_fp) == 0) { + dcbx |= flag->value; + NEXT_ARG_FWD(); + goto next; + } + } + + fprintf(stderr, "What is \"%s\"?\n", *argv); + dcb_dcbx_help_set(); + return -EINVAL; + +next: + ; + } while (argc > 0); + + return dcb_dcbx_set(dcb, dev, dcbx); +} + +static int dcb_cmd_dcbx_show(struct dcb *dcb, const char *dev, int argc, char **argv) +{ 
+ __u8 dcbx; + int ret; + + ret = dcb_dcbx_get(dcb, dev, &dcbx); + if (ret != 0) + return ret; + + while (argc > 0) { + if (matches(*argv, "help") == 0) { + dcb_dcbx_help_show(); + return 0; + } else { + fprintf(stderr, "What is \"%s\"?\n", *argv); + dcb_dcbx_help_show(); + return -EINVAL; + } + + NEXT_ARG_FWD(); + } + + open_json_object(NULL); + dcb_dcbx_print(dcbx); + close_json_object(); + return 0; +} + +int dcb_cmd_dcbx(struct dcb *dcb, int argc, char **argv) +{ + if (!argc || matches(*argv, "help") == 0) { + dcb_dcbx_help(); + return 0; + } else if (matches(*argv, "show") == 0) { + NEXT_ARG_FWD(); + return dcb_cmd_parse_dev(dcb, argc, argv, + dcb_cmd_dcbx_show, dcb_dcbx_help_show); + } else if (matches(*argv, "set") == 0) { + NEXT_ARG_FWD(); + return dcb_cmd_parse_dev(dcb, argc, argv, + dcb_cmd_dcbx_set, dcb_dcbx_help_set); + } else { + fprintf(stderr, "What is \"%s\"?\n", *argv); + dcb_dcbx_help(); + return -EINVAL; + } +} diff --git a/dcb/dcb_ets.c b/dcb/dcb_ets.c new file mode 100644 index 0000000..c208810 --- /dev/null +++ b/dcb/dcb_ets.c @@ -0,0 +1,435 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include +#include +#include + +#include "dcb.h" +#include "utils.h" + +static void dcb_ets_help_set(void) +{ + fprintf(stderr, + "Usage: dcb ets set dev STRING\n" + " [ willing { on | off } ]\n" + " [ { tc-tsa | reco-tc-tsa } TSA-MAP ]\n" + " [ { pg-bw | tc-bw | reco-tc-bw } BW-MAP ]\n" + " [ { prio-tc | reco-prio-tc } PRIO-MAP ]\n" + "\n" + " where TSA-MAP := [ TSA-MAP ] TSA-MAPPING\n" + " TSA-MAPPING := { all | TC }:{ strict | cbs | ets | vendor }\n" + " BW-MAP := [ BW-MAP ] BW-MAPPING\n" + " BW-MAPPING := { all | TC }:INTEGER\n" + " PRIO-MAP := [ PRIO-MAP ] PRIO-MAPPING\n" + " PRIO-MAPPING := { all | PRIO }:TC\n" + " TC := { 0 .. 7 }\n" + " PRIO := { 0 .. 
7 }\n" + "\n" + ); +} + +static void dcb_ets_help_show(void) +{ + fprintf(stderr, + "Usage: dcb ets show dev STRING\n" + " [ willing ] [ ets-cap ] [ cbs ] [ tc-tsa ]\n" + " [ reco-tc-tsa ] [ pg-bw ] [ tc-bw ] [ reco-tc-bw ]\n" + " [ prio-tc ] [ reco-prio-tc ]\n" + "\n" + ); +} + +static void dcb_ets_help(void) +{ + fprintf(stderr, + "Usage: dcb ets help\n" + "\n" + ); + dcb_ets_help_show(); + dcb_ets_help_set(); +} + +static const char *const tsa_names[] = { + [IEEE_8021QAZ_TSA_STRICT] = "strict", + [IEEE_8021QAZ_TSA_CB_SHAPER] = "cbs", + [IEEE_8021QAZ_TSA_ETS] = "ets", + [IEEE_8021QAZ_TSA_VENDOR] = "vendor", +}; + +static int dcb_ets_parse_mapping_tc_tsa(__u32 key, char *value, void *data) +{ + __u8 tsa; + int ret; + + tsa = parse_one_of("TSA", value, tsa_names, ARRAY_SIZE(tsa_names), &ret); + if (ret) + return ret; + + return dcb_parse_mapping("TC", key, IEEE_8021QAZ_MAX_TCS - 1, + "TSA", tsa, -1U, + dcb_set_u8, data); +} + +static int dcb_ets_parse_mapping_tc_bw(__u32 key, char *value, void *data) +{ + __u8 bw; + + if (get_u8(&bw, value, 0)) + return -EINVAL; + + return dcb_parse_mapping("TC", key, IEEE_8021QAZ_MAX_TCS - 1, + "BW", bw, 100, + dcb_set_u8, data); +} + +static int dcb_ets_parse_mapping_prio_tc(unsigned int key, char *value, void *data) +{ + __u8 tc; + + if (get_u8(&tc, value, 0)) + return -EINVAL; + + return dcb_parse_mapping("PRIO", key, IEEE_8021QAZ_MAX_TCS - 1, + "TC", tc, IEEE_8021QAZ_MAX_TCS - 1, + dcb_set_u8, data); +} + +static void dcb_print_array_tsa(const __u8 *array, size_t size) +{ + dcb_print_array_kw(array, size, tsa_names, ARRAY_SIZE(tsa_names)); +} + +static void dcb_ets_print_willing(const struct ieee_ets *ets) +{ + print_on_off(PRINT_ANY, "willing", "willing %s ", ets->willing); +} + +static void dcb_ets_print_ets_cap(const struct ieee_ets *ets) +{ + print_uint(PRINT_ANY, "ets_cap", "ets-cap %d ", ets->ets_cap); +} + +static void dcb_ets_print_cbs(const struct ieee_ets *ets) +{ + print_on_off(PRINT_ANY, "cbs", "cbs %s ", 
ets->cbs); +} + +static void dcb_ets_print_tc_bw(const struct ieee_ets *ets) +{ + dcb_print_named_array("tc_bw", "tc-bw", + ets->tc_tx_bw, ARRAY_SIZE(ets->tc_tx_bw), + dcb_print_array_u8); +} + +static void dcb_ets_print_pg_bw(const struct ieee_ets *ets) +{ + dcb_print_named_array("pg_bw", "pg-bw", + ets->tc_rx_bw, ARRAY_SIZE(ets->tc_rx_bw), + dcb_print_array_u8); +} + +static void dcb_ets_print_tc_tsa(const struct ieee_ets *ets) +{ + dcb_print_named_array("tc_tsa", "tc-tsa", + ets->tc_tsa, ARRAY_SIZE(ets->tc_tsa), + dcb_print_array_tsa); +} + +static void dcb_ets_print_prio_tc(const struct ieee_ets *ets) +{ + dcb_print_named_array("prio_tc", "prio-tc", + ets->prio_tc, ARRAY_SIZE(ets->prio_tc), + dcb_print_array_u8); +} + +static void dcb_ets_print_reco_tc_bw(const struct ieee_ets *ets) +{ + dcb_print_named_array("reco_tc_bw", "reco-tc-bw", + ets->tc_reco_bw, ARRAY_SIZE(ets->tc_reco_bw), + dcb_print_array_u8); +} + +static void dcb_ets_print_reco_tc_tsa(const struct ieee_ets *ets) +{ + dcb_print_named_array("reco_tc_tsa", "reco-tc-tsa", + ets->tc_reco_tsa, ARRAY_SIZE(ets->tc_reco_tsa), + dcb_print_array_tsa); +} + +static void dcb_ets_print_reco_prio_tc(const struct ieee_ets *ets) +{ + dcb_print_named_array("reco_prio_tc", "reco-prio-tc", + ets->reco_prio_tc, ARRAY_SIZE(ets->reco_prio_tc), + dcb_print_array_u8); +} + +static void dcb_ets_print(const struct ieee_ets *ets) +{ + dcb_ets_print_willing(ets); + dcb_ets_print_ets_cap(ets); + dcb_ets_print_cbs(ets); + print_nl(); + + dcb_ets_print_tc_bw(ets); + print_nl(); + + dcb_ets_print_pg_bw(ets); + print_nl(); + + dcb_ets_print_tc_tsa(ets); + print_nl(); + + dcb_ets_print_prio_tc(ets); + print_nl(); + + dcb_ets_print_reco_tc_bw(ets); + print_nl(); + + dcb_ets_print_reco_tc_tsa(ets); + print_nl(); + + dcb_ets_print_reco_prio_tc(ets); + print_nl(); +} + +static int dcb_ets_get(struct dcb *dcb, const char *dev, struct ieee_ets *ets) +{ + return dcb_get_attribute(dcb, dev, DCB_ATTR_IEEE_ETS, ets, sizeof(*ets)); +} + 
+/* Validate one TC bandwidth table before it is sent to the kernel.
+ *
+ * @bw:   per-TC bandwidth percentages (IEEE_8021QAZ_MAX_TCS entries are read)
+ * @tsa:  per-TC transmission selection algorithm for the same TCs
+ * @what: name of the table, used only in error messages
+ *
+ * Returns 0 when every entry is 0..100 and the sum is 100, or when no TC uses
+ * ETS and the sum is 0. Otherwise prints a diagnostic and returns -EINVAL.
+ */
+static int dcb_ets_validate_bw(const __u8 bw[], const __u8 tsa[], const char *what)
+{
+	bool has_ets = false;
+	unsigned int total = 0;
+	unsigned int tc;
+
+	for (tc = 0; tc < IEEE_8021QAZ_MAX_TCS; tc++) {
+		if (tsa[tc] == IEEE_8021QAZ_TSA_ETS) {
+			has_ets = true;
+			break;
+		}
+	}
+
+	/* TC bandwidth is only intended for ETS, but 802.1Q-2018 only requires
+	 * that the sum be 100, and individual entries 0..100. It explicitly
+	 * notes that non-ETS TCs can have non-0 TC bandwidth during
+	 * reconfiguration.
+	 */
+	for (tc = 0; tc < IEEE_8021QAZ_MAX_TCS; tc++) {
+		if (bw[tc] > 100) {
+			/* tc is unsigned, so it is printed with %u. */
+			fprintf(stderr, "%d%% for TC %u of %s is not a valid bandwidth percentage, expected 0..100%%\n",
+				bw[tc], tc, what);
+			return -EINVAL;
+		}
+		total += bw[tc];
+	}
+
+	/* This is what 802.1Q-2018 requires. */
+	if (total == 100)
+		return 0;
+
+	/* But this requirement does not make sense for all-strict
+	 * configurations. Anything else than 0 does not make sense: either BW
+	 * has not been reconfigured for the all-strict allocation yet, at which
+	 * point we expect sum of 100. Or it has already been reconfigured, at
+	 * which point accept 0.
+	 */
+	if (!has_ets && total == 0)
+		return 0;
+
+	/* total is unsigned, so it is printed with %u. */
+	fprintf(stderr, "Bandwidth percentages in %s sum to %u%%, expected %d%%\n",
+		what, total, has_ets ? 100 : 0);
+	return -EINVAL;
+}
+
+/* Validate the tc-bw and reco-tc-bw tables of @ets and, if both pass, send
+ * the whole structure as DCB_ATTR_IEEE_ETS. Returns -EINVAL on a validation
+ * failure, otherwise the result of dcb_set_attribute().
+ */
+static int dcb_ets_set(struct dcb *dcb, const char *dev, const struct ieee_ets *ets)
+{
+	/* Do not validate pg-bw, which is not standard and has unclear
+	 * meaning.
+	 */
+	if (dcb_ets_validate_bw(ets->tc_tx_bw, ets->tc_tsa, "tc-bw") ||
+	    dcb_ets_validate_bw(ets->tc_reco_bw, ets->tc_reco_tsa, "reco-tc-bw"))
+		return -EINVAL;
+
+	return dcb_set_attribute(dcb, dev, DCB_ATTR_IEEE_ETS, ets, sizeof(*ets));
+}
+
+/* Handle "dcb ets set dev DEV ...": fetch the current ETS configuration,
+ * apply each option given on the command line to it, and write it back.
+ */
+static int dcb_cmd_ets_set(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	struct ieee_ets ets;
+	int ret;
+
+	if (!argc) {
+		dcb_ets_help_set();
+		/* Same result as the buffer/dcbx/maxrate/pfc "set" commands
+		 * when they print help for an empty option list.
+		 */
+		return 0;
+	}
+
+	ret = dcb_ets_get(dcb, dev, &ets);
+	if (ret)
+		return ret;
+
+	do {
+		if (matches(*argv, "help") == 0) {
+			dcb_ets_help_set();
+			return 0;
+		} else if (matches(*argv, "willing") == 0) {
+			NEXT_ARG();
+			ets.willing = parse_on_off("willing", *argv, &ret);
+			if (ret)
+				return ret;
+		} else if (matches(*argv, "tc-tsa") == 0) {
+			NEXT_ARG();
+			ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_tc_tsa,
+					    ets.tc_tsa);
+			if (ret) {
+				fprintf(stderr, "Invalid tc-tsa mapping %s\n", *argv);
+				return ret;
+			}
+			continue;
+		} else if (matches(*argv, "reco-tc-tsa") == 0) {
+			NEXT_ARG();
+			ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_tc_tsa,
+					    ets.tc_reco_tsa);
+			if (ret) {
+				fprintf(stderr, "Invalid reco-tc-tsa mapping %s\n", *argv);
+				return ret;
+			}
+			continue;
+		} else if (matches(*argv, "tc-bw") == 0) {
+			NEXT_ARG();
+			ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_tc_bw,
+					    ets.tc_tx_bw);
+			if (ret) {
+				fprintf(stderr, "Invalid tc-bw mapping %s\n", *argv);
+				return ret;
+			}
+			continue;
+		} else if (matches(*argv, "pg-bw") == 0) {
+			NEXT_ARG();
+			ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_tc_bw,
+					    ets.tc_rx_bw);
+			if (ret) {
+				fprintf(stderr, "Invalid pg-bw mapping %s\n", *argv);
+				return ret;
+			}
+			continue;
+		} else if (matches(*argv, "reco-tc-bw") == 0) {
+			NEXT_ARG();
+			ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_tc_bw,
+					    ets.tc_reco_bw);
+			if (ret) {
+				fprintf(stderr, "Invalid reco-tc-bw mapping %s\n", *argv);
+				return ret;
+			}
+			continue;
+		} else if (matches(*argv, 
"prio-tc") == 0) { + NEXT_ARG(); + ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_prio_tc, + ets.prio_tc); + if (ret) { + fprintf(stderr, "Invalid prio-tc mapping %s\n", *argv); + return ret; + } + continue; + } else if (matches(*argv, "reco-prio-tc") == 0) { + NEXT_ARG(); + ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_prio_tc, + ets.reco_prio_tc); + if (ret) { + fprintf(stderr, "Invalid reco-prio-tc mapping %s\n", *argv); + return ret; + } + continue; + } else { + fprintf(stderr, "What is \"%s\"?\n", *argv); + dcb_ets_help_set(); + return -EINVAL; + } + + NEXT_ARG_FWD(); + } while (argc > 0); + + return dcb_ets_set(dcb, dev, &ets); +} + +static int dcb_cmd_ets_show(struct dcb *dcb, const char *dev, int argc, char **argv) +{ + struct ieee_ets ets; + int ret; + + ret = dcb_ets_get(dcb, dev, &ets); + if (ret) + return ret; + + open_json_object(NULL); + + if (!argc) { + dcb_ets_print(&ets); + goto out; + } + + do { + if (matches(*argv, "help") == 0) { + dcb_ets_help_show(); + return 0; + } else if (matches(*argv, "willing") == 0) { + dcb_ets_print_willing(&ets); + print_nl(); + } else if (matches(*argv, "ets-cap") == 0) { + dcb_ets_print_ets_cap(&ets); + print_nl(); + } else if (matches(*argv, "cbs") == 0) { + dcb_ets_print_cbs(&ets); + print_nl(); + } else if (matches(*argv, "tc-tsa") == 0) { + dcb_ets_print_tc_tsa(&ets); + print_nl(); + } else if (matches(*argv, "reco-tc-tsa") == 0) { + dcb_ets_print_reco_tc_tsa(&ets); + print_nl(); + } else if (matches(*argv, "tc-bw") == 0) { + dcb_ets_print_tc_bw(&ets); + print_nl(); + } else if (matches(*argv, "pg-bw") == 0) { + dcb_ets_print_pg_bw(&ets); + print_nl(); + } else if (matches(*argv, "reco-tc-bw") == 0) { + dcb_ets_print_reco_tc_bw(&ets); + print_nl(); + } else if (matches(*argv, "prio-tc") == 0) { + dcb_ets_print_prio_tc(&ets); + print_nl(); + } else if (matches(*argv, "reco-prio-tc") == 0) { + dcb_ets_print_reco_prio_tc(&ets); + print_nl(); + } else { + fprintf(stderr, "What is 
\"%s\"?\n", *argv); + dcb_ets_help_show(); + return -EINVAL; + } + + NEXT_ARG_FWD(); + } while (argc > 0); + +out: + close_json_object(); + return 0; +} + +int dcb_cmd_ets(struct dcb *dcb, int argc, char **argv) +{ + if (!argc || matches(*argv, "help") == 0) { + dcb_ets_help(); + return 0; + } else if (matches(*argv, "show") == 0) { + NEXT_ARG_FWD(); + return dcb_cmd_parse_dev(dcb, argc, argv, dcb_cmd_ets_show, dcb_ets_help_show); + } else if (matches(*argv, "set") == 0) { + NEXT_ARG_FWD(); + return dcb_cmd_parse_dev(dcb, argc, argv, dcb_cmd_ets_set, dcb_ets_help_set); + } else { + fprintf(stderr, "What is \"%s\"?\n", *argv); + dcb_ets_help(); + return -EINVAL; + } +} diff --git a/dcb/dcb_maxrate.c b/dcb/dcb_maxrate.c new file mode 100644 index 0000000..1538c6d --- /dev/null +++ b/dcb/dcb_maxrate.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include +#include +#include +#include + +#include "dcb.h" +#include "utils.h" + +static void dcb_maxrate_help_set(void) +{ + fprintf(stderr, + "Usage: dcb maxrate set dev STRING\n" + " [ tc-maxrate RATE-MAP ]\n" + "\n" + " where RATE-MAP := [ RATE-MAP ] RATE-MAPPING\n" + " RATE-MAPPING := { all | TC }:RATE\n" + " TC := { 0 .. 
7 }\n" + "\n" + ); +} + +static void dcb_maxrate_help_show(void) +{ + fprintf(stderr, + "Usage: dcb [ -i ] maxrate show dev STRING\n" + " [ tc-maxrate ]\n" + "\n" + ); +} + +static void dcb_maxrate_help(void) +{ + fprintf(stderr, + "Usage: dcb maxrate help\n" + "\n" + ); + dcb_maxrate_help_show(); + dcb_maxrate_help_set(); +} + +static int dcb_maxrate_parse_mapping_tc_maxrate(__u32 key, char *value, void *data) +{ + __u64 rate; + + if (get_rate64(&rate, value)) + return -EINVAL; + + return dcb_parse_mapping("TC", key, IEEE_8021QAZ_MAX_TCS - 1, + "RATE", rate, -1, + dcb_set_u64, data); +} + +static void dcb_maxrate_print_tc_maxrate(struct dcb *dcb, const struct ieee_maxrate *maxrate) +{ + size_t size = ARRAY_SIZE(maxrate->tc_maxrate); + SPRINT_BUF(b); + size_t i; + + open_json_array(PRINT_JSON, "tc_maxrate"); + print_string(PRINT_FP, NULL, "tc-maxrate ", NULL); + + for (i = 0; i < size; i++) { + snprintf(b, sizeof(b), "%zd:%%s ", i); + print_rate(dcb->use_iec, PRINT_ANY, NULL, b, maxrate->tc_maxrate[i]); + } + + close_json_array(PRINT_JSON, "tc_maxrate"); +} + +static void dcb_maxrate_print(struct dcb *dcb, const struct ieee_maxrate *maxrate) +{ + dcb_maxrate_print_tc_maxrate(dcb, maxrate); + print_nl(); +} + +static int dcb_maxrate_get(struct dcb *dcb, const char *dev, struct ieee_maxrate *maxrate) +{ + return dcb_get_attribute(dcb, dev, DCB_ATTR_IEEE_MAXRATE, maxrate, sizeof(*maxrate)); +} + +static int dcb_maxrate_set(struct dcb *dcb, const char *dev, const struct ieee_maxrate *maxrate) +{ + return dcb_set_attribute(dcb, dev, DCB_ATTR_IEEE_MAXRATE, maxrate, sizeof(*maxrate)); +} + +static int dcb_cmd_maxrate_set(struct dcb *dcb, const char *dev, int argc, char **argv) +{ + struct ieee_maxrate maxrate; + int ret; + + if (!argc) { + dcb_maxrate_help_set(); + return 0; + } + + ret = dcb_maxrate_get(dcb, dev, &maxrate); + if (ret) + return ret; + + do { + if (matches(*argv, "help") == 0) { + dcb_maxrate_help_set(); + return 0; + } else if (matches(*argv, 
"tc-maxrate") == 0) { + NEXT_ARG(); + ret = parse_mapping(&argc, &argv, true, + &dcb_maxrate_parse_mapping_tc_maxrate, &maxrate); + if (ret) { + fprintf(stderr, "Invalid mapping %s\n", *argv); + return ret; + } + continue; + } else { + fprintf(stderr, "What is \"%s\"?\n", *argv); + dcb_maxrate_help_set(); + return -EINVAL; + } + + NEXT_ARG_FWD(); + } while (argc > 0); + + return dcb_maxrate_set(dcb, dev, &maxrate); +} + +static int dcb_cmd_maxrate_show(struct dcb *dcb, const char *dev, int argc, char **argv) +{ + struct ieee_maxrate maxrate; + int ret; + + ret = dcb_maxrate_get(dcb, dev, &maxrate); + if (ret) + return ret; + + open_json_object(NULL); + + if (!argc) { + dcb_maxrate_print(dcb, &maxrate); + goto out; + } + + do { + if (matches(*argv, "help") == 0) { + dcb_maxrate_help_show(); + return 0; + } else if (matches(*argv, "tc-maxrate") == 0) { + dcb_maxrate_print_tc_maxrate(dcb, &maxrate); + print_nl(); + } else { + fprintf(stderr, "What is \"%s\"?\n", *argv); + dcb_maxrate_help_show(); + return -EINVAL; + } + + NEXT_ARG_FWD(); + } while (argc > 0); + +out: + close_json_object(); + return 0; +} + +int dcb_cmd_maxrate(struct dcb *dcb, int argc, char **argv) +{ + if (!argc || matches(*argv, "help") == 0) { + dcb_maxrate_help(); + return 0; + } else if (matches(*argv, "show") == 0) { + NEXT_ARG_FWD(); + return dcb_cmd_parse_dev(dcb, argc, argv, + dcb_cmd_maxrate_show, dcb_maxrate_help_show); + } else if (matches(*argv, "set") == 0) { + NEXT_ARG_FWD(); + return dcb_cmd_parse_dev(dcb, argc, argv, + dcb_cmd_maxrate_set, dcb_maxrate_help_set); + } else { + fprintf(stderr, "What is \"%s\"?\n", *argv); + dcb_maxrate_help(); + return -EINVAL; + } +} diff --git a/dcb/dcb_pfc.c b/dcb/dcb_pfc.c new file mode 100644 index 0000000..aaa0902 --- /dev/null +++ b/dcb/dcb_pfc.c @@ -0,0 +1,286 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include +#include +#include + +#include "dcb.h" +#include "utils.h" + +static void dcb_pfc_help_set(void) +{ + fprintf(stderr, + "Usage: dcb 
pfc set dev STRING\n" + " [ prio-pfc PFC-MAP ]\n" + " [ macsec-bypass { on | off } ]\n" + " [ delay INTEGER ]\n" + "\n" + " where PFC-MAP := [ PFC-MAP ] PFC-MAPPING\n" + " PFC-MAPPING := { all | TC }:PFC\n" + " TC := { 0 .. 7 }\n" + " PFC := { on | off }\n" + "\n" + ); +} + +static void dcb_pfc_help_show(void) +{ + fprintf(stderr, + "Usage: dcb [ -s ] pfc show dev STRING\n" + " [ pfc-cap ] [ prio-pfc ] [ macsec-bypass ]\n" + " [ delay ] [ requests ] [ indications ]\n" + "\n" + ); +} + +static void dcb_pfc_help(void) +{ + fprintf(stderr, + "Usage: dcb pfc help\n" + "\n" + ); + dcb_pfc_help_show(); + dcb_pfc_help_set(); +} + +static void dcb_pfc_to_array(__u8 array[IEEE_8021QAZ_MAX_TCS], __u8 pfc_en) +{ + int i; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + array[i] = !!(pfc_en & (1 << i)); +} + +static void dcb_pfc_from_array(__u8 array[IEEE_8021QAZ_MAX_TCS], __u8 *pfc_en_p) +{ + __u8 pfc_en = 0; + int i; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + if (array[i]) + pfc_en |= 1 << i; + } + + *pfc_en_p = pfc_en; +} + +static int dcb_pfc_parse_mapping_prio_pfc(__u32 key, char *value, void *data) +{ + struct ieee_pfc *pfc = data; + __u8 pfc_en[IEEE_8021QAZ_MAX_TCS]; + bool enabled; + int ret; + + dcb_pfc_to_array(pfc_en, pfc->pfc_en); + + enabled = parse_on_off("PFC", value, &ret); + if (ret) + return ret; + + ret = dcb_parse_mapping("PRIO", key, IEEE_8021QAZ_MAX_TCS - 1, + "PFC", enabled, -1, + dcb_set_u8, pfc_en); + if (ret) + return ret; + + dcb_pfc_from_array(pfc_en, &pfc->pfc_en); + return 0; +} + +static void dcb_pfc_print_pfc_cap(const struct ieee_pfc *pfc) +{ + print_uint(PRINT_ANY, "pfc_cap", "pfc-cap %d ", pfc->pfc_cap); +} + +static void dcb_pfc_print_macsec_bypass(const struct ieee_pfc *pfc) +{ + print_on_off(PRINT_ANY, "macsec_bypass", "macsec-bypass %s ", pfc->mbc); +} + +static void dcb_pfc_print_delay(const struct ieee_pfc *pfc) +{ + print_uint(PRINT_ANY, "delay", "delay %d ", pfc->delay); +} + +static void dcb_pfc_print_prio_pfc(const struct 
ieee_pfc *pfc) +{ + __u8 pfc_en[IEEE_8021QAZ_MAX_TCS]; + + dcb_pfc_to_array(pfc_en, pfc->pfc_en); + dcb_print_named_array("prio_pfc", "prio-pfc", + pfc_en, ARRAY_SIZE(pfc_en), &dcb_print_array_on_off); +} + +static void dcb_pfc_print_requests(const struct ieee_pfc *pfc) +{ + open_json_array(PRINT_JSON, "requests"); + print_string(PRINT_FP, NULL, "requests ", NULL); + dcb_print_array_u64(pfc->requests, ARRAY_SIZE(pfc->requests)); + close_json_array(PRINT_JSON, "requests"); +} + +static void dcb_pfc_print_indications(const struct ieee_pfc *pfc) +{ + open_json_array(PRINT_JSON, "indications"); + print_string(PRINT_FP, NULL, "indications ", NULL); + dcb_print_array_u64(pfc->indications, ARRAY_SIZE(pfc->indications)); + close_json_array(PRINT_JSON, "indications"); +} + +static void dcb_pfc_print(const struct dcb *dcb, const struct ieee_pfc *pfc) +{ + dcb_pfc_print_pfc_cap(pfc); + dcb_pfc_print_macsec_bypass(pfc); + dcb_pfc_print_delay(pfc); + print_nl(); + + dcb_pfc_print_prio_pfc(pfc); + print_nl(); + + if (dcb->stats) { + dcb_pfc_print_requests(pfc); + print_nl(); + + dcb_pfc_print_indications(pfc); + print_nl(); + } +} + +static int dcb_pfc_get(struct dcb *dcb, const char *dev, struct ieee_pfc *pfc) +{ + return dcb_get_attribute(dcb, dev, DCB_ATTR_IEEE_PFC, pfc, sizeof(*pfc)); +} + +static int dcb_pfc_set(struct dcb *dcb, const char *dev, const struct ieee_pfc *pfc) +{ + return dcb_set_attribute(dcb, dev, DCB_ATTR_IEEE_PFC, pfc, sizeof(*pfc)); +} + +static int dcb_cmd_pfc_set(struct dcb *dcb, const char *dev, int argc, char **argv) +{ + struct ieee_pfc pfc; + int ret; + + if (!argc) { + dcb_pfc_help_set(); + return 0; + } + + ret = dcb_pfc_get(dcb, dev, &pfc); + if (ret) + return ret; + + do { + if (matches(*argv, "help") == 0) { + dcb_pfc_help_set(); + return 0; + } else if (matches(*argv, "prio-pfc") == 0) { + NEXT_ARG(); + ret = parse_mapping(&argc, &argv, true, + &dcb_pfc_parse_mapping_prio_pfc, &pfc); + if (ret) { + fprintf(stderr, "Invalid pfc mapping %s\n", 
*argv); + return ret; + } + continue; + } else if (matches(*argv, "macsec-bypass") == 0) { + NEXT_ARG(); + pfc.mbc = parse_on_off("macsec-bypass", *argv, &ret); + if (ret) + return ret; + } else if (matches(*argv, "delay") == 0) { + NEXT_ARG(); + /* Do not support the size notations for delay. + * Delay is specified in "bit times", not bits, so + * it is not applicable. At the same time it would + * be confusing that 10Kbit does not mean 10240, + * but 1280. + */ + if (get_u16(&pfc.delay, *argv, 0)) { + fprintf(stderr, "Invalid delay `%s', expected an integer 0..65535\n", + *argv); + return -EINVAL; + } + } else { + fprintf(stderr, "What is \"%s\"?\n", *argv); + dcb_pfc_help_set(); + return -EINVAL; + } + + NEXT_ARG_FWD(); + } while (argc > 0); + + return dcb_pfc_set(dcb, dev, &pfc); +} + +static int dcb_cmd_pfc_show(struct dcb *dcb, const char *dev, int argc, char **argv) +{ + struct ieee_pfc pfc; + int ret; + + ret = dcb_pfc_get(dcb, dev, &pfc); + if (ret) + return ret; + + open_json_object(NULL); + + if (!argc) { + dcb_pfc_print(dcb, &pfc); + goto out; + } + + do { + if (matches(*argv, "help") == 0) { + dcb_pfc_help_show(); + return 0; + } else if (matches(*argv, "prio-pfc") == 0) { + dcb_pfc_print_prio_pfc(&pfc); + print_nl(); + } else if (matches(*argv, "pfc-cap") == 0) { + dcb_pfc_print_pfc_cap(&pfc); + print_nl(); + } else if (matches(*argv, "macsec-bypass") == 0) { + dcb_pfc_print_macsec_bypass(&pfc); + print_nl(); + } else if (matches(*argv, "delay") == 0) { + dcb_pfc_print_delay(&pfc); + print_nl(); + } else if (matches(*argv, "requests") == 0) { + dcb_pfc_print_requests(&pfc); + print_nl(); + } else if (matches(*argv, "indications") == 0) { + dcb_pfc_print_indications(&pfc); + print_nl(); + } else { + fprintf(stderr, "What is \"%s\"?\n", *argv); + dcb_pfc_help_show(); + return -EINVAL; + } + + NEXT_ARG_FWD(); + } while (argc > 0); + +out: + close_json_object(); + return 0; +} + +int dcb_cmd_pfc(struct dcb *dcb, int argc, char **argv) +{ + if (!argc || 
matches(*argv, "help") == 0) { + dcb_pfc_help(); + return 0; + } else if (matches(*argv, "show") == 0) { + NEXT_ARG_FWD(); + return dcb_cmd_parse_dev(dcb, argc, argv, + dcb_cmd_pfc_show, dcb_pfc_help_show); + } else if (matches(*argv, "set") == 0) { + NEXT_ARG_FWD(); + return dcb_cmd_parse_dev(dcb, argc, argv, + dcb_cmd_pfc_set, dcb_pfc_help_set); + } else { + fprintf(stderr, "What is \"%s\"?\n", *argv); + dcb_pfc_help(); + return -EINVAL; + } +} diff --git a/devlink/Makefile b/devlink/Makefile index 7da7d1f..d540feb 100644 --- a/devlink/Makefile +++ b/devlink/Makefile @@ -12,7 +12,7 @@ endif all: $(TARGETS) $(LIBS) -devlink: $(DEVLINKOBJ) +devlink: $(DEVLINKOBJ) $(LIBNETLINK) $(QUIET_LINK)$(CC) $^ $(LDFLAGS) $(LDLIBS) -o $@ install: all diff --git a/devlink/devlink.c b/devlink/devlink.c index 007677a..faa87b3 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include "version.h" @@ -302,6 +303,13 @@ static void ifname_map_free(struct ifname_map *ifname_map) #define DL_OPT_TRAP_POLICER_BURST BIT(36) #define DL_OPT_HEALTH_REPORTER_AUTO_DUMP BIT(37) #define DL_OPT_PORT_FUNCTION_HW_ADDR BIT(38) +#define DL_OPT_FLASH_OVERWRITE BIT(39) +#define DL_OPT_RELOAD_ACTION BIT(40) +#define DL_OPT_RELOAD_LIMIT BIT(41) +#define DL_OPT_PORT_FLAVOUR BIT(42) +#define DL_OPT_PORT_PFNUMBER BIT(43) +#define DL_OPT_PORT_SFNUMBER BIT(44) +#define DL_OPT_PORT_FUNCTION_STATE BIT(45) struct dl_opts { uint64_t present; /* flags of present items */ @@ -349,6 +357,13 @@ struct dl_opts { uint64_t trap_policer_burst; char port_function_hw_addr[MAX_ADDR_LEN]; uint32_t port_function_hw_addr_len; + uint32_t overwrite_mask; + enum devlink_reload_action reload_action; + enum devlink_reload_limit reload_limit; + uint32_t port_sfnumber; + uint16_t port_flavour; + uint16_t port_pfnumber; + uint8_t port_fn_state; }; struct dl { @@ -675,6 +690,15 @@ static const enum mnl_attr_data_type devlink_policy[DEVLINK_ATTR_MAX + 1] = { 
[DEVLINK_ATTR_TRAP_METADATA] = MNL_TYPE_NESTED, [DEVLINK_ATTR_TRAP_GROUP_NAME] = MNL_TYPE_STRING, [DEVLINK_ATTR_RELOAD_FAILED] = MNL_TYPE_U8, + [DEVLINK_ATTR_DEV_STATS] = MNL_TYPE_NESTED, + [DEVLINK_ATTR_RELOAD_STATS] = MNL_TYPE_NESTED, + [DEVLINK_ATTR_RELOAD_STATS_ENTRY] = MNL_TYPE_NESTED, + [DEVLINK_ATTR_RELOAD_ACTION] = MNL_TYPE_U8, + [DEVLINK_ATTR_RELOAD_STATS_LIMIT] = MNL_TYPE_U8, + [DEVLINK_ATTR_RELOAD_STATS_VALUE] = MNL_TYPE_U32, + [DEVLINK_ATTR_REMOTE_RELOAD_STATS] = MNL_TYPE_NESTED, + [DEVLINK_ATTR_RELOAD_ACTION_INFO] = MNL_TYPE_NESTED, + [DEVLINK_ATTR_RELOAD_ACTION_STATS] = MNL_TYPE_NESTED, [DEVLINK_ATTR_TRAP_POLICER_ID] = MNL_TYPE_U32, [DEVLINK_ATTR_TRAP_POLICER_RATE] = MNL_TYPE_U64, [DEVLINK_ATTR_TRAP_POLICER_BURST] = MNL_TYPE_U64, @@ -725,6 +749,7 @@ static int attr_stats_cb(const struct nlattr *attr, void *data) static const enum mnl_attr_data_type devlink_function_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = { [DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR ] = MNL_TYPE_BINARY, + [DEVLINK_PORT_FN_ATTR_STATE] = MNL_TYPE_U8, }; static int function_attr_cb(const struct nlattr *attr, void *data) @@ -940,7 +965,13 @@ static int strtobool(const char *str, bool *p_val) static int __dl_argv_handle(char *str, char **p_bus_name, char **p_dev_name) { - strslashrsplit(str, p_bus_name, p_dev_name); + int err; + + err = strslashrsplit(str, p_bus_name, p_dev_name); + if (err) { + pr_err("Devlink identification (\"bus_name/dev_name\") \"%s\" is invalid\n", str); + return err; + } return 0; } @@ -1285,6 +1316,19 @@ eswitch_encap_mode_get(const char *typestr, return 0; } +static int flash_overwrite_section_get(const char *sectionstr, uint32_t *mask) +{ + if (strcmp(sectionstr, "settings") == 0) { + *mask |= DEVLINK_FLASH_OVERWRITE_SETTINGS; + } else if (strcmp(sectionstr, "identifiers") == 0) { + *mask |= DEVLINK_FLASH_OVERWRITE_IDENTIFIERS; + } else { + pr_err("Unknown overwrite section \"%s\"\n", sectionstr); + return -EINVAL; + } + return 0; +} + static int 
param_cmode_get(const char *cmodestr, enum devlink_param_cmode *cmode) { @@ -1328,6 +1372,77 @@ static int hw_addr_parse(const char *addrstr, char *hw_addr, uint32_t *len) return 0; } +static int reload_action_get(struct dl *dl, const char *actionstr, + enum devlink_reload_action *action) +{ + if (strcmp(actionstr, "driver_reinit") == 0) { + *action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT; + } else if (strcmp(actionstr, "fw_activate") == 0) { + *action = DEVLINK_RELOAD_ACTION_FW_ACTIVATE; + } else { + pr_err("Unknown reload action \"%s\"\n", actionstr); + return -EINVAL; + } + return 0; +} + +static int reload_limit_get(struct dl *dl, const char *limitstr, + enum devlink_reload_limit *limit) +{ + if (strcmp(limitstr, "no_reset") == 0) { + *limit = DEVLINK_RELOAD_LIMIT_NO_RESET; + } else { + pr_err("Unknown reload limit \"%s\"\n", limitstr); + return -EINVAL; + } + return 0; +} + +static struct str_num_map port_flavour_map[] = { + { .str = "physical", .num = DEVLINK_PORT_FLAVOUR_PHYSICAL }, + { .str = "cpu", .num = DEVLINK_PORT_FLAVOUR_CPU }, + { .str = "dsa", .num = DEVLINK_PORT_FLAVOUR_DSA }, + { .str = "pcipf", .num = DEVLINK_PORT_FLAVOUR_PCI_PF }, + { .str = "pcivf", .num = DEVLINK_PORT_FLAVOUR_PCI_VF }, + { .str = "pcisf", .num = DEVLINK_PORT_FLAVOUR_PCI_SF }, + { .str = "virtual", .num = DEVLINK_PORT_FLAVOUR_VIRTUAL}, + { .str = NULL, }, +}; + +static struct str_num_map port_fn_state_map[] = { + { .str = "inactive", .num = DEVLINK_PORT_FN_STATE_INACTIVE}, + { .str = "active", .num = DEVLINK_PORT_FN_STATE_ACTIVE }, + { .str = NULL, } +}; + +static struct str_num_map port_fn_opstate_map[] = { + { .str = "attached", .num = DEVLINK_PORT_FN_OPSTATE_ATTACHED}, + { .str = "detached", .num = DEVLINK_PORT_FN_OPSTATE_DETACHED}, + { .str = NULL, } +}; + +static int port_flavour_parse(const char *flavour, uint16_t *value) +{ + int num; + + num = str_map_lookup_str(port_flavour_map, flavour); + if (num < 0) + return num; + *value = num; + return 0; +} + +static int 
port_fn_state_parse(const char *statestr, uint8_t *state) +{ + int num; + + num = str_map_lookup_str(port_fn_state_map, statestr); + if (num < 0) + return num; + *state = num; + return 0; +} + struct dl_args_metadata { uint64_t o_flag; char err_msg[DL_ARGS_REQUIRED_MAX_ERR_LEN]; @@ -1359,6 +1474,8 @@ static const struct dl_args_metadata dl_args_required[] = { {DL_OPT_TRAP_NAME, "Trap's name is expected."}, {DL_OPT_TRAP_GROUP_NAME, "Trap group's name is expected."}, {DL_OPT_PORT_FUNCTION_HW_ADDR, "Port function's hardware address is expected."}, + {DL_OPT_PORT_FLAVOUR, "Port flavour is expected."}, + {DL_OPT_PORT_PFNUMBER, "Port PCI PF number is expected."}, }; static int dl_args_finding_required_validate(uint64_t o_required, @@ -1627,6 +1744,21 @@ static int dl_argv_parse(struct dl *dl, uint64_t o_required, if (err) return err; o_found |= DL_OPT_FLASH_COMPONENT; + + } else if (dl_argv_match(dl, "overwrite") && + (o_all & DL_OPT_FLASH_OVERWRITE)) { + const char *sectionstr; + + dl_arg_inc(dl); + err = dl_argv_str(dl, &sectionstr); + if(err) + return err; + err = flash_overwrite_section_get(sectionstr, + &opts->overwrite_mask); + if (err) + return err; + o_found |= DL_OPT_FLASH_OVERWRITE; + } else if (dl_argv_match(dl, "reporter") && (o_all & DL_OPT_HEALTH_REPORTER_NAME)) { dl_arg_inc(dl); @@ -1699,6 +1831,30 @@ static int dl_argv_parse(struct dl *dl, uint64_t o_required, opts->netns_is_pid = true; } o_found |= DL_OPT_NETNS; + } else if (dl_argv_match(dl, "action") && + (o_all & DL_OPT_RELOAD_ACTION)) { + const char *actionstr; + + dl_arg_inc(dl); + err = dl_argv_str(dl, &actionstr); + if (err) + return err; + err = reload_action_get(dl, actionstr, &opts->reload_action); + if (err) + return err; + o_found |= DL_OPT_RELOAD_ACTION; + } else if (dl_argv_match(dl, "limit") && + (o_all & DL_OPT_RELOAD_LIMIT)) { + const char *limitstr; + + dl_arg_inc(dl); + err = dl_argv_str(dl, &limitstr); + if (err) + return err; + err = reload_limit_get(dl, limitstr, &opts->reload_limit); + 
if (err) + return err; + o_found |= DL_OPT_RELOAD_LIMIT; } else if (dl_argv_match(dl, "policer") && (o_all & DL_OPT_TRAP_POLICER_ID)) { dl_arg_inc(dl); @@ -1738,7 +1894,42 @@ static int dl_argv_parse(struct dl *dl, uint64_t o_required, if (err) return err; o_found |= DL_OPT_PORT_FUNCTION_HW_ADDR; + } else if (dl_argv_match(dl, "state") && + (o_all & DL_OPT_PORT_FUNCTION_STATE)) { + const char *statestr; + + dl_arg_inc(dl); + err = dl_argv_str(dl, &statestr); + if (err) + return err; + err = port_fn_state_parse(statestr, &opts->port_fn_state); + if (err) + return err; + + o_found |= DL_OPT_PORT_FUNCTION_STATE; + } else if (dl_argv_match(dl, "flavour") && (o_all & DL_OPT_PORT_FLAVOUR)) { + const char *flavourstr; + dl_arg_inc(dl); + err = dl_argv_str(dl, &flavourstr); + if (err) + return err; + err = port_flavour_parse(flavourstr, &opts->port_flavour); + if (err) + return err; + o_found |= DL_OPT_PORT_FLAVOUR; + } else if (dl_argv_match(dl, "pfnum") && (o_all & DL_OPT_PORT_PFNUMBER)) { + dl_arg_inc(dl); + err = dl_argv_uint16_t(dl, &opts->port_pfnumber); + if (err) + return err; + o_found |= DL_OPT_PORT_PFNUMBER; + } else if (dl_argv_match(dl, "sfnum") && (o_all & DL_OPT_PORT_SFNUMBER)) { + dl_arg_inc(dl); + err = dl_argv_uint32_t(dl, &opts->port_sfnumber); + if (err) + return err; + o_found |= DL_OPT_PORT_SFNUMBER; } else { pr_err("Unknown option \"%s\"\n", dl_argv(dl)); return -EINVAL; @@ -1761,12 +1952,39 @@ dl_function_attr_put(struct nlmsghdr *nlh, const struct dl_opts *opts) struct nlattr *nest; nest = mnl_attr_nest_start(nlh, DEVLINK_ATTR_PORT_FUNCTION); - mnl_attr_put(nlh, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, - opts->port_function_hw_addr_len, - opts->port_function_hw_addr); + + if (opts->present & DL_OPT_PORT_FUNCTION_HW_ADDR) + mnl_attr_put(nlh, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, + opts->port_function_hw_addr_len, + opts->port_function_hw_addr); + if (opts->present & DL_OPT_PORT_FUNCTION_STATE) + mnl_attr_put_u8(nlh, DEVLINK_PORT_FN_ATTR_STATE, + 
opts->port_fn_state); mnl_attr_nest_end(nlh, nest); } +static void +dl_flash_update_overwrite_put(struct nlmsghdr *nlh, const struct dl_opts *opts) +{ + struct nla_bitfield32 overwrite_mask; + + overwrite_mask.selector = DEVLINK_SUPPORTED_FLASH_OVERWRITE_SECTIONS; + overwrite_mask.value = opts->overwrite_mask; + + mnl_attr_put(nlh, DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK, + sizeof(overwrite_mask), &overwrite_mask); +} + +static void +dl_reload_limits_put(struct nlmsghdr *nlh, const struct dl_opts *opts) +{ + struct nla_bitfield32 limits; + + limits.selector = DEVLINK_RELOAD_LIMITS_VALID_MASK; + limits.value = BIT(opts->reload_limit); + mnl_attr_put(nlh, DEVLINK_ATTR_RELOAD_LIMITS, sizeof(limits), &limits); +} + static void dl_opts_put(struct nlmsghdr *nlh, struct dl *dl) { struct dl_opts *opts = &dl->opts; @@ -1854,6 +2072,8 @@ static void dl_opts_put(struct nlmsghdr *nlh, struct dl *dl) if (opts->present & DL_OPT_FLASH_COMPONENT) mnl_attr_put_strz(nlh, DEVLINK_ATTR_FLASH_UPDATE_COMPONENT, opts->flash_component); + if (opts->present & DL_OPT_FLASH_OVERWRITE) + dl_flash_update_overwrite_put(nlh, opts); if (opts->present & DL_OPT_HEALTH_REPORTER_NAME) mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, opts->reporter_name); @@ -1881,6 +2101,11 @@ static void dl_opts_put(struct nlmsghdr *nlh, struct dl *dl) opts->netns_is_pid ? 
DEVLINK_ATTR_NETNS_PID : DEVLINK_ATTR_NETNS_FD, opts->netns); + if (opts->present & DL_OPT_RELOAD_ACTION) + mnl_attr_put_u8(nlh, DEVLINK_ATTR_RELOAD_ACTION, + opts->reload_action); + if (opts->present & DL_OPT_RELOAD_LIMIT) + dl_reload_limits_put(nlh, opts); if (opts->present & DL_OPT_TRAP_POLICER_ID) mnl_attr_put_u32(nlh, DEVLINK_ATTR_TRAP_POLICER_ID, opts->trap_policer_id); @@ -1890,8 +2115,14 @@ static void dl_opts_put(struct nlmsghdr *nlh, struct dl *dl) if (opts->present & DL_OPT_TRAP_POLICER_BURST) mnl_attr_put_u64(nlh, DEVLINK_ATTR_TRAP_POLICER_BURST, opts->trap_policer_burst); - if (opts->present & DL_OPT_PORT_FUNCTION_HW_ADDR) + if (opts->present & (DL_OPT_PORT_FUNCTION_HW_ADDR | DL_OPT_PORT_FUNCTION_STATE)) dl_function_attr_put(nlh, opts); + if (opts->present & DL_OPT_PORT_FLAVOUR) + mnl_attr_put_u16(nlh, DEVLINK_ATTR_PORT_FLAVOUR, opts->port_flavour); + if (opts->present & DL_OPT_PORT_PFNUMBER) + mnl_attr_put_u16(nlh, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, opts->port_pfnumber); + if (opts->present & DL_OPT_PORT_SFNUMBER) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_PCI_SF_NUMBER, opts->port_sfnumber); } static int dl_argv_parse_put(struct nlmsghdr *nlh, struct dl *dl, @@ -1953,8 +2184,9 @@ static void cmd_dev_help(void) pr_err(" devlink dev param set DEV name PARAMETER value VALUE cmode { permanent | driverinit | runtime }\n"); pr_err(" devlink dev param show [DEV name PARAMETER]\n"); pr_err(" devlink dev reload DEV [ netns { PID | NAME | ID } ]\n"); + pr_err(" [ action { driver_reinit | fw_activate } ] [ limit no_reset ]\n"); pr_err(" devlink dev info [ DEV ]\n"); - pr_err(" devlink dev flash DEV file PATH [ component NAME ]\n"); + pr_err(" devlink dev flash DEV file PATH [ component NAME ] [ overwrite SECTION ]\n"); } static bool cmp_arr_last_handle(struct dl *dl, const char *bus_name, @@ -2244,6 +2476,30 @@ static const char *param_cmode_name(uint8_t cmode) } } +static const char *reload_action_name(uint8_t reload_action) +{ + switch (reload_action) { + case 
DEVLINK_RELOAD_ACTION_DRIVER_REINIT: + return "driver_reinit"; + case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: + return "fw_activate"; + default: + return ""; + } +} + +static const char *reload_limit_name(uint8_t reload_limit) +{ + switch (reload_limit) { + case DEVLINK_RELOAD_LIMIT_UNSPEC: + return "unspecified"; + case DEVLINK_RELOAD_LIMIT_NO_RESET: + return "no_reset"; + default: + return ""; + } +} + static const char *eswitch_mode_name(uint32_t mode) { switch (mode) { @@ -2558,7 +2814,8 @@ static void pr_out_param_value(struct dl *dl, const char *nla_name, } } -static void pr_out_param(struct dl *dl, struct nlattr **tb, bool array) +static void pr_out_param(struct dl *dl, struct nlattr **tb, bool array, + bool is_port_param) { struct nlattr *nla_param[DEVLINK_ATTR_MAX + 1] = {}; struct nlattr *param_value_attr; @@ -2575,9 +2832,15 @@ static void pr_out_param(struct dl *dl, struct nlattr **tb, bool array) return; if (array) - pr_out_handle_start_arr(dl, tb); + if (is_port_param) + pr_out_port_handle_start_arr(dl, tb, false); + else + pr_out_handle_start_arr(dl, tb); else - __pr_out_handle_start(dl, tb, true, false); + if (is_port_param) + pr_out_port_handle_start(dl, tb, false); + else + __pr_out_handle_start(dl, tb, true, false); nla_type = mnl_attr_get_u8(nla_param[DEVLINK_ATTR_PARAM_TYPE]); @@ -2597,7 +2860,10 @@ static void pr_out_param(struct dl *dl, struct nlattr **tb, bool array) pr_out_entry_end(dl); } pr_out_array_end(dl); - pr_out_handle_end(dl); + if (is_port_param) + pr_out_port_handle_end(dl); + else + pr_out_handle_end(dl); } static int cmd_dev_param_show_cb(const struct nlmsghdr *nlh, void *data) @@ -2610,7 +2876,7 @@ static int cmd_dev_param_show_cb(const struct nlmsghdr *nlh, void *data) if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME] || !tb[DEVLINK_ATTR_PARAM]) return MNL_CB_ERROR; - pr_out_param(dl, tb, true); + pr_out_param(dl, tb, true, false); return MNL_CB_OK; } @@ -2699,7 +2965,7 @@ static int cmd_dev_param_set(struct dl *dl) 
struct param_ctx ctx = {}; struct nlmsghdr *nlh; bool conv_exists; - uint32_t val_u32; + uint32_t val_u32 = 0; uint16_t val_u16; uint8_t val_u8; bool val_bool; @@ -2808,6 +3074,21 @@ err_param_value_parse: return err; } +static int cmd_port_param_show_cb(const struct nlmsghdr *nlh, void *data) +{ + struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh); + struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {}; + struct dl *dl = data; + + mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb); + if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME] || + !tb[DEVLINK_ATTR_PORT_INDEX] || !tb[DEVLINK_ATTR_PARAM]) + return MNL_CB_ERROR; + + pr_out_param(dl, tb, true, true); + return MNL_CB_OK; +} + static int cmd_dev_param_show(struct dl *dl) { uint16_t flags = NLM_F_REQUEST | NLM_F_ACK; @@ -2848,29 +3129,119 @@ static int cmd_dev_param(struct dl *dl) pr_err("Command \"%s\" not found\n", dl_argv(dl)); return -ENOENT; } -static int cmd_dev_show_cb(const struct nlmsghdr *nlh, void *data) + +static void pr_out_action_stats(struct dl *dl, struct nlattr *action_stats) { - struct dl *dl = data; + struct nlattr *tb_stats_entry[DEVLINK_ATTR_MAX + 1] = {}; + struct nlattr *nla_reload_stats_entry, *nla_limit, *nla_value; + enum devlink_reload_limit limit; + uint32_t value; + int err; + + mnl_attr_for_each_nested(nla_reload_stats_entry, action_stats) { + err = mnl_attr_parse_nested(nla_reload_stats_entry, attr_cb, + tb_stats_entry); + if (err != MNL_CB_OK) + return; + + nla_limit = tb_stats_entry[DEVLINK_ATTR_RELOAD_STATS_LIMIT]; + nla_value = tb_stats_entry[DEVLINK_ATTR_RELOAD_STATS_VALUE]; + if (!nla_limit || !nla_value) + return; + + check_indent_newline(dl); + limit = mnl_attr_get_u8(nla_limit); + value = mnl_attr_get_u32(nla_value); + print_uint_name_value(reload_limit_name(limit), value); + } +} + +static void pr_out_reload_stats(struct dl *dl, struct nlattr *reload_stats) +{ + struct nlattr *nla_action_info, *nla_action, *nla_action_stats; struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {}; - 
struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh); - uint8_t reload_failed = 0; + enum devlink_reload_action action; + int err; - mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb); - if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME]) - return MNL_CB_ERROR; + mnl_attr_for_each_nested(nla_action_info, reload_stats) { + err = mnl_attr_parse_nested(nla_action_info, attr_cb, tb); + if (err != MNL_CB_OK) + return; + nla_action = tb[DEVLINK_ATTR_RELOAD_ACTION]; + nla_action_stats = tb[DEVLINK_ATTR_RELOAD_ACTION_STATS]; + if (!nla_action || !nla_action_stats) + return; + + action = mnl_attr_get_u8(nla_action); + pr_out_object_start(dl, reload_action_name(action)); + pr_out_action_stats(dl, nla_action_stats); + pr_out_object_end(dl); + } +} + +static void pr_out_reload_data(struct dl *dl, struct nlattr **tb) +{ + struct nlattr *nla_reload_stats, *nla_remote_reload_stats; + struct nlattr *tb_stats[DEVLINK_ATTR_MAX + 1] = {}; + uint8_t reload_failed = 0; + int err; if (tb[DEVLINK_ATTR_RELOAD_FAILED]) reload_failed = mnl_attr_get_u8(tb[DEVLINK_ATTR_RELOAD_FAILED]); if (reload_failed) { - __pr_out_handle_start(dl, tb, true, false); check_indent_newline(dl); print_bool(PRINT_ANY, "reload_failed", "reload_failed %s", true); + } + if (!tb[DEVLINK_ATTR_DEV_STATS] || !dl->stats) + return; + err = mnl_attr_parse_nested(tb[DEVLINK_ATTR_DEV_STATS], attr_cb, + tb_stats); + if (err != MNL_CB_OK) + return; + + pr_out_object_start(dl, "stats"); + + nla_reload_stats = tb_stats[DEVLINK_ATTR_RELOAD_STATS]; + if (nla_reload_stats) { + pr_out_object_start(dl, "reload"); + pr_out_reload_stats(dl, nla_reload_stats); + pr_out_object_end(dl); + } + nla_remote_reload_stats = tb_stats[DEVLINK_ATTR_REMOTE_RELOAD_STATS]; + if (nla_remote_reload_stats) { + pr_out_object_start(dl, "remote_reload"); + pr_out_reload_stats(dl, nla_remote_reload_stats); + pr_out_object_end(dl); + } + + pr_out_object_end(dl); +} + + +static void pr_out_dev(struct dl *dl, struct nlattr **tb) +{ + if 
((tb[DEVLINK_ATTR_RELOAD_FAILED] && mnl_attr_get_u8(tb[DEVLINK_ATTR_RELOAD_FAILED])) || + (tb[DEVLINK_ATTR_DEV_STATS] && dl->stats)) { + __pr_out_handle_start(dl, tb, true, false); + pr_out_reload_data(dl, tb); pr_out_handle_end(dl); } else { pr_out_handle(dl, tb); } +} +static int cmd_dev_show_cb(const struct nlmsghdr *nlh, void *data) +{ + struct dl *dl = data; + struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {}; + struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh); + + mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb); + if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME]) + return MNL_CB_ERROR; + + pr_out_dev(dl, tb); return MNL_CB_OK; } @@ -2897,6 +3268,57 @@ static int cmd_dev_show(struct dl *dl) return err; } +static void pr_out_reload_actions_performed(struct dl *dl, struct nlattr **tb) +{ + struct nlattr *nla_actions_performed; + struct nla_bitfield32 *actions; + uint32_t actions_performed; + uint16_t len; + int action; + + if (!tb[DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED]) + return; + + nla_actions_performed = tb[DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED]; + len = mnl_attr_get_payload_len(nla_actions_performed); + if (len != sizeof(*actions)) + return; + actions = mnl_attr_get_payload(nla_actions_performed); + if (!actions) + return; + g_new_line_count = 1; /* Avoid extra new line in non-json print */ + pr_out_array_start(dl, "reload_actions_performed"); + actions_performed = actions->value & actions->selector; + for (action = 0; action <= DEVLINK_RELOAD_ACTION_MAX; action++) { + if (BIT(action) & actions_performed) { + check_indent_newline(dl); + print_string(PRINT_ANY, NULL, "%s", + reload_action_name(action)); + } + } + pr_out_array_end(dl); + if (!dl->json_output) + __pr_out_newline(); +} + +static int cmd_dev_reload_cb(const struct nlmsghdr *nlh, void *data) +{ + struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh); + struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {}; + struct dl *dl = data; + + mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb); + if 
(!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME] || + !tb[DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED]) + return MNL_CB_ERROR; + + pr_out_section_start(dl, "reload"); + pr_out_reload_actions_performed(dl, tb); + pr_out_section_end(dl); + + return MNL_CB_OK; +} + static int cmd_dev_reload(struct dl *dl) { struct nlmsghdr *nlh; @@ -2910,11 +3332,13 @@ static int cmd_dev_reload(struct dl *dl) nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_RELOAD, NLM_F_REQUEST | NLM_F_ACK); - err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLE, DL_OPT_NETNS); + err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLE, + DL_OPT_NETNS | DL_OPT_RELOAD_ACTION | + DL_OPT_RELOAD_LIMIT); if (err) return err; - return _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL); + return _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_dev_reload_cb, dl); } static void pr_out_versions_single(struct dl *dl, const struct nlmsghdr *nlh, @@ -3066,6 +3490,9 @@ static int cmd_dev_info(struct dl *dl) struct cmd_dev_flash_status_ctx { struct dl *dl; + struct timespec time_of_last_status; + uint64_t status_msg_timeout; + size_t elapsed_time_msg_len; char *last_msg; char *last_component; uint8_t not_first:1, @@ -3083,6 +3510,16 @@ static int nullstrcmp(const char *str1, const char *str2) return str1 ? 
1 : -1; } +static void cmd_dev_flash_clear_elapsed_time(struct cmd_dev_flash_status_ctx *ctx) +{ + int i; + + for (i = 0; i < ctx->elapsed_time_msg_len; i++) + pr_out_tty("\b \b"); + + ctx->elapsed_time_msg_len = 0; +} + static int cmd_dev_flash_status_cb(const struct nlmsghdr *nlh, void *data) { struct cmd_dev_flash_status_ctx *ctx = data; @@ -3095,6 +3532,8 @@ static int cmd_dev_flash_status_cb(const struct nlmsghdr *nlh, void *data) const char *bus_name; const char *dev_name; + cmd_dev_flash_clear_elapsed_time(ctx); + if (genl->cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS && genl->cmd != DEVLINK_CMD_FLASH_UPDATE_END) return MNL_CB_STOP; @@ -3124,12 +3563,19 @@ static int cmd_dev_flash_status_cb(const struct nlmsghdr *nlh, void *data) done = mnl_attr_get_u64(tb[DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE]); if (tb[DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL]) total = mnl_attr_get_u64(tb[DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL]); + if (tb[DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT]) + ctx->status_msg_timeout = mnl_attr_get_u64(tb[DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT]); + else + ctx->status_msg_timeout = 0; if (!nullstrcmp(msg, ctx->last_msg) && !nullstrcmp(component, ctx->last_component) && ctx->last_pc && ctx->not_first) { pr_out_tty("\b\b\b\b\b"); /* clean percentage */ } else { + /* only update the last status timestamp if the message changed */ + clock_gettime(CLOCK_MONOTONIC, &ctx->time_of_last_status); + if (ctx->not_first) pr_out("\n"); if (component) { @@ -3155,11 +3601,78 @@ static int cmd_dev_flash_status_cb(const struct nlmsghdr *nlh, void *data) return MNL_CB_STOP; } +static void cmd_dev_flash_time_elapsed(struct cmd_dev_flash_status_ctx *ctx) +{ + struct timespec now, res; + + clock_gettime(CLOCK_MONOTONIC, &now); + + res.tv_sec = now.tv_sec - ctx->time_of_last_status.tv_sec; + res.tv_nsec = now.tv_nsec - ctx->time_of_last_status.tv_nsec; + if (res.tv_nsec < 0) { + res.tv_sec--; + res.tv_nsec += 1000000000L; + } + + /* Only begin displaying an elapsed time message if 
we've waited a few + * seconds with no response, or the status message included a timeout + * value. + */ + if (res.tv_sec > 2 || ctx->status_msg_timeout) { + uint64_t elapsed_m, elapsed_s; + char msg[128]; + size_t len; + + /* clear the last elapsed time message, if we have one */ + cmd_dev_flash_clear_elapsed_time(ctx); + + elapsed_m = res.tv_sec / 60; + elapsed_s = res.tv_sec % 60; + + /** + * If we've elapsed a few seconds without receiving any status + * notification from the device, we display a time elapsed + * message. This has a few possible formats: + * + * 1) just time elapsed, when no timeout was provided + * " ( Xm Ys )" + * 2) time elapsed out of a timeout that came from the device + * driver via DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT + * " ( Xm Ys : Am Bs )" + * 3) time elapsed if we still receive no status after + * reaching the provided timeout. + * " ( Xm Ys : timeout reached )" + */ + if (!ctx->status_msg_timeout) { + len = snprintf(msg, sizeof(msg), + " ( %lum %lus )", elapsed_m, elapsed_s); + } else if (res.tv_sec <= ctx->status_msg_timeout) { + uint64_t timeout_m, timeout_s; + + timeout_m = ctx->status_msg_timeout / 60; + timeout_s = ctx->status_msg_timeout % 60; + + len = snprintf(msg, sizeof(msg), + " ( %lum %lus : %lum %lus )", + elapsed_m, elapsed_s, timeout_m, timeout_s); + } else { + len = snprintf(msg, sizeof(msg), + " ( %lum %lus : timeout reached )", elapsed_m, elapsed_s); + } + + ctx->elapsed_time_msg_len = len; + + pr_out_tty("%s", msg); + fflush(stdout); + } +} + static int cmd_dev_flash_fds_process(struct cmd_dev_flash_status_ctx *ctx, struct mnlg_socket *nlg_ntf, int pipe_r) { int nlfd = mnlg_socket_get_fd(nlg_ntf); + struct timeval timeout; fd_set fds[3]; int fdmax; int i; @@ -3174,7 +3687,14 @@ static int cmd_dev_flash_fds_process(struct cmd_dev_flash_status_ctx *ctx, if (nlfd >= fdmax) fdmax = nlfd + 1; - while (select(fdmax, &fds[0], &fds[1], &fds[2], NULL) < 0) { + /* select only for a short while (1/10th of a second) in 
order to + * allow periodically updating the screen with an elapsed time + * indicator. + */ + timeout.tv_sec = 0; + timeout.tv_usec = 100000; + + while (select(fdmax, &fds[0], &fds[1], &fds[2], &timeout) < 0) { if (errno == EINTR) continue; pr_err("select() failed\n"); @@ -3196,6 +3716,7 @@ static int cmd_dev_flash_fds_process(struct cmd_dev_flash_status_ctx *ctx, return err2; ctx->flash_done = 1; } + cmd_dev_flash_time_elapsed(ctx); return 0; } @@ -3219,7 +3740,7 @@ static int cmd_dev_flash(struct dl *dl) NLM_F_REQUEST | NLM_F_ACK); err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLE | DL_OPT_FLASH_FILE_NAME, - DL_OPT_FLASH_COMPONENT); + DL_OPT_FLASH_COMPONENT | DL_OPT_FLASH_OVERWRITE); if (err) return err; @@ -3229,19 +3750,21 @@ static int cmd_dev_flash(struct dl *dl) err = _mnlg_socket_group_add(nlg_ntf, DEVLINK_GENL_MCGRP_CONFIG_NAME); if (err) - return err; + goto err_socket; err = pipe(pipe_fds); - if (err == -1) - return -errno; + if (err == -1) { + err = -errno; + goto err_socket; + } pipe_r = pipe_fds[0]; pipe_w = pipe_fds[1]; pid = fork(); if (pid == -1) { - close(pipe_r); close(pipe_w); - return -errno; + err = -errno; + goto out; } else if (!pid) { /* In child, just execute the flash and pass returned * value through pipe once it is done. @@ -3256,6 +3779,11 @@ static int cmd_dev_flash(struct dl *dl) } close(pipe_w); + /* initialize starting time to allow comparison for when to begin + * displaying a time elapsed message. 
+ */ + clock_gettime(CLOCK_MONOTONIC, &ctx.time_of_last_status); + do { err = cmd_dev_flash_fds_process(&ctx, nlg_ntf, pipe_r); if (err) @@ -3265,6 +3793,7 @@ static int cmd_dev_flash(struct dl *dl) err = _mnlg_socket_recv_run(dl->nlg, NULL, NULL); out: close(pipe_r); +err_socket: mnlg_socket_close(nlg_ntf); return err; } @@ -3304,8 +3833,12 @@ static void cmd_port_help(void) pr_err(" devlink port set DEV/PORT_INDEX [ type { eth | ib | auto} ]\n"); pr_err(" devlink port split DEV/PORT_INDEX count COUNT\n"); pr_err(" devlink port unsplit DEV/PORT_INDEX\n"); - pr_err(" devlink port function set DEV/PORT_INDEX [ hw_addr ADDR ]\n"); + pr_err(" devlink port function set DEV/PORT_INDEX [ hw_addr ADDR ] [ state STATE ]\n"); + pr_err(" devlink port param set DEV/PORT_INDEX name PARAMETER value VALUE cmode { permanent | driverinit | runtime }\n"); + pr_err(" devlink port param show [DEV/PORT_INDEX name PARAMETER]\n"); pr_err(" devlink port health show [ DEV/PORT_INDEX reporter REPORTER_NAME ]\n"); + pr_err(" devlink port add DEV/PORT_INDEX flavour FLAVOUR pfnum PFNUM [ sfnum SFNUM ]\n"); + pr_err(" devlink port del DEV/PORT_INDEX\n"); } static const char *port_type_name(uint32_t type) @@ -3321,28 +3854,19 @@ static const char *port_type_name(uint32_t type) static const char *port_flavour_name(uint16_t flavour) { - switch (flavour) { - case DEVLINK_PORT_FLAVOUR_PHYSICAL: - return "physical"; - case DEVLINK_PORT_FLAVOUR_CPU: - return "cpu"; - case DEVLINK_PORT_FLAVOUR_DSA: - return "dsa"; - case DEVLINK_PORT_FLAVOUR_PCI_PF: - return "pcipf"; - case DEVLINK_PORT_FLAVOUR_PCI_VF: - return "pcivf"; - case DEVLINK_PORT_FLAVOUR_VIRTUAL: - return "virtual"; - default: - return ""; - } + const char *str; + + str = str_map_lookup_u16(port_flavour_map, flavour); + return str ? 
str : ""; } -static void pr_out_port_pfvf_num(struct dl *dl, struct nlattr **tb) +static void pr_out_port_pfvfsf_num(struct dl *dl, struct nlattr **tb) { uint16_t fn_num; + if (tb[DEVLINK_ATTR_PORT_CONTROLLER_NUMBER]) + print_uint(PRINT_ANY, "controller", " controller %u", + mnl_attr_get_u32(tb[DEVLINK_ATTR_PORT_CONTROLLER_NUMBER])); if (tb[DEVLINK_ATTR_PORT_PCI_PF_NUMBER]) { fn_num = mnl_attr_get_u16(tb[DEVLINK_ATTR_PORT_PCI_PF_NUMBER]); print_uint(PRINT_ANY, "pfnum", " pfnum %u", fn_num); @@ -3351,6 +3875,32 @@ static void pr_out_port_pfvf_num(struct dl *dl, struct nlattr **tb) fn_num = mnl_attr_get_u16(tb[DEVLINK_ATTR_PORT_PCI_VF_NUMBER]); print_uint(PRINT_ANY, "vfnum", " vfnum %u", fn_num); } + if (tb[DEVLINK_ATTR_PORT_PCI_SF_NUMBER]) { + fn_num = mnl_attr_get_u32(tb[DEVLINK_ATTR_PORT_PCI_SF_NUMBER]); + print_uint(PRINT_ANY, "sfnum", " sfnum %u", fn_num); + } + if (tb[DEVLINK_ATTR_PORT_EXTERNAL]) { + uint8_t external; + + external = mnl_attr_get_u8(tb[DEVLINK_ATTR_PORT_EXTERNAL]); + print_bool(PRINT_ANY, "external", " external %s", external); + } +} + +static const char *port_fn_state(uint8_t state) +{ + const char *str; + + str = str_map_lookup_u8(port_fn_state_map, state); + return str ? str : ""; +} + +static const char *port_fn_opstate(uint8_t state) +{ + const char *str; + + str = str_map_lookup_u8(port_fn_opstate_map, state); + return str ? 
str : ""; } static void pr_out_port_function(struct dl *dl, struct nlattr **tb_port) @@ -3369,16 +3919,33 @@ static void pr_out_port_function(struct dl *dl, struct nlattr **tb_port) if (err != MNL_CB_OK) return; - if (!tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR]) - return; - - len = mnl_attr_get_payload_len(tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR]); - data = mnl_attr_get_payload(tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR]); - pr_out_object_start(dl, "function"); check_indent_newline(dl); - print_string(PRINT_ANY, "hw_addr", "hw_addr %s", - ll_addr_n2a(data, len, 0, hw_addr, sizeof(hw_addr))); + + if (tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR]) { + len = mnl_attr_get_payload_len(tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR]); + data = mnl_attr_get_payload(tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR]); + + print_string(PRINT_ANY, "hw_addr", "hw_addr %s", + ll_addr_n2a(data, len, 0, hw_addr, sizeof(hw_addr))); + } + if (tb[DEVLINK_PORT_FN_ATTR_STATE]) { + uint8_t state; + + state = mnl_attr_get_u8(tb[DEVLINK_PORT_FN_ATTR_STATE]); + + print_string(PRINT_ANY, "state", " state %s", + port_fn_state(state)); + } + if (tb[DEVLINK_PORT_FN_ATTR_OPSTATE]) { + uint8_t state; + + state = mnl_attr_get_u8(tb[DEVLINK_PORT_FN_ATTR_OPSTATE]); + + print_string(PRINT_ANY, "opstate", " opstate %s", + port_fn_opstate(state)); + } + if (!dl->json_output) __pr_out_indent_dec(); pr_out_object_end(dl); @@ -3422,7 +3989,8 @@ static void pr_out_port(struct dl *dl, struct nlattr **tb) switch (port_flavour) { case DEVLINK_PORT_FLAVOUR_PCI_PF: case DEVLINK_PORT_FLAVOUR_PCI_VF: - pr_out_port_pfvf_num(dl, tb); + case DEVLINK_PORT_FLAVOUR_PCI_SF: + pr_out_port_pfvfsf_num(dl, tb); break; default: break; @@ -3458,113 +4026,394 @@ static int cmd_port_show_cb(const struct nlmsghdr *nlh, void *data) if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME] || !tb[DEVLINK_ATTR_PORT_INDEX]) return MNL_CB_ERROR; - pr_out_port(dl, tb); - return MNL_CB_OK; -} + pr_out_port(dl, tb); + return MNL_CB_OK; +} + +static int 
cmd_port_show(struct dl *dl) +{ + struct nlmsghdr *nlh; + uint16_t flags = NLM_F_REQUEST | NLM_F_ACK; + int err; + + if (dl_argc(dl) == 0) + flags |= NLM_F_DUMP; + + nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_PORT_GET, flags); + + if (dl_argc(dl) > 0) { + err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLEP, 0); + if (err) + return err; + } + + pr_out_section_start(dl, "port"); + err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_port_show_cb, dl); + pr_out_section_end(dl); + return err; +} + +static int cmd_port_set(struct dl *dl) +{ + struct nlmsghdr *nlh; + int err; + + nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_PORT_SET, + NLM_F_REQUEST | NLM_F_ACK); + + err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLEP | DL_OPT_PORT_TYPE, 0); + if (err) + return err; + + return _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL); +} + +static int cmd_port_split(struct dl *dl) +{ + struct nlmsghdr *nlh; + int err; + + nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_PORT_SPLIT, + NLM_F_REQUEST | NLM_F_ACK); + + err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLEP | DL_OPT_PORT_COUNT, 0); + if (err) + return err; + + return _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL); +} + +static int cmd_port_unsplit(struct dl *dl) +{ + struct nlmsghdr *nlh; + int err; + + nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_PORT_UNSPLIT, + NLM_F_REQUEST | NLM_F_ACK); + + err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLEP, 0); + if (err) + return err; + + return _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL); +} + +static int cmd_port_param_show(struct dl *dl) +{ + uint16_t flags = NLM_F_REQUEST | NLM_F_ACK; + struct nlmsghdr *nlh; + int err; + + if (dl_argc(dl) == 0) + flags |= NLM_F_DUMP; + + nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_PORT_PARAM_GET, flags); + + if (dl_argc(dl) > 0) { + err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLEP | + DL_OPT_PARAM_NAME, 0); + if (err) + return err; + } + + pr_out_section_start(dl, "param"); + err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_port_param_show_cb, dl); + pr_out_section_end(dl); + + 
return err; +} + +static void cmd_port_function_help(void) +{ + pr_err("Usage: devlink port function set DEV/PORT_INDEX [ hw_addr ADDR ] [ state STATE ]\n"); +} + +static int cmd_port_function_set(struct dl *dl) +{ + struct nlmsghdr *nlh; + int err; + + if (dl_no_arg(dl)) { + cmd_port_function_help(); + return 0; + } + nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_PORT_SET, NLM_F_REQUEST | NLM_F_ACK); + + err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLEP, + DL_OPT_PORT_FUNCTION_HW_ADDR | DL_OPT_PORT_FUNCTION_STATE); + if (err) + return err; + + return _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL); +} + +static int cmd_port_param_set_cb(const struct nlmsghdr *nlh, void *data) +{ + struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh); + struct nlattr *nla_param[DEVLINK_ATTR_MAX + 1] = {}; + struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {}; + struct nlattr *param_value_attr; + enum devlink_param_cmode cmode; + struct param_ctx *ctx = data; + struct dl *dl = ctx->dl; + int nla_type; + int err; + + mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb); + if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME] || + !tb[DEVLINK_ATTR_PORT_INDEX] || !tb[DEVLINK_ATTR_PARAM]) + return MNL_CB_ERROR; + + err = mnl_attr_parse_nested(tb[DEVLINK_ATTR_PARAM], attr_cb, nla_param); + if (err != MNL_CB_OK) + return MNL_CB_ERROR; + + if (!nla_param[DEVLINK_ATTR_PARAM_TYPE] || + !nla_param[DEVLINK_ATTR_PARAM_VALUES_LIST]) + return MNL_CB_ERROR; -static int cmd_port_show(struct dl *dl) -{ - struct nlmsghdr *nlh; - uint16_t flags = NLM_F_REQUEST | NLM_F_ACK; - int err; + nla_type = mnl_attr_get_u8(nla_param[DEVLINK_ATTR_PARAM_TYPE]); + mnl_attr_for_each_nested(param_value_attr, + nla_param[DEVLINK_ATTR_PARAM_VALUES_LIST]) { + struct nlattr *nla_value[DEVLINK_ATTR_MAX + 1] = {}; + struct nlattr *val_attr; - if (dl_argc(dl) == 0) - flags |= NLM_F_DUMP; + err = mnl_attr_parse_nested(param_value_attr, + attr_cb, nla_value); + if (err != MNL_CB_OK) + return MNL_CB_ERROR; - nlh = 
mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_PORT_GET, flags); + if (!nla_value[DEVLINK_ATTR_PARAM_VALUE_CMODE] || + (nla_type != MNL_TYPE_FLAG && + !nla_value[DEVLINK_ATTR_PARAM_VALUE_DATA])) + return MNL_CB_ERROR; - if (dl_argc(dl) > 0) { - err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLEP, 0); - if (err) - return err; + cmode = mnl_attr_get_u8(nla_value[DEVLINK_ATTR_PARAM_VALUE_CMODE]); + if (cmode == dl->opts.cmode) { + val_attr = nla_value[DEVLINK_ATTR_PARAM_VALUE_DATA]; + switch (nla_type) { + case MNL_TYPE_U8: + ctx->value.vu8 = mnl_attr_get_u8(val_attr); + break; + case MNL_TYPE_U16: + ctx->value.vu16 = mnl_attr_get_u16(val_attr); + break; + case MNL_TYPE_U32: + ctx->value.vu32 = mnl_attr_get_u32(val_attr); + break; + case MNL_TYPE_STRING: + ctx->value.vstr = mnl_attr_get_str(val_attr); + break; + case MNL_TYPE_FLAG: + ctx->value.vbool = val_attr ? true : false; + break; + } + break; + } } - - pr_out_section_start(dl, "port"); - err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_port_show_cb, dl); - pr_out_section_end(dl); - return err; + ctx->nla_type = nla_type; + return MNL_CB_OK; } -static int cmd_port_set(struct dl *dl) +static int cmd_port_param_set(struct dl *dl) { + struct param_ctx ctx = {}; struct nlmsghdr *nlh; + bool conv_exists; + uint32_t val_u32 = 0; + uint16_t val_u16; + uint8_t val_u8; + bool val_bool; int err; - nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_PORT_SET, + err = dl_argv_parse(dl, DL_OPT_HANDLEP | + DL_OPT_PARAM_NAME | + DL_OPT_PARAM_VALUE | + DL_OPT_PARAM_CMODE, 0); + if (err) + return err; + + /* Get value type */ + nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_PORT_PARAM_GET, NLM_F_REQUEST | NLM_F_ACK); + dl_opts_put(nlh, dl); - err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLEP | DL_OPT_PORT_TYPE, 0); + ctx.dl = dl; + err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_port_param_set_cb, &ctx); if (err) return err; + nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_PORT_PARAM_SET, + NLM_F_REQUEST | NLM_F_ACK); + dl_opts_put(nlh, dl); + + conv_exists = 
param_val_conv_exists(param_val_conv, PARAM_VAL_CONV_LEN, + dl->opts.param_name); + + mnl_attr_put_u8(nlh, DEVLINK_ATTR_PARAM_TYPE, ctx.nla_type); + switch (ctx.nla_type) { + case MNL_TYPE_U8: + if (conv_exists) { + err = param_val_conv_uint_get(param_val_conv, + PARAM_VAL_CONV_LEN, + dl->opts.param_name, + dl->opts.param_value, + &val_u32); + val_u8 = val_u32; + } else { + err = strtouint8_t(dl->opts.param_value, &val_u8); + } + if (err) + goto err_param_value_parse; + if (val_u8 == ctx.value.vu8) + return 0; + mnl_attr_put_u8(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, val_u8); + break; + case MNL_TYPE_U16: + if (conv_exists) { + err = param_val_conv_uint_get(param_val_conv, + PARAM_VAL_CONV_LEN, + dl->opts.param_name, + dl->opts.param_value, + &val_u32); + val_u16 = val_u32; + } else { + err = strtouint16_t(dl->opts.param_value, &val_u16); + } + if (err) + goto err_param_value_parse; + if (val_u16 == ctx.value.vu16) + return 0; + mnl_attr_put_u16(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, val_u16); + break; + case MNL_TYPE_U32: + if (conv_exists) + err = param_val_conv_uint_get(param_val_conv, + PARAM_VAL_CONV_LEN, + dl->opts.param_name, + dl->opts.param_value, + &val_u32); + else + err = strtouint32_t(dl->opts.param_value, &val_u32); + if (err) + goto err_param_value_parse; + if (val_u32 == ctx.value.vu32) + return 0; + mnl_attr_put_u32(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, val_u32); + break; + case MNL_TYPE_FLAG: + err = strtobool(dl->opts.param_value, &val_bool); + if (err) + goto err_param_value_parse; + if (val_bool == ctx.value.vbool) + return 0; + if (val_bool) + mnl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, + 0, NULL); + break; + case MNL_TYPE_STRING: + mnl_attr_put_strz(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, + dl->opts.param_value); + if (!strcmp(dl->opts.param_value, ctx.value.vstr)) + return 0; + break; + default: + printf("Value type not supported\n"); + return -ENOTSUP; + } return _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL); + +err_param_value_parse: + 
pr_err("Value \"%s\" is not a number or not within range\n", + dl->opts.param_value); + return err; } -static int cmd_port_split(struct dl *dl) +static int cmd_port_param(struct dl *dl) { - struct nlmsghdr *nlh; - int err; + if (dl_argv_match(dl, "help")) { + cmd_port_help(); + return 0; + } else if (dl_argv_match(dl, "show") || + dl_argv_match(dl, "list") || dl_no_arg(dl)) { + dl_arg_inc(dl); + return cmd_port_param_show(dl); + } else if (dl_argv_match(dl, "set")) { + dl_arg_inc(dl); + return cmd_port_param_set(dl); + } + pr_err("Command \"%s\" not found\n", dl_argv(dl)); + return -ENOENT; +} - nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_PORT_SPLIT, - NLM_F_REQUEST | NLM_F_ACK); +static int cmd_port_function(struct dl *dl) +{ + if (dl_argv_match(dl, "help") || dl_no_arg(dl)) { + cmd_port_function_help(); + return 0; + } else if (dl_argv_match(dl, "set")) { + dl_arg_inc(dl); + return cmd_port_function_set(dl); + } + pr_err("Command \"%s\" not found\n", dl_argv(dl)); + return -ENOENT; +} - err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLEP | DL_OPT_PORT_COUNT, 0); - if (err) - return err; +static int cmd_health(struct dl *dl); +static int __cmd_health_show(struct dl *dl, bool show_device, bool show_port); - return _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL); +static void cmd_port_add_help(void) +{ + pr_err(" devlink port add { DEV | DEV/PORT_INDEX } flavour FLAVOUR pfnum PFNUM [ sfnum SFNUM ]\n"); } -static int cmd_port_unsplit(struct dl *dl) +static int cmd_port_add(struct dl *dl) { struct nlmsghdr *nlh; int err; - nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_PORT_UNSPLIT, + if (dl_argv_match(dl, "help") || dl_no_arg(dl)) { + cmd_port_add_help(); + return 0; + } + + nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_PORT_NEW, NLM_F_REQUEST | NLM_F_ACK); - err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLEP, 0); + err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLE | DL_OPT_HANDLEP | + DL_OPT_PORT_FLAVOUR | DL_OPT_PORT_PFNUMBER, + DL_OPT_PORT_SFNUMBER); if (err) return err; - return 
_mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL); + return _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_port_show_cb, dl); } -static void cmd_port_function_help(void) +static void cmd_port_del_help(void) { - pr_err("Usage: devlink port function set DEV/PORT_INDEX [ hw_addr ADDR ]\n"); + pr_err(" devlink port del DEV/PORT_INDEX\n"); } -static int cmd_port_function_set(struct dl *dl) +static int cmd_port_del(struct dl *dl) { struct nlmsghdr *nlh; int err; - nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_PORT_SET, NLM_F_REQUEST | NLM_F_ACK); + if (dl_argv_match(dl, "help") || dl_no_arg(dl)) { + cmd_port_del_help(); + return 0; + } - err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLEP | DL_OPT_PORT_FUNCTION_HW_ADDR, 0); + nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_PORT_DEL, + NLM_F_REQUEST | NLM_F_ACK); + + err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLEP, 0); if (err) return err; return _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL); } -static int cmd_port_function(struct dl *dl) -{ - if (dl_argv_match(dl, "help") || dl_no_arg(dl)) { - cmd_port_function_help(); - return 0; - } else if (dl_argv_match(dl, "set")) { - dl_arg_inc(dl); - return cmd_port_function_set(dl); - } - pr_err("Command \"%s\" not found\n", dl_argv(dl)); - return -ENOENT; -} - -static int cmd_health(struct dl *dl); -static int __cmd_health_show(struct dl *dl, bool show_device, bool show_port); - static int cmd_port(struct dl *dl) { if (dl_argv_match(dl, "help")) { @@ -3583,6 +4432,9 @@ static int cmd_port(struct dl *dl) } else if (dl_argv_match(dl, "unsplit")) { dl_arg_inc(dl); return cmd_port_unsplit(dl); + } else if (dl_argv_match(dl, "param")) { + dl_arg_inc(dl); + return cmd_port_param(dl); } else if (dl_argv_match(dl, "function")) { dl_arg_inc(dl); return cmd_port_function(dl); @@ -3595,7 +4447,14 @@ static int cmd_port(struct dl *dl) } else { return cmd_health(dl); } + } else if (dl_argv_match(dl, "add")) { + dl_arg_inc(dl); + return cmd_port_add(dl); + } else if (dl_argv_match(dl, "del")) { + dl_arg_inc(dl); + 
return cmd_port_del(dl); } + pr_err("Command \"%s\" not found\n", dl_argv(dl)); return -ENOENT; } @@ -4397,6 +5256,10 @@ static const char *cmd_name(uint8_t cmd) case DEVLINK_CMD_REGION_SET: return "set"; case DEVLINK_CMD_REGION_NEW: return "new"; case DEVLINK_CMD_REGION_DEL: return "del"; + case DEVLINK_CMD_PORT_PARAM_GET: return "get"; + case DEVLINK_CMD_PORT_PARAM_SET: return "set"; + case DEVLINK_CMD_PORT_PARAM_NEW: return "new"; + case DEVLINK_CMD_PORT_PARAM_DEL: return "del"; case DEVLINK_CMD_FLASH_UPDATE: return "begin"; case DEVLINK_CMD_FLASH_UPDATE_END: return "end"; case DEVLINK_CMD_FLASH_UPDATE_STATUS: return "status"; @@ -4435,6 +5298,10 @@ static const char *cmd_obj(uint8_t cmd) case DEVLINK_CMD_PARAM_SET: case DEVLINK_CMD_PARAM_NEW: case DEVLINK_CMD_PARAM_DEL: + case DEVLINK_CMD_PORT_PARAM_GET: + case DEVLINK_CMD_PORT_PARAM_SET: + case DEVLINK_CMD_PORT_PARAM_NEW: + case DEVLINK_CMD_PORT_PARAM_DEL: return "param"; case DEVLINK_CMD_REGION_GET: case DEVLINK_CMD_REGION_SET: @@ -4552,7 +5419,8 @@ static int cmd_mon_show_cb(const struct nlmsghdr *nlh, void *data) if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME]) return MNL_CB_ERROR; pr_out_mon_header(genl->cmd); - pr_out_handle(dl, tb); + dl->stats = true; + pr_out_dev(dl, tb); pr_out_mon_footer(); break; case DEVLINK_CMD_PORT_GET: /* fall through */ @@ -4576,7 +5444,7 @@ static int cmd_mon_show_cb(const struct nlmsghdr *nlh, void *data) !tb[DEVLINK_ATTR_PARAM]) return MNL_CB_ERROR; pr_out_mon_header(genl->cmd); - pr_out_param(dl, tb, false); + pr_out_param(dl, tb, false, false); pr_out_mon_footer(); break; case DEVLINK_CMD_REGION_GET: /* fall through */ @@ -7046,6 +7914,13 @@ static int cmd_health_diagnose(struct dl *dl) 0); } +static int cmd_health_test(struct dl *dl) +{ + return cmd_health_object_common(dl, + DEVLINK_CMD_HEALTH_REPORTER_TEST, + 0); +} + static int cmd_health_recover(struct dl *dl) { struct nlmsghdr *nlh; @@ -7250,6 +8125,7 @@ static void cmd_health_help(void) pr_err("Usage: 
devlink health show [ { DEV | DEV/PORT_INDEX } reporter REPORTER_NAME ]\n"); pr_err(" devlink health recover { DEV | DEV/PORT_INDEX } reporter REPORTER_NAME\n"); pr_err(" devlink health diagnose { DEV | DEV/PORT_INDEX } reporter REPORTER_NAME\n"); + pr_err(" devlink health test { DEV | DEV/PORT_INDEX } reporter REPORTER_NAME\n"); pr_err(" devlink health dump show { DEV | DEV/PORT_INDEX } reporter REPORTER_NAME\n"); pr_err(" devlink health dump clear { DEV | DEV/PORT_INDEX } reporter REPORTER_NAME\n"); pr_err(" devlink health set { DEV | DEV/PORT_INDEX } reporter REPORTER_NAME\n"); @@ -7273,6 +8149,9 @@ static int cmd_health(struct dl *dl) } else if (dl_argv_match(dl, "diagnose")) { dl_arg_inc(dl); return cmd_health_diagnose(dl); + } else if (dl_argv_match(dl, "test")) { + dl_arg_inc(dl); + return cmd_health_test(dl); } else if (dl_argv_match(dl, "dump")) { dl_arg_inc(dl); if (dl_argv_match(dl, "show")) { @@ -7745,43 +8624,16 @@ static void dl_free(struct dl *dl) free(dl); } -static int dl_batch(struct dl *dl, const char *name, bool force) +static int dl_batch_cmd(int argc, char *argv[], void *data) { - char *line = NULL; - size_t len = 0; - int ret = EXIT_SUCCESS; - - if (name && strcmp(name, "-") != 0) { - if (freopen(name, "r", stdin) == NULL) { - fprintf(stderr, - "Cannot open file \"%s\" for reading: %s\n", - name, strerror(errno)); - return EXIT_FAILURE; - } - } - - cmdlineno = 0; - while (getcmdline(&line, &len, stdin) != -1) { - char *largv[100]; - int largc; - - largc = makeargs(line, largv, 100); - if (!largc) - continue; /* blank line */ - - if (dl_cmd(dl, largc, largv)) { - fprintf(stderr, "Command failed %s:%d\n", - name, cmdlineno); - ret = EXIT_FAILURE; - if (!force) - break; - } - } + struct dl *dl = data; - if (line) - free(line); + return dl_cmd(dl, argc, argv); +} - return ret; +static int dl_batch(struct dl *dl, const char *name, bool force) +{ + return do_batch(name, force, dl_batch_cmd, dl); } int main(int argc, char **argv) diff --git 
a/devlink/mnlg.c b/devlink/mnlg.c index c7d25e8..21b10c5 100644 --- a/devlink/mnlg.c +++ b/devlink/mnlg.c @@ -14,11 +14,11 @@ #include #include #include -#include #include #include #include "libnetlink.h" +#include "mnl_utils.h" #include "utils.h" #include "mnlg.h" @@ -28,26 +28,20 @@ struct mnlg_socket { uint32_t id; uint8_t version; unsigned int seq; - unsigned int portid; }; static struct nlmsghdr *__mnlg_msg_prepare(struct mnlg_socket *nlg, uint8_t cmd, uint16_t flags, uint32_t id, uint8_t version) { + struct genlmsghdr genl = { + .cmd = cmd, + .version = version, + }; struct nlmsghdr *nlh; - struct genlmsghdr *genl; - - nlh = mnl_nlmsg_put_header(nlg->buf); - nlh->nlmsg_type = id; - nlh->nlmsg_flags = flags; - nlg->seq = time(NULL); - nlh->nlmsg_seq = nlg->seq; - - genl = mnl_nlmsg_put_extra_header(nlh, sizeof(struct genlmsghdr)); - genl->cmd = cmd; - genl->version = version; + nlh = mnlu_msg_prepare(nlg->buf, id, flags, &genl, sizeof(genl)); + nlg->seq = nlh->nlmsg_seq; return nlh; } @@ -62,61 +56,10 @@ int mnlg_socket_send(struct mnlg_socket *nlg, const struct nlmsghdr *nlh) return mnl_socket_sendto(nlg->nl, nlh, nlh->nlmsg_len); } -static int mnlg_cb_noop(const struct nlmsghdr *nlh, void *data) -{ - return MNL_CB_OK; -} - -static int mnlg_cb_error(const struct nlmsghdr *nlh, void *data) -{ - const struct nlmsgerr *err = mnl_nlmsg_get_payload(nlh); - - /* Netlink subsystems returns the errno value with different signess */ - if (err->error < 0) - errno = -err->error; - else - errno = err->error; - - if (nl_dump_ext_ack(nlh, NULL)) - return MNL_CB_ERROR; - - return err->error == 0 ? 
MNL_CB_STOP : MNL_CB_ERROR; -} - -static int mnlg_cb_stop(const struct nlmsghdr *nlh, void *data) -{ - int len = *(int *)NLMSG_DATA(nlh); - - if (len < 0) { - errno = -len; - nl_dump_ext_ack_done(nlh, len); - return MNL_CB_ERROR; - } - return MNL_CB_STOP; -} - -static mnl_cb_t mnlg_cb_array[NLMSG_MIN_TYPE] = { - [NLMSG_NOOP] = mnlg_cb_noop, - [NLMSG_ERROR] = mnlg_cb_error, - [NLMSG_DONE] = mnlg_cb_stop, - [NLMSG_OVERRUN] = mnlg_cb_noop, -}; - int mnlg_socket_recv_run(struct mnlg_socket *nlg, mnl_cb_t data_cb, void *data) { - int err; - - do { - err = mnl_socket_recvfrom(nlg->nl, nlg->buf, - MNL_SOCKET_BUFFER_SIZE); - if (err <= 0) - break; - err = mnl_cb_run2(nlg->buf, err, nlg->seq, nlg->portid, - data_cb, data, mnlg_cb_array, - ARRAY_SIZE(mnlg_cb_array)); - } while (err > 0); - - return err; + return mnlu_socket_recv_run(nlg->nl, nlg->seq, nlg->buf, MNL_SOCKET_BUFFER_SIZE, + data_cb, data); } struct group_info { @@ -263,7 +206,6 @@ struct mnlg_socket *mnlg_socket_open(const char *family_name, uint8_t version) { struct mnlg_socket *nlg; struct nlmsghdr *nlh; - int one = 1; int err; nlg = malloc(sizeof(*nlg)); @@ -274,19 +216,9 @@ struct mnlg_socket *mnlg_socket_open(const char *family_name, uint8_t version) if (!nlg->buf) goto err_buf_alloc; - nlg->nl = mnl_socket_open(NETLINK_GENERIC); + nlg->nl = mnlu_socket_open(NETLINK_GENERIC); if (!nlg->nl) - goto err_mnl_socket_open; - - /* Older kernels may no support capped/extended ACK reporting */ - mnl_socket_setsockopt(nlg->nl, NETLINK_CAP_ACK, &one, sizeof(one)); - mnl_socket_setsockopt(nlg->nl, NETLINK_EXT_ACK, &one, sizeof(one)); - - err = mnl_socket_bind(nlg->nl, 0, MNL_SOCKET_AUTOPID); - if (err < 0) - goto err_mnl_socket_bind; - - nlg->portid = mnl_socket_get_portid(nlg->nl); + goto err_socket_open; nlh = __mnlg_msg_prepare(nlg, CTRL_CMD_GETFAMILY, NLM_F_REQUEST | NLM_F_ACK, GENL_ID_CTRL, 1); @@ -305,9 +237,8 @@ struct mnlg_socket *mnlg_socket_open(const char *family_name, uint8_t version) 
err_mnlg_socket_recv_run: err_mnlg_socket_send: -err_mnl_socket_bind: mnl_socket_close(nlg->nl); -err_mnl_socket_open: +err_socket_open: free(nlg->buf); err_buf_alloc: free(nlg); diff --git a/examples/bpf/README b/examples/bpf/README index 1bbdda3..b726119 100644 --- a/examples/bpf/README +++ b/examples/bpf/README @@ -1,8 +1,18 @@ eBPF toy code examples (running in kernel) to familiarize yourself with syntax and features: - - bpf_shared.c -> Ingress/egress map sharing example - - bpf_tailcall.c -> Using tail call chains - - bpf_cyclic.c -> Simple cycle as tail calls +- BTF defined map examples - bpf_graft.c -> Demo on altering runtime behaviour - - bpf_map_in_map.c -> Using map in map example + - bpf_shared.c -> Ingress/egress map sharing example + - bpf_map_in_map.c -> Using map in map example + +- legacy struct bpf_elf_map defined map examples + - legacy/bpf_shared.c -> Ingress/egress map sharing example + - legacy/bpf_tailcall.c -> Using tail call chains + - legacy/bpf_cyclic.c -> Simple cycle as tail calls + - legacy/bpf_graft.c -> Demo on altering runtime behaviour + - legacy/bpf_map_in_map.c -> Using map in map example + +Note: Users should use new BTF way to defined the maps, the examples +in legacy folder which is using struct bpf_elf_map defined maps is not +recommanded. diff --git a/examples/bpf/bpf_graft.c b/examples/bpf/bpf_graft.c index 07113d4..8066dcc 100644 --- a/examples/bpf/bpf_graft.c +++ b/examples/bpf/bpf_graft.c @@ -33,13 +33,13 @@ * [...] 
*/ -struct bpf_elf_map __section_maps jmp_tc = { - .type = BPF_MAP_TYPE_PROG_ARRAY, - .size_key = sizeof(uint32_t), - .size_value = sizeof(uint32_t), - .pinning = PIN_GLOBAL_NS, - .max_elem = 1, -}; +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(key_size, sizeof(uint32_t)); + __uint(value_size, sizeof(uint32_t)); + __uint(max_entries, 1); + __uint(pinning, LIBBPF_PIN_BY_NAME); +} jmp_tc __section(".maps"); __section("aaa") int cls_aaa(struct __sk_buff *skb) diff --git a/examples/bpf/bpf_map_in_map.c b/examples/bpf/bpf_map_in_map.c index ff0e623..39c8626 100644 --- a/examples/bpf/bpf_map_in_map.c +++ b/examples/bpf/bpf_map_in_map.c @@ -1,24 +1,23 @@ #include "../../include/bpf_api.h" -#define MAP_INNER_ID 42 - -struct bpf_elf_map __section_maps map_inner = { - .type = BPF_MAP_TYPE_ARRAY, - .size_key = sizeof(uint32_t), - .size_value = sizeof(uint32_t), - .id = MAP_INNER_ID, - .inner_idx = 0, - .pinning = PIN_GLOBAL_NS, - .max_elem = 1, -}; - -struct bpf_elf_map __section_maps map_outer = { - .type = BPF_MAP_TYPE_ARRAY_OF_MAPS, - .size_key = sizeof(uint32_t), - .size_value = sizeof(uint32_t), - .inner_id = MAP_INNER_ID, - .pinning = PIN_GLOBAL_NS, - .max_elem = 1, +struct inner_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(key_size, sizeof(uint32_t)); + __uint(value_size, sizeof(uint32_t)); + __uint(max_entries, 1); +} map_inner __section(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(key_size, sizeof(uint32_t)); + __uint(value_size, sizeof(uint32_t)); + __uint(max_entries, 1); + __uint(pinning, LIBBPF_PIN_BY_NAME); + __array(values, struct inner_map); +} map_outer __section(".maps") = { + .values = { + [0] = &map_inner, + }, }; __section("egress") diff --git a/examples/bpf/bpf_shared.c b/examples/bpf/bpf_shared.c index 21fe6f1..99a332f 100644 --- a/examples/bpf/bpf_shared.c +++ b/examples/bpf/bpf_shared.c @@ -18,13 +18,13 @@ * instance is being created. 
*/ -struct bpf_elf_map __section_maps map_sh = { - .type = BPF_MAP_TYPE_ARRAY, - .size_key = sizeof(uint32_t), - .size_value = sizeof(uint32_t), - .pinning = PIN_OBJECT_NS, /* or PIN_GLOBAL_NS, or PIN_NONE */ - .max_elem = 1, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(key_size, sizeof(uint32_t)); + __uint(value_size, sizeof(uint32_t)); + __uint(max_entries, 1); + __uint(pinning, LIBBPF_PIN_BY_NAME); /* or LIBBPF_PIN_NONE */ +} map_sh __section(".maps"); __section("egress") int emain(struct __sk_buff *skb) diff --git a/examples/bpf/legacy/bpf_cyclic.c b/examples/bpf/legacy/bpf_cyclic.c new file mode 100644 index 0000000..3359073 --- /dev/null +++ b/examples/bpf/legacy/bpf_cyclic.c @@ -0,0 +1,35 @@ +#include "../../../include/bpf_api.h" + +/* Cyclic dependency example to test the kernel's runtime upper + * bound on loops. Also demonstrates on how to use direct-actions, + * loaded as: tc filter add [...] bpf da obj [...] + */ +#define JMP_MAP_ID 0xabccba + +struct bpf_elf_map __section_maps jmp_tc = { + .type = BPF_MAP_TYPE_PROG_ARRAY, + .id = JMP_MAP_ID, + .size_key = sizeof(uint32_t), + .size_value = sizeof(uint32_t), + .pinning = PIN_OBJECT_NS, + .max_elem = 1, +}; + +__section_tail(JMP_MAP_ID, 0) +int cls_loop(struct __sk_buff *skb) +{ + printt("cb: %u\n", skb->cb[0]++); + tail_call(skb, &jmp_tc, 0); + + skb->tc_classid = TC_H_MAKE(1, 42); + return TC_ACT_OK; +} + +__section_cls_entry +int cls_entry(struct __sk_buff *skb) +{ + tail_call(skb, &jmp_tc, 0); + return TC_ACT_SHOT; +} + +BPF_LICENSE("GPL"); diff --git a/examples/bpf/legacy/bpf_graft.c b/examples/bpf/legacy/bpf_graft.c new file mode 100644 index 0000000..f4c920c --- /dev/null +++ b/examples/bpf/legacy/bpf_graft.c @@ -0,0 +1,66 @@ +#include "../../../include/bpf_api.h" + +/* This example demonstrates how classifier run-time behaviour + * can be altered with tail calls. 
We start out with an empty + * jmp_tc array, then add section aaa to the array slot 0, and + * later on atomically replace it with section bbb. Note that + * as shown in other examples, the tc loader can prepopulate + * tail called sections, here we start out with an empty one + * on purpose to show it can also be done this way. + * + * tc filter add dev foo parent ffff: bpf obj graft.o + * tc exec bpf dbg + * [...] + * Socket Thread-20229 [001] ..s. 138993.003923: : fallthrough + * -0 [001] ..s. 138993.202265: : fallthrough + * Socket Thread-20229 [001] ..s. 138994.004149: : fallthrough + * [...] + * + * tc exec bpf graft m:globals/jmp_tc key 0 obj graft.o sec aaa + * tc exec bpf dbg + * [...] + * Socket Thread-19818 [002] ..s. 139012.053587: : aaa + * -0 [002] ..s. 139012.172359: : aaa + * Socket Thread-19818 [001] ..s. 139012.173556: : aaa + * [...] + * + * tc exec bpf graft m:globals/jmp_tc key 0 obj graft.o sec bbb + * tc exec bpf dbg + * [...] + * Socket Thread-19818 [002] ..s. 139022.102967: : bbb + * -0 [002] ..s. 139022.155640: : bbb + * Socket Thread-19818 [001] ..s. 139022.156730: : bbb + * [...] 
+ */ + +struct bpf_elf_map __section_maps jmp_tc = { + .type = BPF_MAP_TYPE_PROG_ARRAY, + .size_key = sizeof(uint32_t), + .size_value = sizeof(uint32_t), + .pinning = PIN_GLOBAL_NS, + .max_elem = 1, +}; + +__section("aaa") +int cls_aaa(struct __sk_buff *skb) +{ + printt("aaa\n"); + return TC_H_MAKE(1, 42); +} + +__section("bbb") +int cls_bbb(struct __sk_buff *skb) +{ + printt("bbb\n"); + return TC_H_MAKE(1, 43); +} + +__section_cls_entry +int cls_entry(struct __sk_buff *skb) +{ + tail_call(skb, &jmp_tc, 0); + printt("fallthrough\n"); + return BPF_H_DEFAULT; +} + +BPF_LICENSE("GPL"); diff --git a/examples/bpf/legacy/bpf_map_in_map.c b/examples/bpf/legacy/bpf_map_in_map.c new file mode 100644 index 0000000..575f881 --- /dev/null +++ b/examples/bpf/legacy/bpf_map_in_map.c @@ -0,0 +1,56 @@ +#include "../../../include/bpf_api.h" + +#define MAP_INNER_ID 42 + +struct bpf_elf_map __section_maps map_inner = { + .type = BPF_MAP_TYPE_ARRAY, + .size_key = sizeof(uint32_t), + .size_value = sizeof(uint32_t), + .id = MAP_INNER_ID, + .inner_idx = 0, + .pinning = PIN_GLOBAL_NS, + .max_elem = 1, +}; + +struct bpf_elf_map __section_maps map_outer = { + .type = BPF_MAP_TYPE_ARRAY_OF_MAPS, + .size_key = sizeof(uint32_t), + .size_value = sizeof(uint32_t), + .inner_id = MAP_INNER_ID, + .pinning = PIN_GLOBAL_NS, + .max_elem = 1, +}; + +__section("egress") +int emain(struct __sk_buff *skb) +{ + struct bpf_elf_map *map_inner; + int key = 0, *val; + + map_inner = map_lookup_elem(&map_outer, &key); + if (map_inner) { + val = map_lookup_elem(map_inner, &key); + if (val) + lock_xadd(val, 1); + } + + return BPF_H_DEFAULT; +} + +__section("ingress") +int imain(struct __sk_buff *skb) +{ + struct bpf_elf_map *map_inner; + int key = 0, *val; + + map_inner = map_lookup_elem(&map_outer, &key); + if (map_inner) { + val = map_lookup_elem(map_inner, &key); + if (val) + printt("map val: %d\n", *val); + } + + return BPF_H_DEFAULT; +} + +BPF_LICENSE("GPL"); diff --git a/examples/bpf/legacy/bpf_shared.c 
b/examples/bpf/legacy/bpf_shared.c new file mode 100644 index 0000000..05b2b9e --- /dev/null +++ b/examples/bpf/legacy/bpf_shared.c @@ -0,0 +1,53 @@ +#include "../../../include/bpf_api.h" + +/* Minimal, stand-alone toy map pinning example: + * + * clang -target bpf -O2 [...] -o bpf_shared.o -c bpf_shared.c + * tc filter add dev foo parent 1: bpf obj bpf_shared.o sec egress + * tc filter add dev foo parent ffff: bpf obj bpf_shared.o sec ingress + * + * Both classifier will share the very same map instance in this example, + * so map content can be accessed from ingress *and* egress side! + * + * This example has a pinning of PIN_OBJECT_NS, so it's private and + * thus shared among various program sections within the object. + * + * A setting of PIN_GLOBAL_NS would place it into a global namespace, + * so that it can be shared among different object files. A setting + * of PIN_NONE (= 0) means no sharing, so each tc invocation a new map + * instance is being created. + */ + +struct bpf_elf_map __section_maps map_sh = { + .type = BPF_MAP_TYPE_ARRAY, + .size_key = sizeof(uint32_t), + .size_value = sizeof(uint32_t), + .pinning = PIN_OBJECT_NS, /* or PIN_GLOBAL_NS, or PIN_NONE */ + .max_elem = 1, +}; + +__section("egress") +int emain(struct __sk_buff *skb) +{ + int key = 0, *val; + + val = map_lookup_elem(&map_sh, &key); + if (val) + lock_xadd(val, 1); + + return BPF_H_DEFAULT; +} + +__section("ingress") +int imain(struct __sk_buff *skb) +{ + int key = 0, *val; + + val = map_lookup_elem(&map_sh, &key); + if (val) + printt("map val: %d\n", *val); + + return BPF_H_DEFAULT; +} + +BPF_LICENSE("GPL"); diff --git a/examples/bpf/legacy/bpf_tailcall.c b/examples/bpf/legacy/bpf_tailcall.c new file mode 100644 index 0000000..8ebc554 --- /dev/null +++ b/examples/bpf/legacy/bpf_tailcall.c @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include "../../../include/bpf_api.h" + +#define ENTRY_INIT 3 +#define ENTRY_0 0 +#define ENTRY_1 1 +#define MAX_JMP_SIZE 2 + +#define FOO 
42 +#define BAR 43 + +/* This example doesn't really do anything useful, but it's purpose is to + * demonstrate eBPF tail calls on a very simple example. + * + * cls_entry() is our classifier entry point, from there we jump based on + * skb->hash into cls_case1() or cls_case2(). They are both part of the + * program array jmp_tc. Indicated via __section_tail(), the tc loader + * populates the program arrays with the loaded file descriptors already. + * + * To demonstrate nested jumps, cls_case2() jumps within the same jmp_tc + * array to cls_case1(). And whenever we arrive at cls_case1(), we jump + * into cls_exit(), part of the jump array jmp_ex. + * + * Also, to show it's possible, all programs share map_sh and dump the value + * that the entry point incremented. The sections that are loaded into a + * program array can be atomically replaced during run-time, e.g. to change + * classifier behaviour. + */ + +struct bpf_elf_map __section_maps jmp_tc = { + .type = BPF_MAP_TYPE_PROG_ARRAY, + .id = FOO, + .size_key = sizeof(uint32_t), + .size_value = sizeof(uint32_t), + .pinning = PIN_OBJECT_NS, + .max_elem = MAX_JMP_SIZE, +}; + +struct bpf_elf_map __section_maps jmp_ex = { + .type = BPF_MAP_TYPE_PROG_ARRAY, + .id = BAR, + .size_key = sizeof(uint32_t), + .size_value = sizeof(uint32_t), + .pinning = PIN_OBJECT_NS, + .max_elem = 1, +}; + +struct bpf_elf_map __section_maps map_sh = { + .type = BPF_MAP_TYPE_ARRAY, + .size_key = sizeof(uint32_t), + .size_value = sizeof(uint32_t), + .pinning = PIN_OBJECT_NS, + .max_elem = 1, +}; + +__section_tail(FOO, ENTRY_0) +int cls_case1(struct __sk_buff *skb) +{ + int key = 0, *val; + + val = map_lookup_elem(&map_sh, &key); + if (val) + printt("case1: map-val: %d from:%u\n", *val, skb->cb[0]); + + skb->cb[0] = ENTRY_0; + tail_call(skb, &jmp_ex, ENTRY_0); + + return BPF_H_DEFAULT; +} + +__section_tail(FOO, ENTRY_1) +int cls_case2(struct __sk_buff *skb) +{ + int key = 0, *val; + + val = map_lookup_elem(&map_sh, &key); + if (val) + 
printt("case2: map-val: %d from:%u\n", *val, skb->cb[0]); + + skb->cb[0] = ENTRY_1; + tail_call(skb, &jmp_tc, ENTRY_0); + + return BPF_H_DEFAULT; +} + +__section_tail(BAR, ENTRY_0) +int cls_exit(struct __sk_buff *skb) +{ + int key = 0, *val; + + val = map_lookup_elem(&map_sh, &key); + if (val) + printt("exit: map-val: %d from:%u\n", *val, skb->cb[0]); + + /* Termination point. */ + return BPF_H_DEFAULT; +} + +__section_cls_entry +int cls_entry(struct __sk_buff *skb) +{ + int key = 0, *val; + + /* For transferring state, we can use skb->cb[0] ... skb->cb[4]. */ + val = map_lookup_elem(&map_sh, &key); + if (val) { + lock_xadd(val, 1); + + skb->cb[0] = ENTRY_INIT; + tail_call(skb, &jmp_tc, skb->hash & (MAX_JMP_SIZE - 1)); + } + + printt("fallthrough\n"); + return BPF_H_DEFAULT; +} + +BPF_LICENSE("GPL"); diff --git a/genl/ctrl.c b/genl/ctrl.c index 0fb464b..549bc66 100644 --- a/genl/ctrl.c +++ b/genl/ctrl.c @@ -28,13 +28,15 @@ static int usage(void) { fprintf(stderr,"Usage: ctrl \n" \ - "CMD := get | list | monitor\n" \ + "CMD := get | list | monitor | policy \n" \ "PARMS := name | id \n" \ "Examples:\n" \ "\tctrl ls\n" \ "\tctrl monitor\n" \ "\tctrl get name foobar\n" \ - "\tctrl get id 0xF\n"); + "\tctrl get id 0xF\n" + "\tctrl policy name foobar\n" + "\tctrl policy id 0xF\n"); return -1; } @@ -123,7 +125,8 @@ static int print_ctrl(struct rtnl_ctrl_data *ctrl, ghdr->cmd != CTRL_CMD_DELFAMILY && ghdr->cmd != CTRL_CMD_NEWFAMILY && ghdr->cmd != CTRL_CMD_NEWMCAST_GRP && - ghdr->cmd != CTRL_CMD_DELMCAST_GRP) { + ghdr->cmd != CTRL_CMD_DELMCAST_GRP && + ghdr->cmd != CTRL_CMD_GETPOLICY) { fprintf(stderr, "Unknown controller command %d\n", ghdr->cmd); return 0; } @@ -136,7 +139,7 @@ static int print_ctrl(struct rtnl_ctrl_data *ctrl, } attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN); - parse_rtattr(tb, CTRL_ATTR_MAX, attrs, len); + parse_rtattr_flags(tb, CTRL_ATTR_MAX, attrs, len, NLA_F_NESTED); if (tb[CTRL_ATTR_FAMILY_NAME]) { char *name = 
RTA_DATA(tb[CTRL_ATTR_FAMILY_NAME]); @@ -159,6 +162,36 @@ static int print_ctrl(struct rtnl_ctrl_data *ctrl, __u32 *ma = RTA_DATA(tb[CTRL_ATTR_MAXATTR]); fprintf(fp, " max attribs: %d ",*ma); } + if (tb[CTRL_ATTR_OP_POLICY]) { + const struct rtattr *pos; + + rtattr_for_each_nested(pos, tb[CTRL_ATTR_OP_POLICY]) { + struct rtattr *ptb[CTRL_ATTR_POLICY_DUMP_MAX + 1]; + struct rtattr *pattrs = RTA_DATA(pos); + int plen = RTA_PAYLOAD(pos); + + parse_rtattr_flags(ptb, CTRL_ATTR_POLICY_DUMP_MAX, + pattrs, plen, NLA_F_NESTED); + + fprintf(fp, " op %d policies:", + pos->rta_type & ~NLA_F_NESTED); + + if (ptb[CTRL_ATTR_POLICY_DO]) { + __u32 *v = RTA_DATA(ptb[CTRL_ATTR_POLICY_DO]); + + fprintf(fp, " do=%d", *v); + } + + if (ptb[CTRL_ATTR_POLICY_DUMP]) { + __u32 *v = RTA_DATA(ptb[CTRL_ATTR_POLICY_DUMP]); + + fprintf(fp, " dump=%d", *v); + } + } + } + if (tb[CTRL_ATTR_POLICY]) + nl_print_policy(tb[CTRL_ATTR_POLICY], fp); + /* end of family definitions .. */ fprintf(fp,"\n"); if (tb[CTRL_ATTR_OPS]) { @@ -235,7 +268,9 @@ static int ctrl_list(int cmd, int argc, char **argv) exit(1); } - if (cmd == CTRL_CMD_GETFAMILY) { + if (cmd == CTRL_CMD_GETFAMILY || cmd == CTRL_CMD_GETPOLICY) { + req.g.cmd = cmd; + if (argc != 2) { fprintf(stderr, "Wrong number of params\n"); return -1; @@ -260,7 +295,9 @@ static int ctrl_list(int cmd, int argc, char **argv) fprintf(stderr, "Wrong params\n"); goto ctrl_done; } + } + if (cmd == CTRL_CMD_GETFAMILY) { if (rtnl_talk(&rth, nlh, &answer) < 0) { fprintf(stderr, "Error talking to the kernel\n"); goto ctrl_done; @@ -273,7 +310,7 @@ static int ctrl_list(int cmd, int argc, char **argv) } - if (cmd == CTRL_CMD_UNSPEC) { + if (cmd == CTRL_CMD_UNSPEC || cmd == CTRL_CMD_GETPOLICY) { nlh->nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST; nlh->nlmsg_seq = rth.dump = ++rth.seq; @@ -324,6 +361,8 @@ static int parse_ctrl(struct genl_util *a, int argc, char **argv) matches(*argv, "show") == 0 || matches(*argv, "lst") == 0) return ctrl_list(CTRL_CMD_UNSPEC, 
argc-1, argv+1); + if (matches(*argv, "policy") == 0) + return ctrl_list(CTRL_CMD_GETPOLICY, argc-1, argv+1); if (matches(*argv, "help") == 0) return usage(); diff --git a/include/bpf_api.h b/include/bpf_api.h index 89d3488..82c4708 100644 --- a/include/bpf_api.h +++ b/include/bpf_api.h @@ -19,6 +19,19 @@ #include "bpf_elf.h" +/** libbpf pin type. */ +enum libbpf_pin_type { + LIBBPF_PIN_NONE, + /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ + LIBBPF_PIN_BY_NAME, +}; + +/** Type helper macros. */ + +#define __uint(name, val) int (*name)[val] +#define __type(name, val) typeof(val) *name +#define __array(name, val) typeof(val) *name[] + /** Misc macros. */ #ifndef __stringify diff --git a/include/bpf_util.h b/include/bpf_util.h index 63db07c..53acc41 100644 --- a/include/bpf_util.h +++ b/include/bpf_util.h @@ -274,12 +274,16 @@ int bpf_trace_pipe(void); void bpf_print_ops(struct rtattr *bpf_ops, __u16 len); -int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t size_insns, const char *license, char *log, - size_t size_log); +int bpf_prog_load_dev(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t size_insns, const char *license, __u32 ifindex, + char *log, size_t size_log); +int bpf_program_load(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t size_insns, const char *license, char *log, + size_t size_log); int bpf_prog_attach_fd(int prog_fd, int target_fd, enum bpf_attach_type type); int bpf_prog_detach_fd(int target_fd, enum bpf_attach_type type); +int bpf_program_attach(int prog_fd, int target_fd, enum bpf_attach_type type); int bpf_dump_prog_info(FILE *f, uint32_t id); @@ -287,6 +291,16 @@ int bpf_dump_prog_info(FILE *f, uint32_t id); int bpf_send_map_fds(const char *path, const char *obj); int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux, unsigned int entries); +#ifdef HAVE_LIBBPF +int iproute2_bpf_elf_ctx_init(struct bpf_cfg_in *cfg); +int 
iproute2_bpf_fetch_ancillary(void); +int iproute2_get_root_path(char *root_path, size_t len); +bool iproute2_is_pin_map(const char *libbpf_map_name, char *pathname); +bool iproute2_is_map_in_map(const char *libbpf_map_name, struct bpf_elf_map *imap, + struct bpf_elf_map *omap, char *omap_name); +int iproute2_find_map_name_by_id(unsigned int map_id, char *name); +int iproute2_load_libbpf(struct bpf_cfg_in *cfg); +#endif /* HAVE_LIBBPF */ #else static inline int bpf_send_map_fds(const char *path, const char *obj) { @@ -299,5 +313,15 @@ static inline int bpf_recv_map_fds(const char *path, int *fds, { return -1; } +#ifdef HAVE_LIBBPF +static inline int iproute2_load_libbpf(struct bpf_cfg_in *cfg) +{ + fprintf(stderr, "No ELF library support compiled in.\n"); + return -1; +} +#endif /* HAVE_LIBBPF */ #endif /* HAVE_ELF */ + +const char *get_libbpf_version(void); + #endif /* __BPF_UTIL__ */ diff --git a/include/iptables.h b/include/iptables.h index 78bc378..eb91f29 100644 --- a/include/iptables.h +++ b/include/iptables.h @@ -12,7 +12,7 @@ extern int do_command4(int argc, char *argv[], char **table, struct xtc_handle **handle, bool restore); extern int delete_chain4(const xt_chainlabel chain, int verbose, struct xtc_handle *handle); -extern int flush_entries4(const xt_chainlabel chain, int verbose, +extern int flush_entries4(const xt_chainlabel chain, int verbose, struct xtc_handle *handle); extern int for_each_chain4(int (*fn)(const xt_chainlabel, int, struct xtc_handle *), int verbose, int builtinstoo, struct xtc_handle *handle); diff --git a/include/json_print.h b/include/json_print.h index 50e71de..6fcf9fd 100644 --- a/include/json_print.h +++ b/include/json_print.h @@ -15,6 +15,9 @@ #include "json_writer.h" #include "color.h" +#define _IS_JSON_CONTEXT(type) (is_json_context() && (type & PRINT_JSON || type & PRINT_ANY)) +#define _IS_FP_CONTEXT(type) (!is_json_context() && (type & PRINT_FP || type & PRINT_ANY)) + json_writer_t *get_json_writer(void); /* @@ -65,9 +68,11 
@@ void print_nl(void); _PRINT_FUNC(int, int) _PRINT_FUNC(s64, int64_t) _PRINT_FUNC(bool, bool) +_PRINT_FUNC(on_off, bool) _PRINT_FUNC(null, const char*) _PRINT_FUNC(string, const char*) _PRINT_FUNC(uint, unsigned int) +_PRINT_FUNC(size, __u32) _PRINT_FUNC(u64, uint64_t) _PRINT_FUNC(hhu, unsigned char) _PRINT_FUNC(hu, unsigned short) @@ -85,4 +90,17 @@ _PRINT_NAME_VALUE_FUNC(uint, unsigned int, u); _PRINT_NAME_VALUE_FUNC(string, const char*, s); #undef _PRINT_NAME_VALUE_FUNC +int print_color_rate(bool use_iec, enum output_type t, enum color_attr color, + const char *key, const char *fmt, unsigned long long rate); + +static inline int print_rate(bool use_iec, enum output_type t, + const char *key, const char *fmt, + unsigned long long rate) +{ + return print_color_rate(use_iec, t, COLOR_NONE, key, fmt, rate); +} + +/* A backdoor to the size formatter. Please use print_size() instead. */ +char *sprint_size(__u32 sz, char *buf); + #endif /* _JSON_PRINT_H_ */ diff --git a/include/libnetlink.h b/include/libnetlink.h index e27516f..b9073a6 100644 --- a/include/libnetlink.h +++ b/include/libnetlink.h @@ -284,4 +284,11 @@ int rtnl_from_file(FILE *, rtnl_listen_filter_t handler, * messages from dump file */ #define NLMSG_TSTAMP 15 +#define rtattr_for_each_nested(attr, nest) \ + for ((attr) = (void *)RTA_DATA(nest); \ + RTA_OK(attr, RTA_PAYLOAD(nest) - ((char *)(attr) - (char *)RTA_DATA((nest)))); \ + (attr) = RTA_TAIL((attr))) + +void nl_print_policy(const struct rtattr *attr, FILE *fp); + #endif /* __LIBNETLINK_H__ */ diff --git a/include/mnl_utils.h b/include/mnl_utils.h new file mode 100644 index 0000000..9e7d687 --- /dev/null +++ b/include/mnl_utils.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __MNL_UTILS_H__ +#define __MNL_UTILS_H__ 1 + +struct mnlu_gen_socket { + struct mnl_socket *nl; + char *buf; + uint32_t family; + unsigned int seq; + uint8_t version; +}; + +int mnlu_gen_socket_open(struct mnlu_gen_socket *nlg, const char *family_name, + 
uint8_t version); +void mnlu_gen_socket_close(struct mnlu_gen_socket *nlg); +struct nlmsghdr *mnlu_gen_socket_cmd_prepare(struct mnlu_gen_socket *nlg, + uint8_t cmd, uint16_t flags); +int mnlu_gen_socket_sndrcv(struct mnlu_gen_socket *nlg, const struct nlmsghdr *nlh, + mnl_cb_t data_cb, void *data); + +struct mnl_socket *mnlu_socket_open(int bus); +struct nlmsghdr *mnlu_msg_prepare(void *buf, uint32_t nlmsg_type, uint16_t flags, + void *extra_header, size_t extra_header_size); +int mnlu_socket_recv_run(struct mnl_socket *nl, unsigned int seq, void *buf, size_t buf_size, + mnl_cb_t cb, void *data); + +#endif /* __MNL_UTILS_H__ */ diff --git a/include/rt_names.h b/include/rt_names.h index 7afce17..1835f3b 100644 --- a/include/rt_names.h +++ b/include/rt_names.h @@ -9,6 +9,7 @@ const char *rtnl_rtscope_n2a(int id, char *buf, int len); const char *rtnl_rttable_n2a(__u32 id, char *buf, int len); const char *rtnl_rtrealm_n2a(int id, char *buf, int len); const char *rtnl_dsfield_n2a(int id, char *buf, int len); +const char *rtnl_dsfield_get_name(int id); const char *rtnl_group_n2a(int id, char *buf, int len); int rtnl_rtprot_a2n(__u32 *id, const char *arg); @@ -33,6 +34,9 @@ int ll_proto_a2n(unsigned short *id, const char *buf); const char *nl_proto_n2a(int id, char *buf, int len); int nl_proto_a2n(__u32 *id, const char *arg); +int protodown_reason_a2n(__u32 *id, const char *arg); +int protodown_reason_n2a(int id, char *buf, int len); + extern int numeric; #endif diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b21cc6a..b1aba6a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -19,7 +19,8 @@ /* ld/ldx fields */ #define BPF_DW 0x18 /* double word (64-bit) */ -#define BPF_XADD 0xc0 /* exclusive add */ +#define BPF_ATOMIC 0xc0 /* atomic memory ops - op type in immediate */ +#define BPF_XADD 0xc0 /* exclusive add - legacy name */ /* alu/jmp fields */ #define BPF_MOV 0xb0 /* mov reg to reg */ @@ -43,6 +44,11 @@ #define BPF_CALL 0x80 
/* function call */ #define BPF_EXIT 0x90 /* function return */ +/* atomic op type fields (stored in immediate) */ +#define BPF_FETCH 0x01 /* not an opcode on its own, used to build others */ +#define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */ +#define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */ + /* Register numbers */ enum { BPF_REG_0 = 0, @@ -124,6 +130,7 @@ enum bpf_cmd { BPF_ENABLE_STATS, BPF_ITER_CREATE, BPF_LINK_DETACH, + BPF_PROG_BIND_MAP, }; enum bpf_map_type { @@ -155,6 +162,8 @@ enum bpf_map_type { BPF_MAP_TYPE_DEVMAP_HASH, BPF_MAP_TYPE_STRUCT_OPS, BPF_MAP_TYPE_RINGBUF, + BPF_MAP_TYPE_INODE_STORAGE, + BPF_MAP_TYPE_TASK_STORAGE, }; /* Note that tracing related programs such as @@ -345,19 +354,45 @@ enum bpf_link_type { /* The verifier internal test flag. Behavior is undefined */ #define BPF_F_TEST_STATE_FREQ (1U << 3) +/* If BPF_F_SLEEPABLE is used in BPF_PROG_LOAD command, the verifier will + * restrict map and helper usage for such programs. Sleepable BPF programs can + * only be attached to hooks where kernel execution context allows sleeping. + * Such programs are allowed to use helpers that may sleep like + * bpf_copy_from_user(). 
+ */ +#define BPF_F_SLEEPABLE (1U << 4) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have - * two extensions: - * - * insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE - * insn[0].imm: map fd map fd - * insn[1].imm: 0 offset into value - * insn[0].off: 0 0 - * insn[1].off: 0 0 - * ldimm64 rewrite: address of map address of map[0]+offset - * verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE + * the following extensions: + * + * insn[0].src_reg: BPF_PSEUDO_MAP_FD + * insn[0].imm: map fd + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of map + * verifier type: CONST_PTR_TO_MAP */ #define BPF_PSEUDO_MAP_FD 1 +/* insn[0].src_reg: BPF_PSEUDO_MAP_VALUE + * insn[0].imm: map fd + * insn[1].imm: offset into value + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of map[0]+offset + * verifier type: PTR_TO_MAP_VALUE + */ #define BPF_PSEUDO_MAP_VALUE 2 +/* insn[0].src_reg: BPF_PSEUDO_BTF_ID + * insn[0].imm: kernel BTF id of VAR + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of the kernel variable + * verifier type: PTR_TO_BTF_ID or PTR_TO_MEM, depending on whether the var + * is struct/union. + */ +#define BPF_PSEUDO_BTF_ID 3 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative * offset to another bpf function @@ -404,6 +439,12 @@ enum { /* Enable memory-mapping BPF map */ BPF_F_MMAPABLE = (1U << 10), + +/* Share perf_event among processes */ + BPF_F_PRESERVE_ELEMS = (1U << 11), + +/* Create a map that is suitable to be an inner map with dynamic max entries */ + BPF_F_INNER_MAP = (1U << 12), }; /* Flags for BPF_PROG_QUERY.
*/ @@ -414,6 +455,11 @@ enum { */ #define BPF_F_QUERY_EFFECTIVE (1U << 0) +/* Flags for BPF_PROG_TEST_RUN */ + +/* If set, run the test on the cpu specified by bpf_attr.test.cpu */ +#define BPF_F_TEST_RUN_ON_CPU (1U << 0) + /* type for BPF_ENABLE_STATS */ enum bpf_stats_type { /* enabled run_time_ns and run_cnt */ @@ -517,7 +563,12 @@ union bpf_attr { __aligned_u64 line_info; /* line info */ __u32 line_info_cnt; /* number of bpf_line_info records */ __u32 attach_btf_id; /* in-kernel BTF type id to attach to */ - __u32 attach_prog_fd; /* 0 to attach to vmlinux */ + union { + /* valid prog_fd to attach to bpf prog */ + __u32 attach_prog_fd; + /* or valid module BTF object fd or 0 to attach to vmlinux */ + __u32 attach_btf_obj_fd; + }; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -556,6 +607,8 @@ union bpf_attr { */ __aligned_u64 ctx_in; __aligned_u64 ctx_out; + __u32 flags; + __u32 cpu; } test; struct { /* anonymous struct used by BPF_*_GET_*_ID */ @@ -622,8 +675,13 @@ union bpf_attr { }; __u32 attach_type; /* attach type */ __u32 flags; /* extra flags */ - __aligned_u64 iter_info; /* extra bpf_iter_link_info */ - __u32 iter_info_len; /* iter_info length */ + union { + __u32 target_btf_id; /* btf_id of target to attach to */ + struct { + __aligned_u64 iter_info; /* extra bpf_iter_link_info */ + __u32 iter_info_len; /* iter_info length */ + }; + }; } link_create; struct { /* struct used by BPF_LINK_UPDATE command */ @@ -649,6 +707,12 @@ union bpf_attr { __u32 flags; } iter_create; + struct { /* struct used by BPF_PROG_BIND_MAP command */ + __u32 prog_fd; + __u32 map_fd; + __u32 flags; /* extra flags */ + } prog_bind_map; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF @@ -1438,8 +1502,8 @@ union bpf_attr { * Return * The return value depends on the result of the test, and can be: * - * * 0, if the *skb* task belongs to the cgroup2. - * * 1, if the *skb* task does not belong to the cgroup2. 
+ * * 0, if current task belongs to the cgroup2. + * * 1, if current task does not belong to the cgroup2. * * A negative error code, if an error occurred. * * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) @@ -1592,22 +1656,30 @@ union bpf_attr { * networking traffic statistics as it provides a global socket * identifier that can be assumed unique. * Return - * A 8-byte long non-decreasing number on success, or 0 if the - * socket field is missing inside *skb*. + * A 8-byte long unique number on success, or 0 if the socket + * field is missing inside *skb*. * * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx) * Description * Equivalent to bpf_get_socket_cookie() helper that accepts * *skb*, but gets socket from **struct bpf_sock_addr** context. * Return - * A 8-byte long non-decreasing number. + * A 8-byte long unique number. * * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx) * Description * Equivalent to **bpf_get_socket_cookie**\ () helper that accepts * *skb*, but gets socket from **struct bpf_sock_ops** context. * Return - * A 8-byte long non-decreasing number. + * A 8-byte long unique number. + * + * u64 bpf_get_socket_cookie(struct sock *sk) + * Description + * Equivalent to **bpf_get_socket_cookie**\ () helper that accepts + * *sk*, but gets socket from a BTF **struct sock**. This helper + * also works for sleepable programs. + * Return + * A 8-byte long unique number or 0 if *sk* is NULL. * * u32 bpf_get_socket_uid(struct sk_buff *skb) * Return @@ -1649,7 +1721,7 @@ union bpf_attr { * **TCP_CONGESTION**, **TCP_BPF_IW**, * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**, * **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**, - * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**. + * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. 
* Return @@ -2167,6 +2239,9 @@ union bpf_attr { * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the * packet is not forwarded or needs assist from full stack * + * If lookup fails with BPF_FIB_LKUP_RET_FRAG_NEEDED, then the MTU + * was exceeded and output params->mtu_result contains the MTU. + * * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) * Description * Add an entry to, or update a sockhash *map* referencing sockets. @@ -2204,7 +2279,7 @@ union bpf_attr { * Description * This helper is used in programs implementing policies at the * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. - * if the verdeict eBPF program returns **SK_PASS**), redirect it + * if the verdict eBPF program returns **SK_PASS**), redirect it * to the socket referenced by *map* (of type * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and * egress interfaces can be used for redirection. The @@ -2390,7 +2465,7 @@ union bpf_attr { * running simultaneously. * * A user should care about the synchronization by himself. - * For example, by using the **BPF_STX_XADD** instruction to alter + * For example, by using the **BPF_ATOMIC** instructions to alter * the shared data. * Return * A pointer to the local storage area. @@ -2496,7 +2571,7 @@ union bpf_attr { * result is from *reuse*\ **->socks**\ [] using the hash of the * tuple. * - * long bpf_sk_release(struct bpf_sock *sock) + * long bpf_sk_release(void *sock) * Description * Release the reference held by *sock*. *sock* must be a * non-**NULL** pointer that was returned from @@ -2676,7 +2751,7 @@ union bpf_attr { * result is from *reuse*\ **->socks**\ [] using the hash of the * tuple. 
* - * long bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * long bpf_tcp_check_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) * Description * Check whether *iph* and *th* contain a valid SYN cookie ACK for * the listening socket in *sk*. @@ -2807,7 +2882,7 @@ union bpf_attr { * * **-ERANGE** if resulting value was out of range. * - * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags) + * void *bpf_sk_storage_get(struct bpf_map *map, void *sk, void *value, u64 flags) * Description * Get a bpf-local-storage from a *sk*. * @@ -2823,6 +2898,9 @@ union bpf_attr { * "type". The bpf-local-storage "type" (i.e. the *map*) is * searched against all bpf-local-storages residing at *sk*. * + * *sk* is a kernel **struct sock** pointer for LSM program. + * *sk* is a **struct bpf_sock** pointer for other program types. + * * An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be * used such that a new bpf-local-storage will be * created if one does not exist. *value* can be used @@ -2835,13 +2913,14 @@ union bpf_attr { * **NULL** if not found or there was an error in adding * a new bpf-local-storage. * - * long bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) + * long bpf_sk_storage_delete(struct bpf_map *map, void *sk) * Description * Delete a bpf-local-storage from a *sk*. * Return * 0 on success. * * **-ENOENT** if the bpf-local-storage cannot be found. + * **-EINVAL** if sk is not a fullsock (e.g. a request_sock). * * long bpf_send_signal(u32 sig) * Description @@ -2858,7 +2937,7 @@ union bpf_attr { * * **-EAGAIN** if bpf program can try again. 
* - * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * s64 bpf_tcp_gen_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) * Description * Try to issue a SYN cookie for the packet with corresponding * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*. @@ -2931,10 +3010,10 @@ union bpf_attr { * string length is larger than *size*, just *size*-1 bytes are * copied and the last byte is set to NUL. * - * On success, the length of the copied string is returned. This - * makes this helper useful in tracing programs for reading - * strings, and more importantly to get its length at runtime. See - * the following snippet: + * On success, returns the number of bytes that were written, + * including the terminal NUL. This makes this helper useful in + * tracing programs for reading strings, and more importantly to + * get its length at runtime. See the following snippet: * * :: * @@ -2962,7 +3041,7 @@ union bpf_attr { * **->mm->env_start**: using this helper and the return value, * one can quickly iterate at the right offset of the memory area. * Return - * On success, the strictly positive length of the string, + * On success, the strictly positive length of the output string, * including the trailing NUL character. On error, a negative * value. * @@ -3087,7 +3166,7 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) + * long bpf_sk_assign(struct sk_buff *skb, void *sk, u64 flags) * Description * Helper is overloaded depending on BPF program type. This * description applies to **BPF_PROG_TYPE_SCHED_CLS** and @@ -3215,11 +3294,11 @@ union bpf_attr { * * **-EOVERFLOW** if an overflow happened: The same object will be tried again. 
* - * u64 bpf_sk_cgroup_id(struct bpf_sock *sk) + * u64 bpf_sk_cgroup_id(void *sk) * Description * Return the cgroup v2 id of the socket *sk*. * - * *sk* must be a non-**NULL** pointer to a full socket, e.g. one + * *sk* must be a non-**NULL** pointer to a socket, e.g. one * returned from **bpf_sk_lookup_xxx**\ (), * **bpf_sk_fullsock**\ (), etc. The format of returned id is * same as in **bpf_skb_cgroup_id**\ (). @@ -3229,7 +3308,7 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level) + * u64 bpf_sk_ancestor_cgroup_id(void *sk, int ancestor_level) * Description * Return id of cgroup v2 that is ancestor of cgroup associated * with the *sk* at the *ancestor_level*. The root cgroup is at @@ -3337,38 +3416,38 @@ union bpf_attr { * Description * Dynamically cast a *sk* pointer to a *tcp6_sock* pointer. * Return - * *sk* if casting is valid, or NULL otherwise. + * *sk* if casting is valid, or **NULL** otherwise. * * struct tcp_sock *bpf_skc_to_tcp_sock(void *sk) * Description * Dynamically cast a *sk* pointer to a *tcp_sock* pointer. * Return - * *sk* if casting is valid, or NULL otherwise. + * *sk* if casting is valid, or **NULL** otherwise. * * struct tcp_timewait_sock *bpf_skc_to_tcp_timewait_sock(void *sk) * Description * Dynamically cast a *sk* pointer to a *tcp_timewait_sock* pointer. * Return - * *sk* if casting is valid, or NULL otherwise. + * *sk* if casting is valid, or **NULL** otherwise. * * struct tcp_request_sock *bpf_skc_to_tcp_request_sock(void *sk) * Description * Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer. * Return - * *sk* if casting is valid, or NULL otherwise. + * *sk* if casting is valid, or **NULL** otherwise. * * struct udp6_sock *bpf_skc_to_udp6_sock(void *sk) * Description * Dynamically cast a *sk* pointer to a *udp6_sock* pointer. * Return - * *sk* if casting is valid, or NULL otherwise. 
+ * *sk* if casting is valid, or **NULL** otherwise. * * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags) * Description * Return a user or a kernel stack in bpf program provided buffer. * To achieve this, the helper needs *task*, which is a valid - * pointer to struct task_struct. To store the stacktrace, the - * bpf program provides *buf* with a nonnegative *size*. + * pointer to **struct task_struct**. To store the stacktrace, the + * bpf program provides *buf* with a nonnegative *size*. * * The last argument, *flags*, holds the number of stack frames to * skip (from 0 to 255), masked with @@ -3395,6 +3474,447 @@ union bpf_attr { * A non-negative value equal to or less than *size* on success, * or a negative error in case of failure. * + * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags) + * Description + * Load header option. Support reading a particular TCP header + * option for bpf program (**BPF_PROG_TYPE_SOCK_OPS**). + * + * If *flags* is 0, it will search the option from the + * *skops*\ **->skb_data**. The comment in **struct bpf_sock_ops** + * has details on what skb_data contains under different + * *skops*\ **->op**. + * + * The first byte of the *searchby_res* specifies the + * kind that it wants to search. + * + * If the searching kind is an experimental kind + * (i.e. 253 or 254 according to RFC6994). It also + * needs to specify the "magic" which is either + * 2 bytes or 4 bytes. It then also needs to + * specify the size of the magic by using + * the 2nd byte which is "kind-length" of a TCP + * header option and the "kind-length" also + * includes the first 2 bytes "kind" and "kind-length" + * itself as a normal TCP header option also does. + * + * For example, to search experimental kind 254 with + * 2 byte magic 0xeB9F, the searchby_res should be + * [ 254, 4, 0xeB, 0x9F, 0, 0, .... 0 ]. 
+ * + * To search for the standard window scale option (3), + * the *searchby_res* should be [ 3, 0, 0, .... 0 ]. + * Note, kind-length must be 0 for regular option. + * + * Searching for No-Op (0) and End-of-Option-List (1) are + * not supported. + * + * *len* must be at least 2 bytes which is the minimal size + * of a header option. + * + * Supported flags: + * + * * **BPF_LOAD_HDR_OPT_TCP_SYN** to search from the + * saved_syn packet or the just-received syn packet. + * + * Return + * > 0 when found, the header option is copied to *searchby_res*. + * The return value is the total length copied. On failure, a + * negative error code is returned: + * + * **-EINVAL** if a parameter is invalid. + * + * **-ENOMSG** if the option is not found. + * + * **-ENOENT** if no syn packet is available when + * **BPF_LOAD_HDR_OPT_TCP_SYN** is used. + * + * **-ENOSPC** if there is not enough space. Only *len* number of + * bytes are copied. + * + * **-EFAULT** on failure to parse the header options in the + * packet. + * + * **-EPERM** if the helper cannot be used under the current + * *skops*\ **->op**. + * + * long bpf_store_hdr_opt(struct bpf_sock_ops *skops, const void *from, u32 len, u64 flags) + * Description + * Store header option. The data will be copied + * from buffer *from* with length *len* to the TCP header. + * + * The buffer *from* should have the whole option that + * includes the kind, kind-length, and the actual + * option data. The *len* must be at least kind-length + * long. The kind-length does not have to be 4 byte + * aligned. The kernel will take care of the padding + * and setting the 4 bytes aligned value to th->doff. + * + * This helper will check for duplicated option + * by searching the same option in the outgoing skb. + * + * This helper can only be called during + * **BPF_SOCK_OPS_WRITE_HDR_OPT_CB**. + * + * Return + * 0 on success, or negative error in case of failure: + * + * **-EINVAL** If param is invalid. 
+ * + * **-ENOSPC** if there is not enough space in the header. + * Nothing has been written. + * + * **-EEXIST** if the option already exists. + * + * **-EFAULT** on failure to parse the existing header options. + * + * **-EPERM** if the helper cannot be used under the current + * *skops*\ **->op**. + * + * long bpf_reserve_hdr_opt(struct bpf_sock_ops *skops, u32 len, u64 flags) + * Description + * Reserve *len* bytes for the bpf header option. The + * space will be used by **bpf_store_hdr_opt**\ () later in + * **BPF_SOCK_OPS_WRITE_HDR_OPT_CB**. + * + * If **bpf_reserve_hdr_opt**\ () is called multiple times, + * the total number of bytes will be reserved. + * + * This helper can only be called during + * **BPF_SOCK_OPS_HDR_OPT_LEN_CB**. + * + * Return + * 0 on success, or negative error in case of failure: + * + * **-EINVAL** if a parameter is invalid. + * + * **-ENOSPC** if there is not enough space in the header. + * + * **-EPERM** if the helper cannot be used under the current + * *skops*\ **->op**. + * + * void *bpf_inode_storage_get(struct bpf_map *map, void *inode, void *value, u64 flags) + * Description + * Get a bpf_local_storage from an *inode*. + * + * Logically, it could be thought of as getting the value from + * a *map* with *inode* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem**\ (*map*, **&**\ *inode*) except this + * helper enforces the key must be an inode and the map must also + * be a **BPF_MAP_TYPE_INODE_STORAGE**. + * + * Underneath, the value is stored locally at *inode* instead of + * the *map*. The *map* is used as the bpf-local-storage + * "type". The bpf-local-storage "type" (i.e. the *map*) is + * searched against all bpf_local_storage residing at *inode*. + * + * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be + * used such that a new bpf_local_storage will be + * created if one does not exist.
*value* can be used + * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify + * the initial value of a bpf_local_storage. If *value* is + * **NULL**, the new bpf_local_storage will be zero initialized. + * Return + * A bpf_local_storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf_local_storage. + * + * int bpf_inode_storage_delete(struct bpf_map *map, void *inode) + * Description + * Delete a bpf_local_storage from an *inode*. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf_local_storage cannot be found. + * + * long bpf_d_path(struct path *path, char *buf, u32 sz) + * Description + * Return full path for given **struct path** object, which + * needs to be the kernel BTF *path* object. The path is + * returned in the provided buffer *buf* of size *sz* and + * is zero terminated. + * + * Return + * On success, the strictly positive length of the string, + * including the trailing NUL character. On error, a negative + * value. + * + * long bpf_copy_from_user(void *dst, u32 size, const void *user_ptr) + * Description + * Read *size* bytes from user space address *user_ptr* and store + * the data in *dst*. This is a wrapper of **copy_from_user**\ (). + * Return + * 0 on success, or a negative error in case of failure. + * + * long bpf_snprintf_btf(char *str, u32 str_size, struct btf_ptr *ptr, u32 btf_ptr_size, u64 flags) + * Description + * Use BTF to store a string representation of *ptr*->ptr in *str*, + * using *ptr*->type_id. This value should specify the type + * that *ptr*->ptr points to. LLVM __builtin_btf_type_id(type, 1) + * can be used to look up vmlinux BTF type ids. Traversing the + * data structure using BTF, the type information and values are + * stored in the first *str_size* - 1 bytes of *str*. Safe copy of + * the pointer data is carried out to avoid kernel crashes during + * operation. 
Smaller types can use string space on the stack; + * larger programs can use map data to store the string + * representation. + * + * The string can be subsequently shared with userspace via + * bpf_perf_event_output() or ring buffer interfaces. + * bpf_trace_printk() is to be avoided as it places too small + * a limit on string size to be useful. + * + * *flags* is a combination of + * + * **BTF_F_COMPACT** + * no formatting around type information + * **BTF_F_NONAME** + * no struct/union member names/types + * **BTF_F_PTR_RAW** + * show raw (unobfuscated) pointer values; + * equivalent to printk specifier %px. + * **BTF_F_ZERO** + * show zero-valued struct/union members; they + * are not displayed by default + * + * Return + * The number of bytes that were written (or would have been + * written if output had to be truncated due to string size), + * or a negative error in cases of failure. + * + * long bpf_seq_printf_btf(struct seq_file *m, struct btf_ptr *ptr, u32 ptr_size, u64 flags) + * Description + * Use BTF to write to seq_write a string representation of + * *ptr*->ptr, using *ptr*->type_id as per bpf_snprintf_btf(). + * *flags* are identical to those used for bpf_snprintf_btf. + * Return + * 0 on success or a negative error in case of failure. + * + * u64 bpf_skb_cgroup_classid(struct sk_buff *skb) + * Description + * See **bpf_get_cgroup_classid**\ () for the main description. + * This helper differs from **bpf_get_cgroup_classid**\ () in that + * the cgroup v1 net_cls class is retrieved only from the *skb*'s + * associated socket instead of the current process. + * Return + * The id is returned or 0 in case the id could not be retrieved. + * + * long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags) + * Description + * Redirect the packet to another net device of index *ifindex* + * and fill in L2 addresses from neighboring subsystem. 
This helper + * is somewhat similar to **bpf_redirect**\ (), except that it + * populates L2 addresses as well, meaning, internally, the helper + * relies on the neighbor lookup for the L2 address of the nexthop. + * + * The helper will perform a FIB lookup based on the skb's + * networking header to get the address of the next hop, unless + * this is supplied by the caller in the *params* argument. The + * *plen* argument indicates the len of *params* and should be set + * to 0 if *params* is NULL. + * + * The *flags* argument is reserved and must be 0. The helper is + * currently only supported for tc BPF program types, and enabled + * for IPv4 and IPv6 protocols. + * Return + * The helper returns **TC_ACT_REDIRECT** on success or + * **TC_ACT_SHOT** on error. + * + * void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu) + * Description + * Take a pointer to a percpu ksym, *percpu_ptr*, and return a + * pointer to the percpu kernel variable on *cpu*. A ksym is an + * extern variable decorated with '__ksym'. For ksym, there is a + * global var (either static or global) defined of the same name + * in the kernel. The ksym is percpu if the global var is percpu. + * The returned pointer points to the global percpu var on *cpu*. + * + * bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the + * kernel, except that bpf_per_cpu_ptr() may return NULL. This + * happens if *cpu* is larger than nr_cpu_ids. The caller of + * bpf_per_cpu_ptr() must check the returned value. + * Return + * A pointer pointing to the kernel percpu variable on *cpu*, or + * NULL, if *cpu* is invalid. + * + * void *bpf_this_cpu_ptr(const void *percpu_ptr) + * Description + * Take a pointer to a percpu ksym, *percpu_ptr*, and return a + * pointer to the percpu kernel variable on this cpu. See the + * description of 'ksym' in **bpf_per_cpu_ptr**\ (). + * + * bpf_this_cpu_ptr() has the same semantic as this_cpu_ptr() in + * the kernel. 
Different from **bpf_per_cpu_ptr**\ (), it would + * never return NULL. + * Return + * A pointer pointing to the kernel percpu variable on this cpu. + * + * long bpf_redirect_peer(u32 ifindex, u64 flags) + * Description + * Redirect the packet to another net device of index *ifindex*. + * This helper is somewhat similar to **bpf_redirect**\ (), except + * that the redirection happens to the *ifindex*' peer device and + * the netns switch takes place from ingress to ingress without + * going through the CPU's backlog queue. + * + * The *flags* argument is reserved and must be 0. The helper is + * currently only supported for tc BPF program types at the ingress + * hook and for veth device types. The peer device must reside in a + * different network namespace. + * Return + * The helper returns **TC_ACT_REDIRECT** on success or + * **TC_ACT_SHOT** on error. + * + * void *bpf_task_storage_get(struct bpf_map *map, struct task_struct *task, void *value, u64 flags) + * Description + * Get a bpf_local_storage from the *task*. + * + * Logically, it could be thought of as getting the value from + * a *map* with *task* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this + * helper enforces the key must be an task_struct and the map must also + * be a **BPF_MAP_TYPE_TASK_STORAGE**. + * + * Underneath, the value is stored locally at *task* instead of + * the *map*. The *map* is used as the bpf-local-storage + * "type". The bpf-local-storage "type" (i.e. the *map*) is + * searched against all bpf_local_storage residing at *task*. + * + * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be + * used such that a new bpf_local_storage will be + * created if one does not exist. *value* can be used + * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify + * the initial value of a bpf_local_storage. 
If *value* is + * **NULL**, the new bpf_local_storage will be zero initialized. + * Return + * A bpf_local_storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf_local_storage. + * + * long bpf_task_storage_delete(struct bpf_map *map, struct task_struct *task) + * Description + * Delete a bpf_local_storage from a *task*. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf_local_storage cannot be found. + * + * struct task_struct *bpf_get_current_task_btf(void) + * Description + * Return a BTF pointer to the "current" task. + * This pointer can also be used in helpers that accept an + * *ARG_PTR_TO_BTF_ID* of type *task_struct*. + * Return + * Pointer to the current task. + * + * long bpf_bprm_opts_set(struct linux_binprm *bprm, u64 flags) + * Description + * Set or clear certain options on *bprm*: + * + * **BPF_F_BPRM_SECUREEXEC** Set the secureexec bit + * which sets the **AT_SECURE** auxv for glibc. The bit + * is cleared if the flag is not specified. + * Return + * **-EINVAL** if invalid *flags* are passed, zero otherwise. + * + * u64 bpf_ktime_get_coarse_ns(void) + * Description + * Return a coarse-grained version of the time elapsed since + * system boot, in nanoseconds. Does not include time the system + * was suspended. + * + * See: **clock_gettime**\ (**CLOCK_MONOTONIC_COARSE**) + * Return + * Current *ktime*. + * + * long bpf_ima_inode_hash(struct inode *inode, void *dst, u32 size) + * Description + * Returns the stored IMA hash of the *inode* (if it's available). + * If the hash is larger than *size*, then only *size* + * bytes will be copied to *dst*. + * Return + * The **hash_algo** is returned on success, + * **-EOPNOTSUPP** if IMA is disabled or **-EINVAL** if + * invalid arguments are passed. + * + * struct socket *bpf_sock_from_file(struct file *file) + * Description + * If the given file represents a socket, returns the associated + * socket.
+ * Return + * A pointer to a struct socket on success or NULL if the file is + * not a socket. + * + * long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags) + * Description + * Check packet size against exceeding MTU of net device (based + * on *ifindex*). This helper will likely be used in combination + * with helpers that adjust/change the packet size. + * + * The argument *len_diff* can be used for querying with a planned + * size change. This allows checking the MTU prior to changing packet + * ctx. Providing a *len_diff* adjustment that is larger than the + * actual packet size (resulting in negative packet size) will in + * principle not exceed the MTU, which is why it is not considered a + * failure. Other BPF-helpers are needed for performing the + * planned size change, so the responsibility for catching a negative + * packet size belongs to those helpers. + * + * Specifying *ifindex* zero means the MTU check is performed + * against the current net device. This is practical if this isn't + * used prior to redirect. + * + * On input *mtu_len* must be a valid pointer, else verifier will + * reject BPF program. If the value *mtu_len* is initialized to + * zero then the ctx packet size is used. When value *mtu_len* is + * provided as input it specifies the L3 length that the MTU check + * is done against. Remember XDP and TC length operate at L2, but + * this value is L3 as this correlates to MTU and IP-header tot_len + * values which are L3 (similar behavior as bpf_fib_lookup). + * + * The Linux kernel route table can configure MTUs on a more + * specific per route level, which is not provided by this helper. + * For route level MTU checks use the **bpf_fib_lookup**\ () + * helper. + * + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct sk_buff** for tc cls_act programs.
+ * + * The *flags* argument can be a combination of one or more of the + * following values: + * + * **BPF_MTU_CHK_SEGS** + * This flag only works for *ctx* **struct sk_buff**. + * If packet context contains extra packet segment buffers + * (often known as GSO skb), then MTU check is harder to + * check at this point, because in transmit path it is + * possible for the skb packet to get re-segmented + * (depending on net device features). This could still be + * a MTU violation, so this flag enables performing MTU + * check against segments, with a different violation + * return code to tell it apart. Check cannot use len_diff. + * + * On return *mtu_len* pointer contains the MTU value of the net + * device. Remember the net device configured MTU is the L3 size, + * which is returned here and XDP and TC length operate at L2. + * The helper takes this into account for you, but remember this when + * using the MTU value in your BPF-code. + * + * Return + * * 0 on success, and populate MTU value in *mtu_len* pointer.
+ * + * * < 0 if any input argument is invalid (*mtu_len* not updated) + * + * MTU violations return positive values, but also populate MTU + * value in *mtu_len* pointer, as this can be needed for + * implementing PMTU handing: + * + * * **BPF_MTU_CHK_RET_FRAG_NEEDED** + * * **BPF_MTU_CHK_RET_SEGS_TOOBIG** + * */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3539,6 +4059,28 @@ union bpf_attr { FN(skc_to_tcp_request_sock), \ FN(skc_to_udp6_sock), \ FN(get_task_stack), \ + FN(load_hdr_opt), \ + FN(store_hdr_opt), \ + FN(reserve_hdr_opt), \ + FN(inode_storage_get), \ + FN(inode_storage_delete), \ + FN(d_path), \ + FN(copy_from_user), \ + FN(snprintf_btf), \ + FN(seq_printf_btf), \ + FN(skb_cgroup_classid), \ + FN(redirect_neigh), \ + FN(per_cpu_ptr), \ + FN(this_cpu_ptr), \ + FN(redirect_peer), \ + FN(task_storage_get), \ + FN(task_storage_delete), \ + FN(get_current_task_btf), \ + FN(bprm_opts_set), \ + FN(ktime_get_coarse_ns), \ + FN(ima_inode_hash), \ + FN(sock_from_file), \ + FN(check_mtu), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -3648,9 +4190,13 @@ enum { BPF_F_SYSCTL_BASE_NAME = (1ULL << 0), }; -/* BPF_FUNC_sk_storage_get flags */ +/* BPF_FUNC__storage_get flags */ enum { - BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0), + BPF_LOCAL_STORAGE_GET_F_CREATE = (1ULL << 0), + /* BPF_SK_STORAGE_GET_F_CREATE is only kept for backward compatibility + * and BPF_LOCAL_STORAGE_GET_F_CREATE must be used instead. + */ + BPF_SK_STORAGE_GET_F_CREATE = BPF_LOCAL_STORAGE_GET_F_CREATE, }; /* BPF_FUNC_read_branch_records flags. 
*/ @@ -3706,6 +4252,11 @@ enum bpf_lwt_encap_mode { BPF_LWT_ENCAP_IP, }; +/* Flags for bpf_bprm_opts_set helper */ +enum { + BPF_F_BPRM_SECUREEXEC = (1ULL << 0), +}; + #define __bpf_md_ptr(type, name) \ union { \ type name; \ @@ -4030,6 +4581,7 @@ struct bpf_prog_info { __aligned_u64 prog_tags; __u64 run_time_ns; __u64 run_cnt; + __u64 recursion_misses; } __attribute__((aligned(8))); struct bpf_map_info { @@ -4053,6 +4605,9 @@ struct bpf_btf_info { __aligned_u64 btf; __u32 btf_size; __u32 id; + __aligned_u64 name; + __u32 name_len; + __u32 kernel_btf; } __attribute__((aligned(8))); struct bpf_link_info { @@ -4071,6 +4626,15 @@ struct bpf_link_info { __u64 cgroup_id; __u32 attach_type; } cgroup; + struct { + __aligned_u64 target_name; /* in/out: target_name buffer ptr */ + __u32 target_name_len; /* in/out: target_name buffer len */ + union { + struct { + __u32 map_id; + } map; + }; + } iter; struct { __u32 netns_ino; __u32 attach_type; @@ -4158,6 +4722,36 @@ struct bpf_sock_ops { __u64 bytes_received; __u64 bytes_acked; __bpf_md_ptr(struct bpf_sock *, sk); + /* [skb_data, skb_data_end) covers the whole TCP header. + * + * BPF_SOCK_OPS_PARSE_HDR_OPT_CB: The packet received + * BPF_SOCK_OPS_HDR_OPT_LEN_CB: Not useful because the + * header has not been written. + * BPF_SOCK_OPS_WRITE_HDR_OPT_CB: The header and options have + * been written so far. + * BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: The SYNACK that concludes + * the 3WHS. + * BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: The ACK that concludes + * the 3WHS. + * + * bpf_load_hdr_opt() can also be used to read a particular option. + */ + __bpf_md_ptr(void *, skb_data); + __bpf_md_ptr(void *, skb_data_end); + __u32 skb_len; /* The total length of a packet. + * It includes the header, options, + * and payload. + */ + __u32 skb_tcp_flags; /* tcp_flags of the header. It provides + * an easy way to check for tcp_flags + * without parsing skb_data. 
+ * + * In particular, the skb_tcp_flags + * will still be available in + * BPF_SOCK_OPS_HDR_OPT_LEN even though + * the outgoing header has not + * been written yet. + */ }; /* Definitions for bpf_sock_ops_cb_flags */ @@ -4166,8 +4760,51 @@ enum { BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1), BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2), BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3), + /* Call bpf for all received TCP headers. The bpf prog will be + * called under sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB + * + * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB + * for the header option related helpers that will be useful + * to the bpf programs. + * + * It could be used at the client/active side (i.e. connect() side) + * when the server told it that the server was in syncookie + * mode and required the active side to resend the bpf-written + * options. The active side can keep writing the bpf-options until + * it received a valid packet from the server side to confirm + * the earlier packet (and options) has been received. The later + * example patch is using it like this at the active side when the + * server is in syncookie mode. + * + * The bpf prog will usually turn this off in the common cases. + */ + BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG = (1<<4), + /* Call bpf when kernel has received a header option that + * the kernel cannot handle. The bpf prog will be called under + * sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB. + * + * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB + * for the header option related helpers that will be useful + * to the bpf programs. + */ + BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5), + /* Call bpf when the kernel is writing header options for the + * outgoing packet. The bpf prog will first be called + * to reserve space in a skb under + * sock_ops->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB. Then + * the bpf prog will be called to write the header option(s) + * under sock_ops->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB. 
+ * + * Please refer to the comment in BPF_SOCK_OPS_HDR_OPT_LEN_CB + * and BPF_SOCK_OPS_WRITE_HDR_OPT_CB for the header option + * related helpers that will be useful to the bpf programs. + * + * The kernel gets its chance to reserve space and write + * options first before the BPF program does. + */ + BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6), /* Mask of all currently supported cb flags */ - BPF_SOCK_OPS_ALL_CB_FLAGS = 0xF, + BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F, }; /* List of known BPF sock_ops operators. @@ -4223,6 +4860,63 @@ enum { */ BPF_SOCK_OPS_RTT_CB, /* Called on every RTT. */ + BPF_SOCK_OPS_PARSE_HDR_OPT_CB, /* Parse the header option. + * It will be called to handle + * the packets received at + * an already established + * connection. + * + * sock_ops->skb_data: + * Referring to the received skb. + * It covers the TCP header only. + * + * bpf_load_hdr_opt() can also + * be used to search for a + * particular option. + */ + BPF_SOCK_OPS_HDR_OPT_LEN_CB, /* Reserve space for writing the + * header option later in + * BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + * Arg1: bool want_cookie. (in + * writing SYNACK only) + * + * sock_ops->skb_data: + * Not available because no header has + * been written yet. + * + * sock_ops->skb_tcp_flags: + * The tcp_flags of the + * outgoing skb. (e.g. SYN, ACK, FIN). + * + * bpf_reserve_hdr_opt() should + * be used to reserve space. + */ + BPF_SOCK_OPS_WRITE_HDR_OPT_CB, /* Write the header options + * Arg1: bool want_cookie. (in + * writing SYNACK only) + * + * sock_ops->skb_data: + * Referring to the outgoing skb. + * It covers the TCP header + * that has already been written + * by the kernel and the + * earlier bpf-progs. + * + * sock_ops->skb_tcp_flags: + * The tcp_flags of the outgoing + * skb. (e.g. SYN, ACK, FIN). + * + * bpf_store_hdr_opt() should + * be used to write the + * option. 
+ * + * bpf_load_hdr_opt() can also + * be used to search for a + * particular option that + * has already been written + * by the kernel or the + * earlier bpf-progs. + */ }; /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect @@ -4250,6 +4944,63 @@ enum { enum { TCP_BPF_IW = 1001, /* Set TCP initial congestion window */ TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */ + TCP_BPF_DELACK_MAX = 1003, /* Max delay ack in usecs */ + TCP_BPF_RTO_MIN = 1004, /* Min RTO in usecs */ + /* Copy the SYN pkt to optval + * + * BPF_PROG_TYPE_SOCK_OPS only. It is similar to the + * bpf_getsockopt(TCP_SAVED_SYN) but it does not limit + * to only getting from the saved_syn. It can either get the + * syn packet from: + * + * 1. the just-received SYN packet (only available when writing the + * SYNACK). It will be useful when it is not necessary to + * save the SYN packet for later use. It is also the only way + * to get the SYN during syncookie mode because the syn + * packet cannot be saved during syncookie. + * + * OR + * + * 2. the earlier saved syn which was done by + * bpf_setsockopt(TCP_SAVE_SYN). + * + * The bpf_getsockopt(TCP_BPF_SYN*) option will hide where the + * SYN packet is obtained. + * + * If the bpf-prog does not need the IP[46] header, the + * bpf-prog can avoid parsing the IP header by using + * TCP_BPF_SYN. Otherwise, the bpf-prog can get both + * IP[46] and TCP header by using TCP_BPF_SYN_IP. + * + * >0: Total number of bytes copied + * -ENOSPC: Not enough space in optval. Only optlen number of + * bytes is copied. + * -ENOENT: The SYN skb is not available now and the earlier SYN pkt + * is not saved by setsockopt(TCP_SAVE_SYN).
+ */ + TCP_BPF_SYN = 1005, /* Copy the TCP header */ + TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */ + TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */ +}; + +enum { + BPF_LOAD_HDR_OPT_TCP_SYN = (1ULL << 0), +}; + +/* args[0] value during BPF_SOCK_OPS_HDR_OPT_LEN_CB and + * BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + */ +enum { + BPF_WRITE_HDR_TCP_CURRENT_MSS = 1, /* Kernel is finding the + * total option spaces + * required for an established + * sk in order to calculate the + * MSS. No skb is actually + * sent. + */ + BPF_WRITE_HDR_TCP_SYNACK_COOKIE = 2, /* Kernel is in syncookie mode + * when sending a SYN. + */ }; struct bpf_perf_event_value { @@ -4311,9 +5062,13 @@ struct bpf_fib_lookup { __be16 sport; __be16 dport; - /* total length of packet from network header - used for MTU check */ - __u16 tot_len; + union { /* used for MTU check */ + /* input to lookup */ + __u16 tot_len; /* L3 length from network hdr (iph->tot_len) */ + /* output: MTU value */ + __u16 mtu_result; + }; /* input: L3 device index for lookup * output: device index from FIB lookup */ @@ -4349,6 +5104,27 @@ struct bpf_fib_lookup { __u8 dmac[6]; /* ETH_ALEN */ }; +struct bpf_redir_neigh { + /* network family for lookup (AF_INET, AF_INET6) */ + __u32 nh_family; + /* network address of nexthop; skips fib lookup to find gateway */ + union { + __be32 ipv4_nh; + __u32 ipv6_nh[4]; /* in6_addr; network order */ + }; +}; + +/* bpf_check_mtu flags*/ +enum bpf_check_mtu_flags { + BPF_MTU_CHK_SEGS = (1U << 0), +}; + +enum bpf_check_mtu_ret { + BPF_MTU_CHK_RET_SUCCESS, /* check and lookup successful */ + BPF_MTU_CHK_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_MTU_CHK_RET_SEGS_TOOBIG, /* GSO re-segmentation needed to fwd */ +}; + enum bpf_task_fd_type { BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ BPF_FD_TYPE_TRACEPOINT, /* tp name */ @@ -4447,4 +5223,34 @@ struct bpf_sk_lookup { __u32 local_port; /* Host byte order */ }; +/* + * struct btf_ptr is used for typed pointer 
representation; the + * type id is used to render the pointer data as the appropriate type + * via the bpf_snprintf_btf() helper described above. A flags field - + * potentially to specify additional details about the BTF pointer + * (rather than its mode of display) - is included for future use. + * Display flags - BTF_F_* - are passed to bpf_snprintf_btf separately. + */ +struct btf_ptr { + void *ptr; + __u32 type_id; + __u32 flags; /* BTF ptr flags; unused at present. */ +}; + +/* + * Flags to control bpf_snprintf_btf() behaviour. + * - BTF_F_COMPACT: no formatting around type information + * - BTF_F_NONAME: no struct/union member names/types + * - BTF_F_PTR_RAW: show raw (unobfuscated) pointer values; + * equivalent to %px. + * - BTF_F_ZERO: show zero-valued struct/union members; they + * are not displayed by default + */ +enum { + BTF_F_COMPACT = (1ULL << 0), + BTF_F_NONAME = (1ULL << 1), + BTF_F_PTR_RAW = (1ULL << 2), + BTF_F_ZERO = (1ULL << 3), +}; + #endif /* __LINUX_BPF_H__ */ diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index 55b6660..5e1d383 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -84,6 +84,7 @@ typedef __u32 can_err_mask_t; /* CAN payload length and DLC definitions according to ISO 11898-1 */ #define CAN_MAX_DLC 8 +#define CAN_MAX_RAW_DLC 15 #define CAN_MAX_DLEN 8 /* CAN FD payload length and DLC definitions according to ISO 11898-7 */ @@ -91,23 +92,32 @@ typedef __u32 can_err_mask_t; #define CANFD_MAX_DLEN 64 /** - * struct can_frame - basic CAN frame structure - * @can_id: CAN ID of the frame and CAN_*_FLAG flags, see canid_t definition - * @can_dlc: frame payload length in byte (0 .. 8) aka data length code - * N.B. 
the DLC field from ISO 11898-1 Chapter 8.4.2.3 has a 1:1 - * mapping of the 'data length code' to the real payload length - * @__pad: padding - * @__res0: reserved / padding - * @__res1: reserved / padding - * @data: CAN frame payload (up to 8 byte) + * struct can_frame - Classical CAN frame structure (aka CAN 2.0B) + * @can_id: CAN ID of the frame and CAN_*_FLAG flags, see canid_t definition + * @len: CAN frame payload length in byte (0 .. 8) + * @can_dlc: deprecated name for CAN frame payload length in byte (0 .. 8) + * @__pad: padding + * @__res0: reserved / padding + * @len8_dlc: optional DLC value (9 .. 15) at 8 byte payload length + * len8_dlc contains values from 9 .. 15 when the payload length is + * 8 bytes but the DLC value (see ISO 11898-1) is greater then 8. + * CAN_CTRLMODE_CC_LEN8_DLC flag has to be enabled in CAN driver. + * @data: CAN frame payload (up to 8 byte) */ struct can_frame { canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */ - __u8 can_dlc; /* frame payload length in byte (0 .. CAN_MAX_DLEN) */ - __u8 __pad; /* padding */ - __u8 __res0; /* reserved / padding */ - __u8 __res1; /* reserved / padding */ - __u8 data[CAN_MAX_DLEN] __attribute__((aligned(8))); + union { + /* CAN frame payload length in byte (0 .. CAN_MAX_DLEN) + * was previously named can_dlc so we need to carry that + * name for legacy support + */ + __u8 len; + __u8 can_dlc; /* deprecated */ + } __attribute__((packed)); /* disable padding added in some ABIs */ + __u8 __pad; /* padding */ + __u8 __res0; /* reserved / padding */ + __u8 len8_dlc; /* optional DLC for 8 byte payload length (9 .. 
15) */ + __u8 data[CAN_MAX_DLEN] __attribute__((aligned(8))); }; /* diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h index 7159dc6..00c763d 100644 --- a/include/uapi/linux/can/netlink.h +++ b/include/uapi/linux/can/netlink.h @@ -100,6 +100,7 @@ struct can_ctrlmode { #define CAN_CTRLMODE_FD 0x20 /* CAN FD mode */ #define CAN_CTRLMODE_PRESUME_ACK 0x40 /* Ignore missing CAN ACKs */ #define CAN_CTRLMODE_FD_NON_ISO 0x80 /* CAN FD in non-ISO mode */ +#define CAN_CTRLMODE_CC_LEN8_DLC 0x100 /* Classic CAN DLC option */ /* * CAN device statistics diff --git a/include/uapi/linux/const.h b/include/uapi/linux/const.h index fd885c7..5e48987 100644 --- a/include/uapi/linux/const.h +++ b/include/uapi/linux/const.h @@ -28,4 +28,9 @@ #define _BITUL(x) (_UL(1) << (x)) #define _BITULL(x) (_ULL(1) << (x)) +#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) +#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) + +#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) + #endif /* _LINUX_CONST_H */ diff --git a/include/uapi/linux/dcbnl.h b/include/uapi/linux/dcbnl.h new file mode 100644 index 0000000..a791a94 --- /dev/null +++ b/include/uapi/linux/dcbnl.h @@ -0,0 +1,769 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (c) 2008-2011, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Lucy Liu + */ + +#ifndef __LINUX_DCBNL_H__ +#define __LINUX_DCBNL_H__ + +#include <linux/types.h> + +/* IEEE 802.1Qaz std supported values */ +#define IEEE_8021QAZ_MAX_TCS 8 + +#define IEEE_8021QAZ_TSA_STRICT 0 +#define IEEE_8021QAZ_TSA_CB_SHAPER 1 +#define IEEE_8021QAZ_TSA_ETS 2 +#define IEEE_8021QAZ_TSA_VENDOR 255 + +/* This structure contains the IEEE 802.1Qaz ETS managed object + * + * @willing: willing bit in ETS configuration TLV + * @ets_cap: indicates supported capacity of ets feature + * @cbs: credit based shaper ets algorithm supported + * @tc_tx_bw: tc tx bandwidth indexed by traffic class + * @tc_rx_bw: tc rx bandwidth indexed by traffic class + * @tc_tsa: TSA Assignment table, indexed by traffic class + * @prio_tc: priority assignment table mapping 8021Qp to traffic class + * @tc_reco_bw: recommended tc bandwidth indexed by traffic class for TLV + * @tc_reco_tsa: recommended tc bandwidth indexed by traffic class for TLV + * @reco_prio_tc: recommended tc tx bandwidth indexed by traffic class for TLV + * + * Recommended values are used to set fields in the ETS recommendation TLV + * with hardware offloaded LLDP. + * + * ---- + * TSA Assignment 8 bit identifiers + * 0 strict priority + * 1 credit-based shaper + * 2 enhanced transmission selection + * 3-254 reserved + * 255 vendor specific + */ +struct ieee_ets { + __u8 willing; + __u8 ets_cap; + __u8 cbs; + __u8 tc_tx_bw[IEEE_8021QAZ_MAX_TCS]; + __u8 tc_rx_bw[IEEE_8021QAZ_MAX_TCS]; + __u8 tc_tsa[IEEE_8021QAZ_MAX_TCS]; + __u8 prio_tc[IEEE_8021QAZ_MAX_TCS]; + __u8 tc_reco_bw[IEEE_8021QAZ_MAX_TCS]; + __u8 tc_reco_tsa[IEEE_8021QAZ_MAX_TCS]; + __u8 reco_prio_tc[IEEE_8021QAZ_MAX_TCS]; +}; + +/* This structure contains rate limit extension to the IEEE 802.1Qaz ETS + * managed object.
+ * Values are 64 bits long and specified in Kbps to enable usage over both + * slow and very fast networks. + * + * @tc_maxrate: maximal tc tx bandwidth indexed by traffic class + */ +struct ieee_maxrate { + __u64 tc_maxrate[IEEE_8021QAZ_MAX_TCS]; +}; + +enum dcbnl_cndd_states { + DCB_CNDD_RESET = 0, + DCB_CNDD_EDGE, + DCB_CNDD_INTERIOR, + DCB_CNDD_INTERIOR_READY, +}; + +/* This structure contains the IEEE 802.1Qau QCN managed object. + * + *@rpg_enable: enable QCN RP + *@rppp_max_rps: maximum number of RPs allowed for this CNPV on this port + *@rpg_time_reset: time between rate increases if no CNMs received. + * given in u-seconds + *@rpg_byte_reset: transmitted data between rate increases if no CNMs received. + * given in Bytes + *@rpg_threshold: The number of times rpByteStage or rpTimeStage can count + * before RP rate control state machine advances states + *@rpg_max_rate: the maximum rate, in Mbits per second, + * at which an RP can transmit + *@rpg_ai_rate: The rate, in Mbits per second, + * used to increase rpTargetRate in the RPR_ACTIVE_INCREASE state + *@rpg_hai_rate: The rate, in Mbits per second, + * used to increase rpTargetRate in the RPR_HYPER_INCREASE state + *@rpg_gd: Upon CNM receive, flow rate is limited to (Fb/Gd)*CurrentRate. + * rpgGd is given as log2(Gd), where Gd may only be powers of 2 + *@rpg_min_dec_fac: The minimum factor by which the current transmit rate + * can be changed by reception of a CNM. + * value is given as percentage (1-100) + *@rpg_min_rate: The minimum value, in bits per second, for rate to limit + *@cndd_state_machine: The state of the congestion notification domain + * defense state machine, as defined by IEEE 802.1Qau + * section 32.1.1. In the interior ready state, + * the QCN capable hardware may add CN-TAG TLV to the + * outgoing traffic, to specifically identify outgoing + * flows.
+ */ + +struct ieee_qcn { + __u8 rpg_enable[IEEE_8021QAZ_MAX_TCS]; + __u32 rppp_max_rps[IEEE_8021QAZ_MAX_TCS]; + __u32 rpg_time_reset[IEEE_8021QAZ_MAX_TCS]; + __u32 rpg_byte_reset[IEEE_8021QAZ_MAX_TCS]; + __u32 rpg_threshold[IEEE_8021QAZ_MAX_TCS]; + __u32 rpg_max_rate[IEEE_8021QAZ_MAX_TCS]; + __u32 rpg_ai_rate[IEEE_8021QAZ_MAX_TCS]; + __u32 rpg_hai_rate[IEEE_8021QAZ_MAX_TCS]; + __u32 rpg_gd[IEEE_8021QAZ_MAX_TCS]; + __u32 rpg_min_dec_fac[IEEE_8021QAZ_MAX_TCS]; + __u32 rpg_min_rate[IEEE_8021QAZ_MAX_TCS]; + __u32 cndd_state_machine[IEEE_8021QAZ_MAX_TCS]; +}; + +/* This structure contains the IEEE 802.1Qau QCN statistics. + * + *@rppp_rp_centiseconds: the number of RP-centiseconds accumulated + * by RPs at this priority level on this Port + *@rppp_created_rps: number of active RPs(flows) that react to CNMs + */ + +struct ieee_qcn_stats { + __u64 rppp_rp_centiseconds[IEEE_8021QAZ_MAX_TCS]; + __u32 rppp_created_rps[IEEE_8021QAZ_MAX_TCS]; +}; + +/* This structure contains the IEEE 802.1Qaz PFC managed object + * + * @pfc_cap: Indicates the number of traffic classes on the local device + * that may simultaneously have PFC enabled. + * @pfc_en: bitmap indicating pfc enabled traffic classes + * @mbc: enable macsec bypass capability + * @delay: the allowance made for a round-trip propagation delay of the + * link in bits. 
+ * @requests: count of the sent pfc frames + * @indications: count of the received pfc frames + */ +struct ieee_pfc { + __u8 pfc_cap; + __u8 pfc_en; + __u8 mbc; + __u16 delay; + __u64 requests[IEEE_8021QAZ_MAX_TCS]; + __u64 indications[IEEE_8021QAZ_MAX_TCS]; +}; + +#define IEEE_8021Q_MAX_PRIORITIES 8 +#define DCBX_MAX_BUFFERS 8 +struct dcbnl_buffer { + /* priority to buffer mapping */ + __u8 prio2buffer[IEEE_8021Q_MAX_PRIORITIES]; + /* buffer size in Bytes */ + __u32 buffer_size[DCBX_MAX_BUFFERS]; + __u32 total_size; +}; + +/* CEE DCBX std supported values */ +#define CEE_DCBX_MAX_PGS 8 +#define CEE_DCBX_MAX_PRIO 8 + +/** + * struct cee_pg - CEE Priority-Group managed object + * + * @willing: willing bit in the PG tlv + * @error: error bit in the PG tlv + * @pg_en: enable bit of the PG feature + * @tcs_supported: number of traffic classes supported + * @pg_bw: bandwidth percentage for each priority group + * @prio_pg: priority to PG mapping indexed by priority + */ +struct cee_pg { + __u8 willing; + __u8 error; + __u8 pg_en; + __u8 tcs_supported; + __u8 pg_bw[CEE_DCBX_MAX_PGS]; + __u8 prio_pg[CEE_DCBX_MAX_PGS]; +}; + +/** + * struct cee_pfc - CEE PFC managed object + * + * @willing: willing bit in the PFC tlv + * @error: error bit in the PFC tlv + * @pfc_en: bitmap indicating pfc enabled traffic classes + * @tcs_supported: number of traffic classes supported + */ +struct cee_pfc { + __u8 willing; + __u8 error; + __u8 pfc_en; + __u8 tcs_supported; +}; + +/* IEEE 802.1Qaz std supported values */ +#define IEEE_8021QAZ_APP_SEL_ETHERTYPE 1 +#define IEEE_8021QAZ_APP_SEL_STREAM 2 +#define IEEE_8021QAZ_APP_SEL_DGRAM 3 +#define IEEE_8021QAZ_APP_SEL_ANY 4 +#define IEEE_8021QAZ_APP_SEL_DSCP 5 + +/* This structure contains the IEEE 802.1Qaz APP managed object. This + * object is also used for the CEE std as well. 
+ * + * @selector: protocol identifier type + * @protocol: protocol of type indicated + * @priority: 3-bit unsigned integer indicating priority for IEEE + * 8-bit 802.1p user priority bitmap for CEE + * + * ---- + * Selector field values for IEEE 802.1Qaz + * 0 Reserved + * 1 Ethertype + * 2 Well known port number over TCP or SCTP + * 3 Well known port number over UDP or DCCP + * 4 Well known port number over TCP, SCTP, UDP, or DCCP + * 5 Differentiated Services Code Point (DSCP) value + * 6-7 Reserved + * + * Selector field values for CEE + * 0 Ethertype + * 1 Well known port number over TCP or UDP + * 2-3 Reserved + */ +struct dcb_app { + __u8 selector; + __u8 priority; + __u16 protocol; +}; + +/** + * struct dcb_peer_app_info - APP feature information sent by the peer + * + * @willing: willing bit in the peer APP tlv + * @error: error bit in the peer APP tlv + * + * In addition to this information the full peer APP tlv also contains + * a table of 'app_count' APP objects defined above. + */ +struct dcb_peer_app_info { + __u8 willing; + __u8 error; +}; + +struct dcbmsg { + __u8 dcb_family; + __u8 cmd; + __u16 dcb_pad; +}; + +/** + * enum dcbnl_commands - supported DCB commands + * + * @DCB_CMD_UNDEFINED: unspecified command to catch errors + * @DCB_CMD_GSTATE: request the state of DCB in the device + * @DCB_CMD_SSTATE: set the state of DCB in the device + * @DCB_CMD_PGTX_GCFG: request the priority group configuration for Tx + * @DCB_CMD_PGTX_SCFG: set the priority group configuration for Tx + * @DCB_CMD_PGRX_GCFG: request the priority group configuration for Rx + * @DCB_CMD_PGRX_SCFG: set the priority group configuration for Rx + * @DCB_CMD_PFC_GCFG: request the priority flow control configuration + * @DCB_CMD_PFC_SCFG: set the priority flow control configuration + * @DCB_CMD_SET_ALL: apply all changes to the underlying device + * @DCB_CMD_GPERM_HWADDR: get the permanent MAC address of the underlying + * device. Only useful when using bonding. 
+ * @DCB_CMD_GCAP: request the DCB capabilities of the device + * @DCB_CMD_GNUMTCS: get the number of traffic classes currently supported + * @DCB_CMD_SNUMTCS: set the number of traffic classes + * @DCB_CMD_GBCN: set backward congestion notification configuration + * @DCB_CMD_SBCN: get backward congestion notification configuration. + * @DCB_CMD_GAPP: get application protocol configuration + * @DCB_CMD_SAPP: set application protocol configuration + * @DCB_CMD_IEEE_SET: set IEEE 802.1Qaz configuration + * @DCB_CMD_IEEE_GET: get IEEE 802.1Qaz configuration + * @DCB_CMD_GDCBX: get DCBX engine configuration + * @DCB_CMD_SDCBX: set DCBX engine configuration + * @DCB_CMD_GFEATCFG: get DCBX features flags + * @DCB_CMD_SFEATCFG: set DCBX features negotiation flags + * @DCB_CMD_CEE_GET: get CEE aggregated configuration + * @DCB_CMD_IEEE_DEL: delete IEEE 802.1Qaz configuration + */ +enum dcbnl_commands { + DCB_CMD_UNDEFINED, + + DCB_CMD_GSTATE, + DCB_CMD_SSTATE, + + DCB_CMD_PGTX_GCFG, + DCB_CMD_PGTX_SCFG, + DCB_CMD_PGRX_GCFG, + DCB_CMD_PGRX_SCFG, + + DCB_CMD_PFC_GCFG, + DCB_CMD_PFC_SCFG, + + DCB_CMD_SET_ALL, + + DCB_CMD_GPERM_HWADDR, + + DCB_CMD_GCAP, + + DCB_CMD_GNUMTCS, + DCB_CMD_SNUMTCS, + + DCB_CMD_PFC_GSTATE, + DCB_CMD_PFC_SSTATE, + + DCB_CMD_BCN_GCFG, + DCB_CMD_BCN_SCFG, + + DCB_CMD_GAPP, + DCB_CMD_SAPP, + + DCB_CMD_IEEE_SET, + DCB_CMD_IEEE_GET, + + DCB_CMD_GDCBX, + DCB_CMD_SDCBX, + + DCB_CMD_GFEATCFG, + DCB_CMD_SFEATCFG, + + DCB_CMD_CEE_GET, + DCB_CMD_IEEE_DEL, + + __DCB_CMD_ENUM_MAX, + DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1, +}; + +/** + * enum dcbnl_attrs - DCB top-level netlink attributes + * + * @DCB_ATTR_UNDEFINED: unspecified attribute to catch errors + * @DCB_ATTR_IFNAME: interface name of the underlying device (NLA_STRING) + * @DCB_ATTR_STATE: enable state of DCB in the device (NLA_U8) + * @DCB_ATTR_PFC_STATE: enable state of PFC in the device (NLA_U8) + * @DCB_ATTR_PFC_CFG: priority flow control configuration (NLA_NESTED) + * @DCB_ATTR_NUM_TC: number of traffic 
classes supported in the device (NLA_U8) + * @DCB_ATTR_PG_CFG: priority group configuration (NLA_NESTED) + * @DCB_ATTR_SET_ALL: bool to commit changes to hardware or not (NLA_U8) + * @DCB_ATTR_PERM_HWADDR: MAC address of the physical device (NLA_NESTED) + * @DCB_ATTR_CAP: DCB capabilities of the device (NLA_NESTED) + * @DCB_ATTR_NUMTCS: number of traffic classes supported (NLA_NESTED) + * @DCB_ATTR_BCN: backward congestion notification configuration (NLA_NESTED) + * @DCB_ATTR_IEEE: IEEE 802.1Qaz supported attributes (NLA_NESTED) + * @DCB_ATTR_DCBX: DCBX engine configuration in the device (NLA_U8) + * @DCB_ATTR_FEATCFG: DCBX features flags (NLA_NESTED) + * @DCB_ATTR_CEE: CEE std supported attributes (NLA_NESTED) + */ +enum dcbnl_attrs { + DCB_ATTR_UNDEFINED, + + DCB_ATTR_IFNAME, + DCB_ATTR_STATE, + DCB_ATTR_PFC_STATE, + DCB_ATTR_PFC_CFG, + DCB_ATTR_NUM_TC, + DCB_ATTR_PG_CFG, + DCB_ATTR_SET_ALL, + DCB_ATTR_PERM_HWADDR, + DCB_ATTR_CAP, + DCB_ATTR_NUMTCS, + DCB_ATTR_BCN, + DCB_ATTR_APP, + + /* IEEE std attributes */ + DCB_ATTR_IEEE, + + DCB_ATTR_DCBX, + DCB_ATTR_FEATCFG, + + /* CEE nested attributes */ + DCB_ATTR_CEE, + + __DCB_ATTR_ENUM_MAX, + DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1, +}; + +/** + * enum ieee_attrs - IEEE 802.1Qaz get/set attributes + * + * @DCB_ATTR_IEEE_UNSPEC: unspecified + * @DCB_ATTR_IEEE_ETS: negotiated ETS configuration + * @DCB_ATTR_IEEE_PFC: negotiated PFC configuration + * @DCB_ATTR_IEEE_APP_TABLE: negotiated APP configuration + * @DCB_ATTR_IEEE_PEER_ETS: peer ETS configuration - get only + * @DCB_ATTR_IEEE_PEER_PFC: peer PFC configuration - get only + * @DCB_ATTR_IEEE_PEER_APP: peer APP tlv - get only + */ +enum ieee_attrs { + DCB_ATTR_IEEE_UNSPEC, + DCB_ATTR_IEEE_ETS, + DCB_ATTR_IEEE_PFC, + DCB_ATTR_IEEE_APP_TABLE, + DCB_ATTR_IEEE_PEER_ETS, + DCB_ATTR_IEEE_PEER_PFC, + DCB_ATTR_IEEE_PEER_APP, + DCB_ATTR_IEEE_MAXRATE, + DCB_ATTR_IEEE_QCN, + DCB_ATTR_IEEE_QCN_STATS, + DCB_ATTR_DCB_BUFFER, + __DCB_ATTR_IEEE_MAX +}; +#define DCB_ATTR_IEEE_MAX 
(__DCB_ATTR_IEEE_MAX - 1) + +enum ieee_attrs_app { + DCB_ATTR_IEEE_APP_UNSPEC, + DCB_ATTR_IEEE_APP, + __DCB_ATTR_IEEE_APP_MAX +}; +#define DCB_ATTR_IEEE_APP_MAX (__DCB_ATTR_IEEE_APP_MAX - 1) + +/** + * enum cee_attrs - CEE DCBX get attributes. + * + * @DCB_ATTR_CEE_UNSPEC: unspecified + * @DCB_ATTR_CEE_PEER_PG: peer PG configuration - get only + * @DCB_ATTR_CEE_PEER_PFC: peer PFC configuration - get only + * @DCB_ATTR_CEE_PEER_APP_TABLE: peer APP tlv - get only + * @DCB_ATTR_CEE_TX_PG: TX PG configuration (DCB_CMD_PGTX_GCFG) + * @DCB_ATTR_CEE_RX_PG: RX PG configuration (DCB_CMD_PGRX_GCFG) + * @DCB_ATTR_CEE_PFC: PFC configuration (DCB_CMD_PFC_GCFG) + * @DCB_ATTR_CEE_APP_TABLE: APP configuration (multi DCB_CMD_GAPP) + * @DCB_ATTR_CEE_FEAT: DCBX features flags (DCB_CMD_GFEATCFG) + * + * An aggregated collection of the cee std negotiated parameters. + */ +enum cee_attrs { + DCB_ATTR_CEE_UNSPEC, + DCB_ATTR_CEE_PEER_PG, + DCB_ATTR_CEE_PEER_PFC, + DCB_ATTR_CEE_PEER_APP_TABLE, + DCB_ATTR_CEE_TX_PG, + DCB_ATTR_CEE_RX_PG, + DCB_ATTR_CEE_PFC, + DCB_ATTR_CEE_APP_TABLE, + DCB_ATTR_CEE_FEAT, + __DCB_ATTR_CEE_MAX +}; +#define DCB_ATTR_CEE_MAX (__DCB_ATTR_CEE_MAX - 1) + +enum peer_app_attr { + DCB_ATTR_CEE_PEER_APP_UNSPEC, + DCB_ATTR_CEE_PEER_APP_INFO, + DCB_ATTR_CEE_PEER_APP, + __DCB_ATTR_CEE_PEER_APP_MAX +}; +#define DCB_ATTR_CEE_PEER_APP_MAX (__DCB_ATTR_CEE_PEER_APP_MAX - 1) + +enum cee_attrs_app { + DCB_ATTR_CEE_APP_UNSPEC, + DCB_ATTR_CEE_APP, + __DCB_ATTR_CEE_APP_MAX +}; +#define DCB_ATTR_CEE_APP_MAX (__DCB_ATTR_CEE_APP_MAX - 1) + +/** + * enum dcbnl_pfc_up_attrs - DCB Priority Flow Control user priority nested attrs + * + * @DCB_PFC_UP_ATTR_UNDEFINED: unspecified attribute to catch errors + * @DCB_PFC_UP_ATTR_0: Priority Flow Control value for User Priority 0 (NLA_U8) + * @DCB_PFC_UP_ATTR_1: Priority Flow Control value for User Priority 1 (NLA_U8) + * @DCB_PFC_UP_ATTR_2: Priority Flow Control value for User Priority 2 (NLA_U8) + * @DCB_PFC_UP_ATTR_3: Priority Flow Control value
for User Priority 3 (NLA_U8) + * @DCB_PFC_UP_ATTR_4: Priority Flow Control value for User Priority 4 (NLA_U8) + * @DCB_PFC_UP_ATTR_5: Priority Flow Control value for User Priority 5 (NLA_U8) + * @DCB_PFC_UP_ATTR_6: Priority Flow Control value for User Priority 6 (NLA_U8) + * @DCB_PFC_UP_ATTR_7: Priority Flow Control value for User Priority 7 (NLA_U8) + * @DCB_PFC_UP_ATTR_MAX: highest attribute number currently defined + * @DCB_PFC_UP_ATTR_ALL: apply to all priority flow control attrs (NLA_FLAG) + * + */ +enum dcbnl_pfc_up_attrs { + DCB_PFC_UP_ATTR_UNDEFINED, + + DCB_PFC_UP_ATTR_0, + DCB_PFC_UP_ATTR_1, + DCB_PFC_UP_ATTR_2, + DCB_PFC_UP_ATTR_3, + DCB_PFC_UP_ATTR_4, + DCB_PFC_UP_ATTR_5, + DCB_PFC_UP_ATTR_6, + DCB_PFC_UP_ATTR_7, + DCB_PFC_UP_ATTR_ALL, + + __DCB_PFC_UP_ATTR_ENUM_MAX, + DCB_PFC_UP_ATTR_MAX = __DCB_PFC_UP_ATTR_ENUM_MAX - 1, +}; + +/** + * enum dcbnl_pg_attrs - DCB Priority Group attributes + * + * @DCB_PG_ATTR_UNDEFINED: unspecified attribute to catch errors + * @DCB_PG_ATTR_TC_0: Priority Group Traffic Class 0 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_1: Priority Group Traffic Class 1 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_2: Priority Group Traffic Class 2 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_3: Priority Group Traffic Class 3 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_4: Priority Group Traffic Class 4 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_5: Priority Group Traffic Class 5 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_6: Priority Group Traffic Class 6 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_7: Priority Group Traffic Class 7 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_MAX: highest attribute number currently defined + * @DCB_PG_ATTR_TC_ALL: apply to all traffic classes (NLA_NESTED) + * @DCB_PG_ATTR_BW_ID_0: Percent of link bandwidth for Priority Group 0 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_1: Percent of link bandwidth for Priority Group 1 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_2: Percent of link bandwidth for Priority 
Group 2 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_3: Percent of link bandwidth for Priority Group 3 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_4: Percent of link bandwidth for Priority Group 4 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_5: Percent of link bandwidth for Priority Group 5 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_6: Percent of link bandwidth for Priority Group 6 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_7: Percent of link bandwidth for Priority Group 7 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_MAX: highest attribute number currently defined + * @DCB_PG_ATTR_BW_ID_ALL: apply to all priority groups (NLA_FLAG) + * + */ +enum dcbnl_pg_attrs { + DCB_PG_ATTR_UNDEFINED, + + DCB_PG_ATTR_TC_0, + DCB_PG_ATTR_TC_1, + DCB_PG_ATTR_TC_2, + DCB_PG_ATTR_TC_3, + DCB_PG_ATTR_TC_4, + DCB_PG_ATTR_TC_5, + DCB_PG_ATTR_TC_6, + DCB_PG_ATTR_TC_7, + DCB_PG_ATTR_TC_MAX, + DCB_PG_ATTR_TC_ALL, + + DCB_PG_ATTR_BW_ID_0, + DCB_PG_ATTR_BW_ID_1, + DCB_PG_ATTR_BW_ID_2, + DCB_PG_ATTR_BW_ID_3, + DCB_PG_ATTR_BW_ID_4, + DCB_PG_ATTR_BW_ID_5, + DCB_PG_ATTR_BW_ID_6, + DCB_PG_ATTR_BW_ID_7, + DCB_PG_ATTR_BW_ID_MAX, + DCB_PG_ATTR_BW_ID_ALL, + + __DCB_PG_ATTR_ENUM_MAX, + DCB_PG_ATTR_MAX = __DCB_PG_ATTR_ENUM_MAX - 1, +}; + +/** + * enum dcbnl_tc_attrs - DCB Traffic Class attributes + * + * @DCB_TC_ATTR_PARAM_UNDEFINED: unspecified attribute to catch errors + * @DCB_TC_ATTR_PARAM_PGID: (NLA_U8) Priority group the traffic class belongs to + * Valid values are: 0-7 + * @DCB_TC_ATTR_PARAM_UP_MAPPING: (NLA_U8) Traffic class to user priority map + * Some devices may not support changing the + * user priority map of a TC. 
+ * @DCB_TC_ATTR_PARAM_STRICT_PRIO: (NLA_U8) Strict priority setting + * 0 - none + * 1 - group strict + * 2 - link strict + * @DCB_TC_ATTR_PARAM_BW_PCT: optional - (NLA_U8) If supported by the device and + * not configured to use link strict priority, + * this is the percentage of bandwidth of the + * priority group this traffic class belongs to + * @DCB_TC_ATTR_PARAM_ALL: (NLA_FLAG) all traffic class parameters + * + */ +enum dcbnl_tc_attrs { + DCB_TC_ATTR_PARAM_UNDEFINED, + + DCB_TC_ATTR_PARAM_PGID, + DCB_TC_ATTR_PARAM_UP_MAPPING, + DCB_TC_ATTR_PARAM_STRICT_PRIO, + DCB_TC_ATTR_PARAM_BW_PCT, + DCB_TC_ATTR_PARAM_ALL, + + __DCB_TC_ATTR_PARAM_ENUM_MAX, + DCB_TC_ATTR_PARAM_MAX = __DCB_TC_ATTR_PARAM_ENUM_MAX - 1, +}; + +/** + * enum dcbnl_cap_attrs - DCB Capability attributes + * + * @DCB_CAP_ATTR_UNDEFINED: unspecified attribute to catch errors + * @DCB_CAP_ATTR_ALL: (NLA_FLAG) all capability parameters + * @DCB_CAP_ATTR_PG: (NLA_U8) device supports Priority Groups + * @DCB_CAP_ATTR_PFC: (NLA_U8) device supports Priority Flow Control + * @DCB_CAP_ATTR_UP2TC: (NLA_U8) device supports user priority to + * traffic class mapping + * @DCB_CAP_ATTR_PG_TCS: (NLA_U8) bitmap where each bit represents a + * number of traffic classes the device + * can be configured to use for Priority Groups + * @DCB_CAP_ATTR_PFC_TCS: (NLA_U8) bitmap where each bit represents a + * number of traffic classes the device can be + * configured to use for Priority Flow Control + * @DCB_CAP_ATTR_GSP: (NLA_U8) device supports group strict priority + * @DCB_CAP_ATTR_BCN: (NLA_U8) device supports Backwards Congestion + * Notification + * @DCB_CAP_ATTR_DCBX: (NLA_U8) device supports DCBX engine + * + */ +enum dcbnl_cap_attrs { + DCB_CAP_ATTR_UNDEFINED, + DCB_CAP_ATTR_ALL, + DCB_CAP_ATTR_PG, + DCB_CAP_ATTR_PFC, + DCB_CAP_ATTR_UP2TC, + DCB_CAP_ATTR_PG_TCS, + DCB_CAP_ATTR_PFC_TCS, + DCB_CAP_ATTR_GSP, + DCB_CAP_ATTR_BCN, + DCB_CAP_ATTR_DCBX, + + __DCB_CAP_ATTR_ENUM_MAX, + DCB_CAP_ATTR_MAX = 
__DCB_CAP_ATTR_ENUM_MAX - 1, +}; + +/** + * DCBX capability flags + * + * @DCB_CAP_DCBX_HOST: DCBX negotiation is performed by the host LLDP agent. + * 'set' routines are used to configure the device with + * the negotiated parameters + * + * @DCB_CAP_DCBX_LLD_MANAGED: DCBX negotiation is not performed in the host but + * by another entity + * 'get' routines are used to retrieve the + * negotiated parameters + * 'set' routines can be used to set the initial + * negotiation configuration + * + * @DCB_CAP_DCBX_VER_CEE: for a non-host DCBX engine, indicates the engine + * supports the CEE protocol flavor + * + * @DCB_CAP_DCBX_VER_IEEE: for a non-host DCBX engine, indicates the engine + * supports the IEEE protocol flavor + * + * @DCB_CAP_DCBX_STATIC: for a non-host DCBX engine, indicates the engine + * supports static configuration (i.e no actual + * negotiation is performed negotiated parameters equal + * the initial configuration) + * + */ +#define DCB_CAP_DCBX_HOST 0x01 +#define DCB_CAP_DCBX_LLD_MANAGED 0x02 +#define DCB_CAP_DCBX_VER_CEE 0x04 +#define DCB_CAP_DCBX_VER_IEEE 0x08 +#define DCB_CAP_DCBX_STATIC 0x10 + +/** + * enum dcbnl_numtcs_attrs - number of traffic classes + * + * @DCB_NUMTCS_ATTR_UNDEFINED: unspecified attribute to catch errors + * @DCB_NUMTCS_ATTR_ALL: (NLA_FLAG) all traffic class attributes + * @DCB_NUMTCS_ATTR_PG: (NLA_U8) number of traffic classes used for + * priority groups + * @DCB_NUMTCS_ATTR_PFC: (NLA_U8) number of traffic classes which can + * support priority flow control + */ +enum dcbnl_numtcs_attrs { + DCB_NUMTCS_ATTR_UNDEFINED, + DCB_NUMTCS_ATTR_ALL, + DCB_NUMTCS_ATTR_PG, + DCB_NUMTCS_ATTR_PFC, + + __DCB_NUMTCS_ATTR_ENUM_MAX, + DCB_NUMTCS_ATTR_MAX = __DCB_NUMTCS_ATTR_ENUM_MAX - 1, +}; + +enum dcbnl_bcn_attrs{ + DCB_BCN_ATTR_UNDEFINED = 0, + + DCB_BCN_ATTR_RP_0, + DCB_BCN_ATTR_RP_1, + DCB_BCN_ATTR_RP_2, + DCB_BCN_ATTR_RP_3, + DCB_BCN_ATTR_RP_4, + DCB_BCN_ATTR_RP_5, + DCB_BCN_ATTR_RP_6, + DCB_BCN_ATTR_RP_7, + DCB_BCN_ATTR_RP_ALL, + + 
DCB_BCN_ATTR_BCNA_0, + DCB_BCN_ATTR_BCNA_1, + DCB_BCN_ATTR_ALPHA, + DCB_BCN_ATTR_BETA, + DCB_BCN_ATTR_GD, + DCB_BCN_ATTR_GI, + DCB_BCN_ATTR_TMAX, + DCB_BCN_ATTR_TD, + DCB_BCN_ATTR_RMIN, + DCB_BCN_ATTR_W, + DCB_BCN_ATTR_RD, + DCB_BCN_ATTR_RU, + DCB_BCN_ATTR_WRTT, + DCB_BCN_ATTR_RI, + DCB_BCN_ATTR_C, + DCB_BCN_ATTR_ALL, + + __DCB_BCN_ATTR_ENUM_MAX, + DCB_BCN_ATTR_MAX = __DCB_BCN_ATTR_ENUM_MAX - 1, +}; + +/** + * enum dcb_general_attr_values - general DCB attribute values + * + * @DCB_ATTR_VALUE_UNDEFINED: value used to indicate an attribute is not supported + * + */ +enum dcb_general_attr_values { + DCB_ATTR_VALUE_UNDEFINED = 0xff +}; + +#define DCB_APP_IDTYPE_ETHTYPE 0x00 +#define DCB_APP_IDTYPE_PORTNUM 0x01 +enum dcbnl_app_attrs { + DCB_APP_ATTR_UNDEFINED, + + DCB_APP_ATTR_IDTYPE, + DCB_APP_ATTR_ID, + DCB_APP_ATTR_PRIORITY, + + __DCB_APP_ATTR_ENUM_MAX, + DCB_APP_ATTR_MAX = __DCB_APP_ATTR_ENUM_MAX - 1, +}; + +/** + * enum dcbnl_featcfg_attrs - features configuration flags + * + * @DCB_FEATCFG_ATTR_UNDEFINED: unspecified attribute to catch errors + * @DCB_FEATCFG_ATTR_ALL: (NLA_FLAG) all features configuration attributes + * @DCB_FEATCFG_ATTR_PG: (NLA_U8) configuration flags for priority groups + * @DCB_FEATCFG_ATTR_PFC: (NLA_U8) configuration flags for priority + * flow control + * @DCB_FEATCFG_ATTR_APP: (NLA_U8) configuration flags for application TLV + * + */ +#define DCB_FEATCFG_ERROR 0x01 /* error in feature resolution */ +#define DCB_FEATCFG_ENABLE 0x02 /* enable feature */ +#define DCB_FEATCFG_WILLING 0x04 /* feature is willing */ +#define DCB_FEATCFG_ADVERTISE 0x08 /* advertise feature */ +enum dcbnl_featcfg_attrs { + DCB_FEATCFG_ATTR_UNDEFINED, + DCB_FEATCFG_ATTR_ALL, + DCB_FEATCFG_ATTR_PG, + DCB_FEATCFG_ATTR_PFC, + DCB_FEATCFG_ATTR_APP, + + __DCB_FEATCFG_ATTR_ENUM_MAX, + DCB_FEATCFG_ATTR_MAX = __DCB_FEATCFG_ATTR_ENUM_MAX - 1, +}; + +#endif /* __LINUX_DCBNL_H__ */ diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index b7f23fa..a430775
100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -13,6 +13,8 @@ #ifndef _LINUX_DEVLINK_H_ #define _LINUX_DEVLINK_H_ +#include <linux/const.h> + #define DEVLINK_GENL_NAME "devlink" #define DEVLINK_GENL_VERSION 0x1 #define DEVLINK_GENL_MCGRP_CONFIG_NAME "config" @@ -122,6 +124,8 @@ enum devlink_command { DEVLINK_CMD_TRAP_POLICER_NEW, DEVLINK_CMD_TRAP_POLICER_DEL, + DEVLINK_CMD_HEALTH_REPORTER_TEST, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 @@ -193,6 +197,13 @@ enum devlink_port_flavour { * port that faces the PCI VF. */ DEVLINK_PORT_FLAVOUR_VIRTUAL, /* Any virtual port facing the user. */ + DEVLINK_PORT_FLAVOUR_UNUSED, /* Port which exists in the switch, but + * is not used in any way. + */ + DEVLINK_PORT_FLAVOUR_PCI_SF, /* Represents eswitch port + * for the PCI SF. It is an internal + * port that faces the PCI SF. + */ }; enum devlink_param_cmode { @@ -228,6 +239,28 @@ enum { DEVLINK_ATTR_STATS_MAX = __DEVLINK_ATTR_STATS_MAX - 1 }; +/* Specify what sections of a flash component can be overwritten when + * performing an update. Overwriting of firmware binary sections is always + * implicitly assumed to be allowed. + * + * Each section must be documented in + * Documentation/networking/devlink/devlink-flash.rst + * + */ +enum { + DEVLINK_FLASH_OVERWRITE_SETTINGS_BIT, + DEVLINK_FLASH_OVERWRITE_IDENTIFIERS_BIT, + + __DEVLINK_FLASH_OVERWRITE_MAX_BIT, + DEVLINK_FLASH_OVERWRITE_MAX_BIT = __DEVLINK_FLASH_OVERWRITE_MAX_BIT - 1 +}; + +#define DEVLINK_FLASH_OVERWRITE_SETTINGS _BITUL(DEVLINK_FLASH_OVERWRITE_SETTINGS_BIT) +#define DEVLINK_FLASH_OVERWRITE_IDENTIFIERS _BITUL(DEVLINK_FLASH_OVERWRITE_IDENTIFIERS_BIT) + +#define DEVLINK_SUPPORTED_FLASH_OVERWRITE_SECTIONS \ + (_BITUL(__DEVLINK_FLASH_OVERWRITE_MAX_BIT) - 1) + /** * enum devlink_trap_action - Packet trap action.
* @DEVLINK_TRAP_ACTION_DROP: Packet is dropped by the device and a copy is not @@ -272,6 +305,29 @@ enum { DEVLINK_ATTR_TRAP_METADATA_TYPE_FA_COOKIE, }; +enum devlink_reload_action { + DEVLINK_RELOAD_ACTION_UNSPEC, + DEVLINK_RELOAD_ACTION_DRIVER_REINIT, /* Driver entities re-instantiation */ + DEVLINK_RELOAD_ACTION_FW_ACTIVATE, /* FW activate */ + + /* Add new reload actions above */ + __DEVLINK_RELOAD_ACTION_MAX, + DEVLINK_RELOAD_ACTION_MAX = __DEVLINK_RELOAD_ACTION_MAX - 1 +}; + +enum devlink_reload_limit { + DEVLINK_RELOAD_LIMIT_UNSPEC, /* unspecified, no constraints */ + DEVLINK_RELOAD_LIMIT_NO_RESET, /* No reset allowed, no down time allowed, + * no link flap and no configuration is lost. + */ + + /* Add new reload limit above */ + __DEVLINK_RELOAD_LIMIT_MAX, + DEVLINK_RELOAD_LIMIT_MAX = __DEVLINK_RELOAD_LIMIT_MAX - 1 +}; + +#define DEVLINK_RELOAD_LIMITS_VALID_MASK (_BITUL(__DEVLINK_RELOAD_LIMIT_MAX) - 1) + enum devlink_attr { /* don't change the order or add anything between, this is ABI! 
*/ DEVLINK_ATTR_UNSPEC, @@ -458,6 +514,26 @@ enum devlink_attr { DEVLINK_ATTR_PORT_LANES, /* u32 */ DEVLINK_ATTR_PORT_SPLITTABLE, /* u8 */ + DEVLINK_ATTR_PORT_EXTERNAL, /* u8 */ + DEVLINK_ATTR_PORT_CONTROLLER_NUMBER, /* u32 */ + + DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT, /* u64 */ + DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK, /* bitfield32 */ + + DEVLINK_ATTR_RELOAD_ACTION, /* u8 */ + DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED, /* bitfield32 */ + DEVLINK_ATTR_RELOAD_LIMITS, /* bitfield32 */ + + DEVLINK_ATTR_DEV_STATS, /* nested */ + DEVLINK_ATTR_RELOAD_STATS, /* nested */ + DEVLINK_ATTR_RELOAD_STATS_ENTRY, /* nested */ + DEVLINK_ATTR_RELOAD_STATS_LIMIT, /* u8 */ + DEVLINK_ATTR_RELOAD_STATS_VALUE, /* u32 */ + DEVLINK_ATTR_REMOTE_RELOAD_STATS, /* nested */ + DEVLINK_ATTR_RELOAD_ACTION_INFO, /* nested */ + DEVLINK_ATTR_RELOAD_ACTION_STATS, /* nested */ + + DEVLINK_ATTR_PORT_PCI_SF_NUMBER, /* u32 */ /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, @@ -507,9 +583,29 @@ enum devlink_resource_unit { enum devlink_port_function_attr { DEVLINK_PORT_FUNCTION_ATTR_UNSPEC, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, /* binary */ + DEVLINK_PORT_FN_ATTR_STATE, /* u8 */ + DEVLINK_PORT_FN_ATTR_OPSTATE, /* u8 */ __DEVLINK_PORT_FUNCTION_ATTR_MAX, DEVLINK_PORT_FUNCTION_ATTR_MAX = __DEVLINK_PORT_FUNCTION_ATTR_MAX - 1 }; +enum devlink_port_fn_state { + DEVLINK_PORT_FN_STATE_INACTIVE, + DEVLINK_PORT_FN_STATE_ACTIVE, +}; + +/** + * enum devlink_port_fn_opstate - indicates operational state of the function + * @DEVLINK_PORT_FN_OPSTATE_ATTACHED: Driver is attached to the function. + * For graceful tear down of the function, after inactivation of the + * function, user should wait for operational state to turn DETACHED. + * @DEVLINK_PORT_FN_OPSTATE_DETACHED: Driver is detached from the function. + * It is safe to delete the port. 
+ */ +enum devlink_port_fn_opstate { + DEVLINK_PORT_FN_OPSTATE_DETACHED, + DEVLINK_PORT_FN_OPSTATE_ATTACHED, +}; + #endif /* _LINUX_DEVLINK_H_ */ diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h index 7c6c390..9fa720e 100644 --- a/include/uapi/linux/genetlink.h +++ b/include/uapi/linux/genetlink.h @@ -64,6 +64,8 @@ enum { CTRL_ATTR_OPS, CTRL_ATTR_MCAST_GROUPS, CTRL_ATTR_POLICY, + CTRL_ATTR_OP_POLICY, + CTRL_ATTR_OP, __CTRL_ATTR_MAX, }; @@ -85,6 +87,15 @@ enum { __CTRL_ATTR_MCAST_GRP_MAX, }; +enum { + CTRL_ATTR_POLICY_UNSPEC, + CTRL_ATTR_POLICY_DO, + CTRL_ATTR_POLICY_DUMP, + + __CTRL_ATTR_POLICY_DUMP_MAX, + CTRL_ATTR_POLICY_DUMP_MAX = __CTRL_ATTR_POLICY_DUMP_MAX - 1 +}; + #define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1) diff --git a/include/uapi/linux/icmpv6.h b/include/uapi/linux/icmpv6.h index 70b283f..fcea1c5 100644 --- a/include/uapi/linux/icmpv6.h +++ b/include/uapi/linux/icmpv6.h @@ -138,6 +138,7 @@ struct icmp6hdr { #define ICMPV6_HDR_FIELD 0 #define ICMPV6_UNK_NEXTHDR 1 #define ICMPV6_UNK_OPTION 2 +#define ICMPV6_HDR_INCOMP 3 /* * constants for (set|get)sockopt diff --git a/include/uapi/linux/if_alg.h b/include/uapi/linux/if_alg.h index bc2bcde..dc52a11 100644 --- a/include/uapi/linux/if_alg.h +++ b/include/uapi/linux/if_alg.h @@ -24,6 +24,22 @@ struct sockaddr_alg { __u8 salg_name[64]; }; +/* + * Linux v4.12 and later removed the 64-byte limit on salg_name[]; it's now an + * arbitrary-length field. We had to keep the original struct above for source + * compatibility with existing userspace programs, though. Use the new struct + * below if support for very long algorithm names is needed. To do this, + * allocate 'sizeof(struct sockaddr_alg_new) + strlen(algname) + 1' bytes, and + * copy algname (including the null terminator) into salg_name. 
+ */ +struct sockaddr_alg_new { + __u16 salg_family; + __u8 salg_type[14]; + __u32 salg_feat; + __u32 salg_mask; + __u8 salg_name[]; +}; + struct af_alg_iv { __u32 ivlen; __u8 iv[0]; @@ -35,6 +51,7 @@ struct af_alg_iv { #define ALG_SET_OP 3 #define ALG_SET_AEAD_ASSOCLEN 4 #define ALG_SET_AEAD_AUTHSIZE 5 +#define ALG_SET_DRBG_ENTROPY 6 /* Operations */ #define ALG_OP_DECRYPT 0 diff --git a/include/uapi/linux/if_bonding.h b/include/uapi/linux/if_bonding.h index 45f3750..e8eb4ad 100644 --- a/include/uapi/linux/if_bonding.h +++ b/include/uapi/linux/if_bonding.h @@ -94,6 +94,7 @@ #define BOND_XMIT_POLICY_LAYER23 2 /* layer 2+3 (IP ^ MAC) */ #define BOND_XMIT_POLICY_ENCAP23 3 /* encapsulated layer 2+3 */ #define BOND_XMIT_POLICY_ENCAP34 4 /* encapsulated layer 3+4 */ +#define BOND_XMIT_POLICY_VLAN_SRCMAC 5 /* vlan + source MAC */ /* 802.3ad port state definitions (43.4.2.2 in the 802.3ad standard) */ #define LACP_STATE_LACP_ACTIVITY 0x1 diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 0490db9..fee6e45 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -121,6 +121,7 @@ enum { IFLA_BRIDGE_VLAN_INFO, IFLA_BRIDGE_VLAN_TUNNEL_INFO, IFLA_BRIDGE_MRP, + IFLA_BRIDGE_CFM, __IFLA_BRIDGE_MAX, }; #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1) @@ -328,6 +329,130 @@ struct br_mrp_start_in_test { __u16 in_id; }; +enum { + IFLA_BRIDGE_CFM_UNSPEC, + IFLA_BRIDGE_CFM_MEP_CREATE, + IFLA_BRIDGE_CFM_MEP_DELETE, + IFLA_BRIDGE_CFM_MEP_CONFIG, + IFLA_BRIDGE_CFM_CC_CONFIG, + IFLA_BRIDGE_CFM_CC_PEER_MEP_ADD, + IFLA_BRIDGE_CFM_CC_PEER_MEP_REMOVE, + IFLA_BRIDGE_CFM_CC_RDI, + IFLA_BRIDGE_CFM_CC_CCM_TX, + IFLA_BRIDGE_CFM_MEP_CREATE_INFO, + IFLA_BRIDGE_CFM_MEP_CONFIG_INFO, + IFLA_BRIDGE_CFM_CC_CONFIG_INFO, + IFLA_BRIDGE_CFM_CC_RDI_INFO, + IFLA_BRIDGE_CFM_CC_CCM_TX_INFO, + IFLA_BRIDGE_CFM_CC_PEER_MEP_INFO, + IFLA_BRIDGE_CFM_MEP_STATUS_INFO, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO, + __IFLA_BRIDGE_CFM_MAX, +}; + +#define IFLA_BRIDGE_CFM_MAX 
(__IFLA_BRIDGE_CFM_MAX - 1) + +enum { + IFLA_BRIDGE_CFM_MEP_CREATE_UNSPEC, + IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE, + IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN, + IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION, + IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX, + __IFLA_BRIDGE_CFM_MEP_CREATE_MAX, +}; + +#define IFLA_BRIDGE_CFM_MEP_CREATE_MAX (__IFLA_BRIDGE_CFM_MEP_CREATE_MAX - 1) + +enum { + IFLA_BRIDGE_CFM_MEP_DELETE_UNSPEC, + IFLA_BRIDGE_CFM_MEP_DELETE_INSTANCE, + __IFLA_BRIDGE_CFM_MEP_DELETE_MAX, +}; + +#define IFLA_BRIDGE_CFM_MEP_DELETE_MAX (__IFLA_BRIDGE_CFM_MEP_DELETE_MAX - 1) + +enum { + IFLA_BRIDGE_CFM_MEP_CONFIG_UNSPEC, + IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE, + IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC, + IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL, + IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID, + __IFLA_BRIDGE_CFM_MEP_CONFIG_MAX, +}; + +#define IFLA_BRIDGE_CFM_MEP_CONFIG_MAX (__IFLA_BRIDGE_CFM_MEP_CONFIG_MAX - 1) + +enum { + IFLA_BRIDGE_CFM_CC_CONFIG_UNSPEC, + IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE, + IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE, + IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL, + IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID, + __IFLA_BRIDGE_CFM_CC_CONFIG_MAX, +}; + +#define IFLA_BRIDGE_CFM_CC_CONFIG_MAX (__IFLA_BRIDGE_CFM_CC_CONFIG_MAX - 1) + +enum { + IFLA_BRIDGE_CFM_CC_PEER_MEP_UNSPEC, + IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE, + IFLA_BRIDGE_CFM_CC_PEER_MEPID, + __IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX, +}; + +#define IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX (__IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX - 1) + +enum { + IFLA_BRIDGE_CFM_CC_RDI_UNSPEC, + IFLA_BRIDGE_CFM_CC_RDI_INSTANCE, + IFLA_BRIDGE_CFM_CC_RDI_RDI, + __IFLA_BRIDGE_CFM_CC_RDI_MAX, +}; + +#define IFLA_BRIDGE_CFM_CC_RDI_MAX (__IFLA_BRIDGE_CFM_CC_RDI_MAX - 1) + +enum { + IFLA_BRIDGE_CFM_CC_CCM_TX_UNSPEC, + IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE, + IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC, + IFLA_BRIDGE_CFM_CC_CCM_TX_SEQ_NO_UPDATE, + IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD, + IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV, + IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE, + IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV, + 
IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE, + __IFLA_BRIDGE_CFM_CC_CCM_TX_MAX, +}; + +#define IFLA_BRIDGE_CFM_CC_CCM_TX_MAX (__IFLA_BRIDGE_CFM_CC_CCM_TX_MAX - 1) + +enum { + IFLA_BRIDGE_CFM_MEP_STATUS_UNSPEC, + IFLA_BRIDGE_CFM_MEP_STATUS_INSTANCE, + IFLA_BRIDGE_CFM_MEP_STATUS_OPCODE_UNEXP_SEEN, + IFLA_BRIDGE_CFM_MEP_STATUS_VERSION_UNEXP_SEEN, + IFLA_BRIDGE_CFM_MEP_STATUS_RX_LEVEL_LOW_SEEN, + __IFLA_BRIDGE_CFM_MEP_STATUS_MAX, +}; + +#define IFLA_BRIDGE_CFM_MEP_STATUS_MAX (__IFLA_BRIDGE_CFM_MEP_STATUS_MAX - 1) + +enum { + IFLA_BRIDGE_CFM_CC_PEER_STATUS_UNSPEC, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_INSTANCE, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_PEER_MEPID, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_CCM_DEFECT, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_RDI, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_PORT_TLV_VALUE, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_IF_TLV_VALUE, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEEN, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_TLV_SEEN, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEQ_UNEXP_SEEN, + __IFLA_BRIDGE_CFM_CC_PEER_STATUS_MAX, +}; + +#define IFLA_BRIDGE_CFM_CC_PEER_STATUS_MAX (__IFLA_BRIDGE_CFM_CC_PEER_STATUS_MAX - 1) + struct bridge_stp_xstats { __u64 transition_blk; __u64 transition_fwd; @@ -455,10 +580,33 @@ enum { enum { MDBA_MDB_EATTR_UNSPEC, MDBA_MDB_EATTR_TIMER, + MDBA_MDB_EATTR_SRC_LIST, + MDBA_MDB_EATTR_GROUP_MODE, + MDBA_MDB_EATTR_SOURCE, + MDBA_MDB_EATTR_RTPROT, __MDBA_MDB_EATTR_MAX }; #define MDBA_MDB_EATTR_MAX (__MDBA_MDB_EATTR_MAX - 1) +/* per mdb entry source */ +enum { + MDBA_MDB_SRCLIST_UNSPEC, + MDBA_MDB_SRCLIST_ENTRY, + __MDBA_MDB_SRCLIST_MAX +}; +#define MDBA_MDB_SRCLIST_MAX (__MDBA_MDB_SRCLIST_MAX - 1) + +/* per mdb entry per source attributes + * these are embedded in MDBA_MDB_SRCLIST_ENTRY + */ +enum { + MDBA_MDB_SRCATTR_UNSPEC, + MDBA_MDB_SRCATTR_ADDRESS, + MDBA_MDB_SRCATTR_TIMER, + __MDBA_MDB_SRCATTR_MAX +}; +#define MDBA_MDB_SRCATTR_MAX (__MDBA_MDB_SRCATTR_MAX - 1) + /* multicast router types */ enum { MDB_RTR_TYPE_DISABLED, @@ -495,12 +643,15 @@ struct br_mdb_entry { __u8 state; 
#define MDB_FLAGS_OFFLOAD (1 << 0) #define MDB_FLAGS_FAST_LEAVE (1 << 1) +#define MDB_FLAGS_STAR_EXCL (1 << 2) +#define MDB_FLAGS_BLOCKED (1 << 3) __u8 flags; __u16 vid; struct { union { __be32 ip4; struct in6_addr ip6; + unsigned char mac_addr[ETH_ALEN]; } u; __be16 proto; } addr; @@ -509,10 +660,23 @@ struct br_mdb_entry { enum { MDBA_SET_ENTRY_UNSPEC, MDBA_SET_ENTRY, + MDBA_SET_ENTRY_ATTRS, __MDBA_SET_ENTRY_MAX, }; #define MDBA_SET_ENTRY_MAX (__MDBA_SET_ENTRY_MAX - 1) +/* [MDBA_SET_ENTRY_ATTRS] = { + * [MDBE_ATTR_xxx] + * ... + * } + */ +enum { + MDBE_ATTR_UNSPEC, + MDBE_ATTR_SOURCE, + __MDBE_ATTR_MAX, +}; +#define MDBE_ATTR_MAX (__MDBE_ATTR_MAX - 1) + /* Embedded inside LINK_XSTATS_TYPE_BRIDGE */ enum { BRIDGE_XSTATS_UNSPEC, diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 1a0c7df..8e6f2c3 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -99,6 +99,7 @@ #define ETH_P_1588 0x88F7 /* IEEE 1588 Timesync */ #define ETH_P_NCSI 0x88F8 /* NCSI protocol */ #define ETH_P_PRP 0x88FB /* IEC 62439-3 PRP/HSRv0 */ +#define ETH_P_CFM 0x8902 /* Connectivity Fault Management */ #define ETH_P_FCOE 0x8906 /* Fibre Channel over Ethernet */ #define ETH_P_IBOE 0x8915 /* Infiniband over Ethernet */ #define ETH_P_TDLS 0x890D /* TDLS */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index b1bdcfb..5019337 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -7,24 +7,23 @@ /* This struct should be in sync with struct rtnl_link_stats64 */ struct rtnl_link_stats { - __u32 rx_packets; /* total packets received */ - __u32 tx_packets; /* total packets transmitted */ - __u32 rx_bytes; /* total bytes received */ - __u32 tx_bytes; /* total bytes transmitted */ - __u32 rx_errors; /* bad packets received */ - __u32 tx_errors; /* packet transmit problems */ - __u32 rx_dropped; /* no space in linux buffers */ - __u32 tx_dropped; /* no space available in linux */ - __u32 multicast; /* 
multicast packets received */ + __u32 rx_packets; + __u32 tx_packets; + __u32 rx_bytes; + __u32 tx_bytes; + __u32 rx_errors; + __u32 tx_errors; + __u32 rx_dropped; + __u32 tx_dropped; + __u32 multicast; __u32 collisions; - /* detailed rx_errors: */ __u32 rx_length_errors; - __u32 rx_over_errors; /* receiver ring buff overflow */ - __u32 rx_crc_errors; /* recved pkt with crc error */ - __u32 rx_frame_errors; /* recv'd frame alignment error */ - __u32 rx_fifo_errors; /* recv'r fifo overrun */ - __u32 rx_missed_errors; /* receiver missed packet */ + __u32 rx_over_errors; + __u32 rx_crc_errors; + __u32 rx_frame_errors; + __u32 rx_fifo_errors; + __u32 rx_missed_errors; /* detailed tx_errors */ __u32 tx_aborted_errors; @@ -37,29 +36,201 @@ struct rtnl_link_stats { __u32 rx_compressed; __u32 tx_compressed; - __u32 rx_nohandler; /* dropped, no handler found */ + __u32 rx_nohandler; }; -/* The main device statistics structure */ +/** + * struct rtnl_link_stats64 - The main device statistics structure. + * + * @rx_packets: Number of good packets received by the interface. + * For hardware interfaces counts all good packets received from the device + * by the host, including packets which host had to drop at various stages + * of processing (even in the driver). + * + * @tx_packets: Number of packets successfully transmitted. + * For hardware interfaces counts packets which host was able to successfully + * hand over to the device, which does not necessarily mean that packets + * had been successfully transmitted out of the device, only that device + * acknowledged it copied them out of host memory. + * + * @rx_bytes: Number of good received bytes, corresponding to @rx_packets. + * + * For IEEE 802.3 devices should count the length of Ethernet Frames + * excluding the FCS. + * + * @tx_bytes: Number of good transmitted bytes, corresponding to @tx_packets. + * + * For IEEE 802.3 devices should count the length of Ethernet Frames + * excluding the FCS. 
+ * + * @rx_errors: Total number of bad packets received on this network device. + * This counter must include events counted by @rx_length_errors, + * @rx_crc_errors, @rx_frame_errors and other errors not otherwise + * counted. + * + * @tx_errors: Total number of transmit problems. + * This counter must include events counted by @tx_aborted_errors, + * @tx_carrier_errors, @tx_fifo_errors, @tx_heartbeat_errors, + * @tx_window_errors and other errors not otherwise counted. + * + * @rx_dropped: Number of packets received but not processed, + * e.g. due to lack of resources or unsupported protocol. + * For hardware interfaces this counter may include packets discarded + * due to L2 address filtering but should not include packets dropped + * by the device due to buffer exhaustion which are counted separately in + * @rx_missed_errors (since procfs folds those two counters together). + * + * @tx_dropped: Number of packets dropped on their way to transmission, + * e.g. due to lack of resources. + * + * @multicast: Multicast packets received. + * For hardware interfaces this statistic is commonly calculated + * at the device level (unlike @rx_packets) and therefore may include + * packets which did not reach the host. + * + * For IEEE 802.3 devices this counter may be equivalent to: + * + * - 30.3.1.1.21 aMulticastFramesReceivedOK + * + * @collisions: Number of collisions during packet transmissions. + * + * @rx_length_errors: Number of packets dropped due to invalid length. + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter should be equivalent to a sum + * of the following attributes: + * + * - 30.3.1.1.23 aInRangeLengthErrors + * - 30.3.1.1.24 aOutOfRangeLengthField + * - 30.3.1.1.25 aFrameTooLongErrors + * + * @rx_over_errors: Receiver FIFO overflow event counter. + * + * Historically the count of overflow events.
Such events may be + * reported in the receive descriptors or via interrupts, and may + * not correspond one-to-one with dropped packets. + * + * The recommended interpretation for high speed interfaces is - + * number of packets dropped because they did not fit into buffers + * provided by the host, e.g. packets larger than MTU or next buffer + * in the ring was not available for a scatter transfer. + * + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * This statistic was historically used interchangeably with + * @rx_fifo_errors. + * + * This statistic corresponds to hardware events and is not commonly used + * on software devices. + * + * @rx_crc_errors: Number of packets received with a CRC error. + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter must be equivalent to: + * + * - 30.3.1.1.6 aFrameCheckSequenceErrors + * + * @rx_frame_errors: Receiver frame alignment errors. + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter should be equivalent to: + * + * - 30.3.1.1.7 aAlignmentErrors + * + * @rx_fifo_errors: Receiver FIFO error counter. + * + * Historically the count of overflow events. Those events may be + * reported in the receive descriptors or via interrupts, and may + * not correspond one-to-one with dropped packets. + * + * This statistic was used interchangeably with @rx_over_errors. + * Not recommended for use in drivers for high speed interfaces. + * + * This statistic is used on software devices, e.g. to count software + * packet queue overflow (can) or sequencing errors (GRE). + * + * @rx_missed_errors: Count of packets missed by the host. + * Folded into the "drop" counter in `/proc/net/dev`. + * + * Counts number of packets dropped by the device due to lack + * of buffer space. This usually indicates that the host interface + * is slower than the network interface, or host is not keeping up + * with the receive packet rate.
+ * + * This statistic corresponds to hardware events and is not used + * on software devices. + * + * @tx_aborted_errors: + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * For IEEE 802.3 devices capable of half-duplex operation this counter + * must be equivalent to: + * + * - 30.3.1.1.11 aFramesAbortedDueToXSColls + * + * High speed interfaces may use this counter as a general device + * discard counter. + * + * @tx_carrier_errors: Number of frame transmission errors due to loss + * of carrier during transmission. + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter must be equivalent to: + * + * - 30.3.1.1.13 aCarrierSenseErrors + * + * @tx_fifo_errors: Number of frame transmission errors due to device + * FIFO underrun / underflow. This condition occurs when the device + * begins transmission of a frame but is unable to deliver the + * entire frame to the transmitter in time for transmission. + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * @tx_heartbeat_errors: Number of Heartbeat / SQE Test errors for + * old half-duplex Ethernet. + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices possibly equivalent to: + * + * - 30.3.2.1.4 aSQETestErrors + * + * @tx_window_errors: Number of frame transmission errors due + * to late collisions (for Ethernet - after the first 64B of transmission). + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter must be equivalent to: + * + * - 30.3.1.1.10 aLateCollisions + * + * @rx_compressed: Number of correctly received compressed packets. + * This counters is only meaningful for interfaces which support + * packet compression (e.g. CSLIP, PPP). + * + * @tx_compressed: Number of transmitted compressed packets. + * This counters is only meaningful for interfaces which support + * packet compression (e.g. CSLIP, PPP). 
+ * + * @rx_nohandler: Number of packets received on the interface + * but dropped by the networking stack because the device is + * not designated to receive packets (e.g. backup link in a bond). + */ struct rtnl_link_stats64 { - __u64 rx_packets; /* total packets received */ - __u64 tx_packets; /* total packets transmitted */ - __u64 rx_bytes; /* total bytes received */ - __u64 tx_bytes; /* total bytes transmitted */ - __u64 rx_errors; /* bad packets received */ - __u64 tx_errors; /* packet transmit problems */ - __u64 rx_dropped; /* no space in linux buffers */ - __u64 tx_dropped; /* no space available in linux */ - __u64 multicast; /* multicast packets received */ + __u64 rx_packets; + __u64 tx_packets; + __u64 rx_bytes; + __u64 tx_bytes; + __u64 rx_errors; + __u64 tx_errors; + __u64 rx_dropped; + __u64 tx_dropped; + __u64 multicast; __u64 collisions; /* detailed rx_errors: */ __u64 rx_length_errors; - __u64 rx_over_errors; /* receiver ring buff overflow */ - __u64 rx_crc_errors; /* recved pkt with crc error */ - __u64 rx_frame_errors; /* recv'd frame alignment error */ - __u64 rx_fifo_errors; /* recv'r fifo overrun */ - __u64 rx_missed_errors; /* receiver missed packet */ + __u64 rx_over_errors; + __u64 rx_crc_errors; + __u64 rx_frame_errors; + __u64 rx_fifo_errors; + __u64 rx_missed_errors; /* detailed tx_errors */ __u64 tx_aborted_errors; @@ -71,8 +242,7 @@ struct rtnl_link_stats64 { /* for cslip etc */ __u64 rx_compressed; __u64 tx_compressed; - - __u64 rx_nohandler; /* dropped, no handler found */ + __u64 rx_nohandler; }; /* The struct should be in sync with struct ifmap */ @@ -353,6 +523,8 @@ enum { IFLA_BRPORT_BACKUP_PORT, IFLA_BRPORT_MRP_RING_OPEN, IFLA_BRPORT_MRP_IN_OPEN, + IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT, + IFLA_BRPORT_MCAST_EHT_HOSTS_CNT, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) @@ -417,6 +589,8 @@ enum { IFLA_MACVLAN_MACADDR, IFLA_MACVLAN_MACADDR_DATA, IFLA_MACVLAN_MACADDR_COUNT, + IFLA_MACVLAN_BC_QUEUE_LEN, + 
IFLA_MACVLAN_BC_QUEUE_LEN_USED, __IFLA_MACVLAN_MAX, }; diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index 3d884d6..c07caf7 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -2,6 +2,7 @@ #ifndef __LINUX_IF_PACKET_H #define __LINUX_IF_PACKET_H +#include #include struct sockaddr_pkt { @@ -296,6 +297,17 @@ struct packet_mreq { unsigned char mr_address[8]; }; +struct fanout_args { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u16 id; + __u16 type_flags; +#else + __u16 type_flags; + __u16 id; +#endif + __u32 max_num_members; +}; + #define PACKET_MR_MULTICAST 0 #define PACKET_MR_PROMISC 1 #define PACKET_MR_ALLMULTI 2 diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index cd83b4f..ed1c315 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -160,6 +160,7 @@ enum { INET_DIAG_ULP_INFO, INET_DIAG_SK_BPF_STORAGES, INET_DIAG_CGROUP_ID, + INET_DIAG_SOCKOPT, __INET_DIAG_MAX, }; @@ -183,6 +184,23 @@ struct inet_diag_meminfo { __u32 idiag_tmem; }; +/* INET_DIAG_SOCKOPT */ + +struct inet_diag_sockopt { + __u8 recverr:1, + is_icsk:1, + freebind:1, + hdrincl:1, + mc_loop:1, + transparent:1, + mc_all:1, + nodefrag:1; + __u8 bind_address_no_port:1, + recverr_rfc4884:1, + defer_connect:1, + unused:5; +}; + /* INET_DIAG_VEGASINFO */ struct tcpvegas_info { diff --git a/include/uapi/linux/kernel.h b/include/uapi/linux/kernel.h index d99ffa1..5413a8c 100644 --- a/include/uapi/linux/kernel.h +++ b/include/uapi/linux/kernel.h @@ -3,13 +3,6 @@ #define _LINUX_KERNEL_H #include - -/* - * 'kernel.h' contains some often-used function prototypes etc - */ -#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) -#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) - -#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) +#include #endif /* _LINUX_KERNEL_H */ diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h index 131c3a2..0480d2d 
100644 --- a/include/uapi/linux/l2tp.h +++ b/include/uapi/linux/l2tp.h @@ -108,7 +108,7 @@ enum { L2TP_ATTR_VLAN_ID, /* u16 (not used) */ L2TP_ATTR_COOKIE, /* 0, 4 or 8 bytes */ L2TP_ATTR_PEER_COOKIE, /* 0, 4 or 8 bytes */ - L2TP_ATTR_DEBUG, /* u32, enum l2tp_debug_flags */ + L2TP_ATTR_DEBUG, /* u32, enum l2tp_debug_flags (not used) */ L2TP_ATTR_RECV_SEQ, /* u8 */ L2TP_ATTR_SEND_SEQ, /* u8 */ L2TP_ATTR_LNS_MODE, /* u8 */ @@ -144,6 +144,8 @@ enum { L2TP_ATTR_RX_OOS_PACKETS, /* u64 */ L2TP_ATTR_RX_ERRORS, /* u64 */ L2TP_ATTR_STATS_PAD, + L2TP_ATTR_RX_COOKIE_DISCARDS, /* u64 */ + L2TP_ATTR_RX_INVALID, /* u64 */ __L2TP_ATTR_STATS_MAX, }; @@ -177,7 +179,9 @@ enum l2tp_seqmode { }; /** - * enum l2tp_debug_flags - debug message categories for L2TP tunnels/sessions + * enum l2tp_debug_flags - debug message categories for L2TP tunnels/sessions. + * + * Unused. * * @L2TP_MSG_DEBUG: verbose debug (if compiled in) * @L2TP_MSG_CONTROL: userspace - kernel interface diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 3218123..c3e4016 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -36,6 +36,7 @@ enum { /* netlink interface */ #define MPTCP_PM_NAME "mptcp_pm" #define MPTCP_PM_CMD_GRP_NAME "mptcp_pm_cmds" +#define MPTCP_PM_EV_GRP_NAME "mptcp_pm_events" #define MPTCP_PM_VER 0x1 /* @@ -82,6 +83,7 @@ enum { MPTCP_PM_CMD_FLUSH_ADDRS, MPTCP_PM_CMD_SET_LIMITS, MPTCP_PM_CMD_GET_LIMITS, + MPTCP_PM_CMD_SET_FLAGS, __MPTCP_PM_CMD_AFTER_LAST }; @@ -101,6 +103,81 @@ struct mptcp_info { __u64 mptcpi_write_seq; __u64 mptcpi_snd_una; __u64 mptcpi_rcv_nxt; + __u8 mptcpi_local_addr_used; + __u8 mptcpi_local_addr_max; }; +/* + * MPTCP_EVENT_CREATED: token, family, saddr4 | saddr6, daddr4 | daddr6, + * sport, dport + * A new MPTCP connection has been created. It is the good time to allocate + * memory and send ADD_ADDR if needed. Depending on the traffic-patterns + * it can take a long time until the MPTCP_EVENT_ESTABLISHED is sent. 
+ * + * MPTCP_EVENT_ESTABLISHED: token, family, saddr4 | saddr6, daddr4 | daddr6, + * sport, dport + * A MPTCP connection is established (can start new subflows). + * + * MPTCP_EVENT_CLOSED: token + * A MPTCP connection has stopped. + * + * MPTCP_EVENT_ANNOUNCED: token, rem_id, family, daddr4 | daddr6 [, dport] + * A new address has been announced by the peer. + * + * MPTCP_EVENT_REMOVED: token, rem_id + * An address has been lost by the peer. + * + * MPTCP_EVENT_SUB_ESTABLISHED: token, family, saddr4 | saddr6, + * daddr4 | daddr6, sport, dport, backup, + * if_idx [, error] + * A new subflow has been established. 'error' should not be set. + * + * MPTCP_EVENT_SUB_CLOSED: token, family, saddr4 | saddr6, daddr4 | daddr6, + * sport, dport, backup, if_idx [, error] + * A subflow has been closed. An error (copy of sk_err) could be set if an + * error has been detected for this subflow. + * + * MPTCP_EVENT_SUB_PRIORITY: token, family, saddr4 | saddr6, daddr4 | daddr6, + * sport, dport, backup, if_idx [, error] + * The priority of a subflow has changed. 'error' should not be set. 
+ */ +enum mptcp_event_type { + MPTCP_EVENT_UNSPEC = 0, + MPTCP_EVENT_CREATED = 1, + MPTCP_EVENT_ESTABLISHED = 2, + MPTCP_EVENT_CLOSED = 3, + + MPTCP_EVENT_ANNOUNCED = 6, + MPTCP_EVENT_REMOVED = 7, + + MPTCP_EVENT_SUB_ESTABLISHED = 10, + MPTCP_EVENT_SUB_CLOSED = 11, + + MPTCP_EVENT_SUB_PRIORITY = 13, +}; + +enum mptcp_event_attr { + MPTCP_ATTR_UNSPEC = 0, + + MPTCP_ATTR_TOKEN, /* u32 */ + MPTCP_ATTR_FAMILY, /* u16 */ + MPTCP_ATTR_LOC_ID, /* u8 */ + MPTCP_ATTR_REM_ID, /* u8 */ + MPTCP_ATTR_SADDR4, /* be32 */ + MPTCP_ATTR_SADDR6, /* struct in6_addr */ + MPTCP_ATTR_DADDR4, /* be32 */ + MPTCP_ATTR_DADDR6, /* struct in6_addr */ + MPTCP_ATTR_SPORT, /* be16 */ + MPTCP_ATTR_DPORT, /* be16 */ + MPTCP_ATTR_BACKUP, /* u8 */ + MPTCP_ATTR_ERROR, /* u8 */ + MPTCP_ATTR_FLAGS, /* u16 */ + MPTCP_ATTR_TIMEOUT, /* u32 */ + MPTCP_ATTR_IF_IDX, /* s32 */ + + __MPTCP_ATTR_AFTER_LAST +}; + +#define MPTCP_ATTR_MAX (__MPTCP_ATTR_AFTER_LAST - 1) + #endif /* _MPTCP_H */ diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h index 899be98..566d8e4 100644 --- a/include/uapi/linux/netfilter.h +++ b/include/uapi/linux/netfilter.h @@ -43,7 +43,8 @@ enum nf_inet_hooks { NF_INET_FORWARD, NF_INET_LOCAL_OUT, NF_INET_POST_ROUTING, - NF_INET_NUMHOOKS + NF_INET_NUMHOOKS, + NF_INET_INGRESS = NF_INET_NUMHOOKS, }; enum nf_dev_hooks { diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index 4b372f4..659c882 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -92,11 +92,11 @@ enum { /* Reserve empty slots */ IPSET_ATTR_CADT_MAX = 16, /* Create-only specific attributes */ - IPSET_ATTR_GC, + IPSET_ATTR_INITVAL, /* was unused IPSET_ATTR_GC */ IPSET_ATTR_HASHSIZE, IPSET_ATTR_MAXELEM, IPSET_ATTR_NETMASK, - IPSET_ATTR_PROBES, + IPSET_ATTR_BUCKETSIZE, /* was unused IPSET_ATTR_PROBES */ IPSET_ATTR_RESIZE, IPSET_ATTR_SIZE, /* Kernel-only */ @@ -214,6 +214,8 @@ enum ipset_cadt_flags { enum 
ipset_create_flags { IPSET_CREATE_FLAG_BIT_FORCEADD = 0, IPSET_CREATE_FLAG_FORCEADD = (1 << IPSET_CREATE_FLAG_BIT_FORCEADD), + IPSET_CREATE_FLAG_BIT_BUCKETSIZE = 1, + IPSET_CREATE_FLAG_BUCKETSIZE = (1 << IPSET_CREATE_FLAG_BIT_BUCKETSIZE), IPSET_CREATE_FLAG_BIT_MAX = 7, }; diff --git a/include/uapi/linux/netfilter/x_tables.h b/include/uapi/linux/netfilter/x_tables.h index ae2fd12..89ee50d 100644 --- a/include/uapi/linux/netfilter/x_tables.h +++ b/include/uapi/linux/netfilter/x_tables.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _X_TABLES_H #define _X_TABLES_H -#include +#include #include #define XT_FUNCTION_MAXNAMELEN 30 diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 695c88e..5024c54 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -2,7 +2,7 @@ #ifndef __LINUX_NETLINK_H #define __LINUX_NETLINK_H -#include +#include #include /* for __kernel_sa_family_t */ #include @@ -129,6 +129,7 @@ struct nlmsgerr { * @NLMSGERR_ATTR_COOKIE: arbitrary subsystem specific cookie to * be used - in the success case - to identify a created * object or operation or similar (binary) + * @NLMSGERR_ATTR_POLICY: policy for a rejected attribute * @__NLMSGERR_ATTR_MAX: number of attributes * @NLMSGERR_ATTR_MAX: highest attribute number */ @@ -137,6 +138,7 @@ enum nlmsgerr_attrs { NLMSGERR_ATTR_MSG, NLMSGERR_ATTR_OFFS, NLMSGERR_ATTR_COOKIE, + NLMSGERR_ATTR_POLICY, __NLMSGERR_ATTR_MAX, NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1 @@ -327,6 +329,7 @@ enum netlink_attribute_type { * the index, if limited inside the nesting (U32) * @NL_POLICY_TYPE_ATTR_BITFIELD32_MASK: valid mask for the * bitfield32 type (U32) + * @NL_POLICY_TYPE_ATTR_MASK: mask of valid bits for unsigned integers (U64) * @NL_POLICY_TYPE_ATTR_PAD: pad attribute for 64-bit alignment */ enum netlink_policy_type_attr { @@ -342,6 +345,7 @@ enum netlink_policy_type_attr { NL_POLICY_TYPE_ATTR_POLICY_MAXTYPE, 
NL_POLICY_TYPE_ATTR_BITFIELD32_MASK, NL_POLICY_TYPE_ATTR_PAD, + NL_POLICY_TYPE_ATTR_MASK, /* keep last */ __NL_POLICY_TYPE_ATTR_MAX, diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index ee95f42..7ea59cf 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -591,6 +591,9 @@ enum { TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED = 1 << 1, /* Part of an existing connection. */ TCA_FLOWER_KEY_CT_FLAGS_RELATED = 1 << 2, /* Related to an established connection. */ TCA_FLOWER_KEY_CT_FLAGS_TRACKED = 1 << 3, /* Conntrack has occurred. */ + TCA_FLOWER_KEY_CT_FLAGS_INVALID = 1 << 4, /* Conntrack is invalid. */ + TCA_FLOWER_KEY_CT_FLAGS_REPLY = 1 << 5, /* Packet is in the reply direction. */ + __TCA_FLOWER_KEY_CT_FLAGS_MAX, }; enum { diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 9e7c2c6..79a699f 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -434,6 +434,7 @@ enum { TCA_HTB_RATE64, TCA_HTB_CEIL64, TCA_HTB_PAD, + TCA_HTB_OFFLOAD, __TCA_HTB_MAX, }; diff --git a/include/uapi/linux/rpl.h b/include/uapi/linux/rpl.h index c24b64c..72d60e0 100644 --- a/include/uapi/linux/rpl.h +++ b/include/uapi/linux/rpl.h @@ -28,10 +28,10 @@ struct ipv6_rpl_sr_hdr { pad:4, reserved1:16; #elif defined(__BIG_ENDIAN_BITFIELD) - __u32 reserved:20, + __u32 cmpri:4, + cmpre:4, pad:4, - cmpri:4, - cmpre:4; + reserved:20; #else #error "Please fix " #endif diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 5ad84e6..b34b9ad 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -319,6 +319,11 @@ enum rt_scope_t { #define RTM_F_FIB_MATCH 0x2000 /* return full fib lookup match */ #define RTM_F_OFFLOAD 0x4000 /* route is offloaded */ #define RTM_F_TRAP 0x8000 /* route is trapping packets */ +#define RTM_F_OFFLOAD_FAILED 0x20000000 /* route offload failed, this value + * is chosen to avoid conflicts with + * other flags defined in + * 
include/uapi/linux/ipv6_route.h + */ /* Reserved table identifiers */ @@ -396,11 +401,13 @@ struct rtnexthop { #define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */ #define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */ #define RTNH_F_ONLINK 4 /* Gateway is forced on link */ -#define RTNH_F_OFFLOAD 8 /* offloaded route */ +#define RTNH_F_OFFLOAD 8 /* Nexthop is offloaded */ #define RTNH_F_LINKDOWN 16 /* carrier-down on nexthop */ #define RTNH_F_UNRESOLVED 32 /* The entry is unresolved (ipmr) */ +#define RTNH_F_TRAP 64 /* Nexthop is trapping packets */ -#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN | RTNH_F_OFFLOAD) +#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN | \ + RTNH_F_OFFLOAD | RTNH_F_TRAP) /* Macros to handle hexthops */ @@ -764,12 +771,18 @@ enum { #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg)) /* tcamsg flags stored in attribute TCA_ROOT_FLAGS * - * TCA_FLAG_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO - * actions in a dump. All dump responses will contain the number of actions - * being dumped stored in for user app's consumption in TCA_ROOT_COUNT + * TCA_ACT_FLAG_LARGE_DUMP_ON user->kernel to request for larger than + * TCA_ACT_MAX_PRIO actions in a dump. All dump responses will contain the + * number of actions being dumped stored in for user app's consumption in + * TCA_ROOT_COUNT + * + * TCA_ACT_FLAG_TERSE_DUMP user->kernel to request terse (brief) dump that only + * includes essential action info (kind, index, etc.) 
* */ #define TCA_FLAG_LARGE_DUMP_ON (1 << 0) +#define TCA_ACT_FLAG_LARGE_DUMP_ON TCA_FLAG_LARGE_DUMP_ON +#define TCA_ACT_FLAG_TERSE_DUMP (1 << 1) /* New extended info filters for IFLA_EXT_MASK */ #define RTEXT_FILTER_VF (1 << 0) @@ -777,6 +790,8 @@ enum { #define RTEXT_FILTER_BRVLAN_COMPRESSED (1 << 2) #define RTEXT_FILTER_SKIP_STATS (1 << 3) #define RTEXT_FILTER_MRP (1 << 4) +#define RTEXT_FILTER_CFM_CONFIG (1 << 5) +#define RTEXT_FILTER_CFM_STATUS (1 << 6) /* End of information exported to user level */ diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 63f9cf7..4831d33 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -140,6 +140,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_ECN_SUPPORTED 130 #define SCTP_EXPOSE_POTENTIALLY_FAILED_STATE 131 #define SCTP_EXPOSE_PF_STATE SCTP_EXPOSE_POTENTIALLY_FAILED_STATE +#define SCTP_REMOTE_UDP_ENCAPS_PORT 132 /* PR-SCTP policies */ #define SCTP_PR_SCTP_NONE 0x0000 @@ -1191,6 +1192,12 @@ struct sctp_event { uint8_t se_on; }; +struct sctp_udpencaps { + sctp_assoc_t sue_assoc_id; + struct sockaddr_storage sue_address; + uint16_t sue_port; +}; + /* SCTP Stream schedulers */ enum sctp_sched_type { SCTP_SS_FCFS, diff --git a/include/uapi/linux/seg6_local.h b/include/uapi/linux/seg6_local.h index 5312de8..bb5c8dd 100644 --- a/include/uapi/linux/seg6_local.h +++ b/include/uapi/linux/seg6_local.h @@ -26,6 +26,7 @@ enum { SEG6_LOCAL_IIF, SEG6_LOCAL_OIF, SEG6_LOCAL_BPF, + SEG6_LOCAL_VRFTABLE, __SEG6_LOCAL_MAX, }; #define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1) diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index f84e7bc..26fc60c 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -159,6 +159,7 @@ enum UDP_MIB_SNDBUFERRORS, /* SndbufErrors */ UDP_MIB_CSUMERRORS, /* InCsumErrors */ UDP_MIB_IGNOREDMULTI, /* IgnoredMulti */ + UDP_MIB_MEMERRORS, /* MemErrors */ __UDP_MIB_MAX }; diff --git a/include/uapi/linux/tc_act/tc_mpls.h 
b/include/uapi/linux/tc_act/tc_mpls.h index 9360e95..9e4e8f5 100644 --- a/include/uapi/linux/tc_act/tc_mpls.h +++ b/include/uapi/linux/tc_act/tc_mpls.h @@ -10,6 +10,7 @@ #define TCA_MPLS_ACT_PUSH 2 #define TCA_MPLS_ACT_MODIFY 3 #define TCA_MPLS_ACT_DEC_TTL 4 +#define TCA_MPLS_ACT_MAC_PUSH 5 struct tc_mpls { tc_gen; /* generic TC action fields. */ diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h index 168995b..5b306fe 100644 --- a/include/uapi/linux/tc_act/tc_vlan.h +++ b/include/uapi/linux/tc_act/tc_vlan.h @@ -16,6 +16,8 @@ #define TCA_VLAN_ACT_POP 1 #define TCA_VLAN_ACT_PUSH 2 #define TCA_VLAN_ACT_MODIFY 3 +#define TCA_VLAN_ACT_POP_ETH 4 +#define TCA_VLAN_ACT_PUSH_ETH 5 struct tc_vlan { tc_gen; @@ -30,6 +32,8 @@ enum { TCA_VLAN_PUSH_VLAN_PROTOCOL, TCA_VLAN_PAD, TCA_VLAN_PUSH_VLAN_PRIORITY, + TCA_VLAN_PUSH_ETH_DST, + TCA_VLAN_PUSH_ETH_SRC, __TCA_VLAN_MAX, }; #define TCA_VLAN_MAX (__TCA_VLAN_MAX - 1) diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index ee670e8..a206627 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -51,7 +51,7 @@ struct tcphdr { fin:1; #else #error "Adjust your defines" -#endif +#endif __be16 window; __sum16 check; __be16 urg_ptr; @@ -62,14 +62,14 @@ struct tcphdr { * (union is compatible to any of its members) * This means this part of the code is -fstrict-aliasing safe now. 
*/ -union tcp_word_hdr { +union tcp_word_hdr { struct tcphdr hdr; - __be32 words[5]; -}; + __be32 words[5]; +}; -#define tcp_flag_word(tp) ( ((union tcp_word_hdr *)(tp))->words [3]) +#define tcp_flag_word(tp) (((union tcp_word_hdr *)(tp))->words[3]) -enum { +enum { TCP_FLAG_CWR = __constant_cpu_to_be32(0x00800000), TCP_FLAG_ECE = __constant_cpu_to_be32(0x00400000), TCP_FLAG_URG = __constant_cpu_to_be32(0x00200000), @@ -80,7 +80,7 @@ enum { TCP_FLAG_FIN = __constant_cpu_to_be32(0x00010000), TCP_RESERVED_BITS = __constant_cpu_to_be32(0x0F000000), TCP_DATA_OFFSET = __constant_cpu_to_be32(0xF0000000) -}; +}; /* * TCP general constants @@ -103,8 +103,8 @@ enum { #define TCP_QUICKACK 12 /* Block/reenable quick acks */ #define TCP_CONGESTION 13 /* Congestion control algorithm */ #define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */ -#define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams*/ -#define TCP_THIN_DUPACK 17 /* Fast retrans. after 1 dupack */ +#define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams*/ +#define TCP_THIN_DUPACK 17 /* Fast retrans. after 1 dupack */ #define TCP_USER_TIMEOUT 18 /* How long for loss retry before timeout */ #define TCP_REPAIR 19 /* TCP sock is under repair right now */ #define TCP_REPAIR_QUEUE 20 @@ -314,6 +314,7 @@ enum { TCP_NLA_TIMEOUT_REHASH, /* Timeout-triggered rehash attempts */ TCP_NLA_BYTES_NOTSENT, /* Bytes in write queue not yet sent */ TCP_NLA_EDT, /* Earliest departure time (CLOCK_MONOTONIC) */ + TCP_NLA_TTL, /* TTL or hop limit of a packet received */ }; /* for TCP_MD5SIG socket option */ @@ -343,11 +344,19 @@ struct tcp_diag_md5sig { /* setsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...) 
*/ +#define TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT 0x1 struct tcp_zerocopy_receive { __u64 address; /* in: address of mapping */ __u32 length; /* in/out: number of bytes to map/mapped */ __u32 recv_skip_hint; /* out: amount of bytes to skip */ __u32 inq; /* out: amount of bytes in read queue */ __s32 err; /* out: socket error */ + __u64 copybuf_address; /* in: copybuf address (small reads) */ + __s32 copybuf_len; /* in/out: copybuf bytes avail/used or error */ + __u32 flags; /* in: flags */ + __u64 msg_control; /* ancillary data */ + __u64 msg_controllen; + __u32 msg_flags; + __u32 reserved; /* set to 0 for now */ }; #endif /* _LINUX_TCP_H */ diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index de5bcd2..f08cc36 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -254,6 +254,8 @@ static __inline__ int tipc_aead_key_size(struct tipc_aead_key *key) return sizeof(*key) + key->keylen; } +#define TIPC_REKEYING_NOW (~0U) + /* The macros and functions below are deprecated: */ diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index dc0d23a..d847dd6 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -165,6 +165,8 @@ enum { TIPC_NLA_NODE_UP, /* flag */ TIPC_NLA_NODE_ID, /* data */ TIPC_NLA_NODE_KEY, /* data */ + TIPC_NLA_NODE_KEY_MASTER, /* flag */ + TIPC_NLA_NODE_REKEYING, /* u32 */ __TIPC_NLA_NODE_MAX, TIPC_NLA_NODE_MAX = __TIPC_NLA_NODE_MAX - 1 diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h index 3b67540..805cbce 100644 --- a/include/uapi/linux/tls.h +++ b/include/uapi/linux/tls.h @@ -77,6 +77,13 @@ #define TLS_CIPHER_AES_CCM_128_TAG_SIZE 16 #define TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE 8 +#define TLS_CIPHER_CHACHA20_POLY1305 54 +#define TLS_CIPHER_CHACHA20_POLY1305_IV_SIZE 12 +#define TLS_CIPHER_CHACHA20_POLY1305_KEY_SIZE 32 +#define TLS_CIPHER_CHACHA20_POLY1305_SALT_SIZE 0 +#define TLS_CIPHER_CHACHA20_POLY1305_TAG_SIZE 16 +#define 
TLS_CIPHER_CHACHA20_POLY1305_REC_SEQ_SIZE 8 + #define TLS_SET_RECORD_TYPE 1 #define TLS_GET_RECORD_TYPE 2 @@ -109,6 +116,14 @@ struct tls12_crypto_info_aes_ccm_128 { unsigned char rec_seq[TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE]; }; +struct tls12_crypto_info_chacha20_poly1305 { + struct tls_crypto_info info; + unsigned char iv[TLS_CIPHER_CHACHA20_POLY1305_IV_SIZE]; + unsigned char key[TLS_CIPHER_CHACHA20_POLY1305_KEY_SIZE]; + unsigned char salt[TLS_CIPHER_CHACHA20_POLY1305_SALT_SIZE]; + unsigned char rec_seq[TLS_CIPHER_CHACHA20_POLY1305_REC_SEQ_SIZE]; +}; + enum { TLS_INFO_UNSPEC, TLS_INFO_VERSION, diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h new file mode 100644 index 0000000..37ae26b --- /dev/null +++ b/include/uapi/linux/vdpa.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * vdpa device management interface + * Copyright (c) 2020 Mellanox Technologies Ltd. All rights reserved. + */ + +#ifndef _LINUX_VDPA_H_ +#define _LINUX_VDPA_H_ + +#define VDPA_GENL_NAME "vdpa" +#define VDPA_GENL_VERSION 0x1 + +enum vdpa_command { + VDPA_CMD_UNSPEC, + VDPA_CMD_MGMTDEV_NEW, + VDPA_CMD_MGMTDEV_GET, /* can dump */ + VDPA_CMD_DEV_NEW, + VDPA_CMD_DEV_DEL, + VDPA_CMD_DEV_GET, /* can dump */ +}; + +enum vdpa_attr { + VDPA_ATTR_UNSPEC, + + /* bus name (optional) + dev name together make the parent device handle */ + VDPA_ATTR_MGMTDEV_BUS_NAME, /* string */ + VDPA_ATTR_MGMTDEV_DEV_NAME, /* string */ + VDPA_ATTR_MGMTDEV_SUPPORTED_CLASSES, /* u64 */ + + VDPA_ATTR_DEV_NAME, /* string */ + VDPA_ATTR_DEV_ID, /* u32 */ + VDPA_ATTR_DEV_VENDOR_ID, /* u32 */ + VDPA_ATTR_DEV_MAX_VQS, /* u32 */ + VDPA_ATTR_DEV_MAX_VQ_SIZE, /* u16 */ + + /* new attributes must be added above here */ + VDPA_ATTR_MAX, +}; + +#endif diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h new file mode 100644 index 0000000..bc1c062 --- /dev/null +++ b/include/uapi/linux/virtio_ids.h @@ -0,0 +1,58 @@ +#ifndef _LINUX_VIRTIO_IDS_H +#define 
_LINUX_VIRTIO_IDS_H +/* + * Virtio IDs + * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
*/ + +#define VIRTIO_ID_NET 1 /* virtio net */ +#define VIRTIO_ID_BLOCK 2 /* virtio block */ +#define VIRTIO_ID_CONSOLE 3 /* virtio console */ +#define VIRTIO_ID_RNG 4 /* virtio rng */ +#define VIRTIO_ID_BALLOON 5 /* virtio balloon */ +#define VIRTIO_ID_IOMEM 6 /* virtio ioMemory */ +#define VIRTIO_ID_RPMSG 7 /* virtio remote processor messaging */ +#define VIRTIO_ID_SCSI 8 /* virtio scsi */ +#define VIRTIO_ID_9P 9 /* 9p virtio console */ +#define VIRTIO_ID_MAC80211_WLAN 10 /* virtio WLAN MAC */ +#define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */ +#define VIRTIO_ID_CAIF 12 /* Virtio caif */ +#define VIRTIO_ID_MEMORY_BALLOON 13 /* virtio memory balloon */ +#define VIRTIO_ID_GPU 16 /* virtio GPU */ +#define VIRTIO_ID_CLOCK 17 /* virtio clock/timer */ +#define VIRTIO_ID_INPUT 18 /* virtio input */ +#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ +#define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ +#define VIRTIO_ID_SIGNAL_DIST 21 /* virtio signal distribution device */ +#define VIRTIO_ID_PSTORE 22 /* virtio pstore device */ +#define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ +#define VIRTIO_ID_MEM 24 /* virtio mem */ +#define VIRTIO_ID_FS 26 /* virtio filesystem */ +#define VIRTIO_ID_PMEM 27 /* virtio pmem */ +#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ + +#endif /* _LINUX_VIRTIO_IDS_H */ diff --git a/include/utils.h b/include/utils.h index 7041c46..b29c379 100644 --- a/include/utils.h +++ b/include/utils.h @@ -20,7 +20,6 @@ extern int preferred_family; extern int human_readable; -extern int use_iec; extern int show_stats; extern int show_details; extern int show_raw; @@ -163,6 +162,9 @@ int get_be64(__be64 *val, const char *arg, int base); int get_be32(__be32 *val, const char *arg, int base); int get_be16(__be16 *val, const char *arg, int base); int get_addr64(__u64 *ap, const char *cp); +int get_rate(unsigned int *rate, const char *str); +int get_rate64(__u64 *rate, const char *str); +int get_size(unsigned int *size, const char 
*str); int hex2mem(const char *buf, uint8_t *mem, int count); char *hexstring_n2a(const __u8 *str, int len, char *buf, int blen); @@ -322,4 +324,47 @@ int get_time64(__s64 *time, const char *str); char *sprint_time(__u32 time, char *buf); char *sprint_time64(__s64 time, char *buf); +int do_batch(const char *name, bool force, + int (*cmd)(int argc, char *argv[], void *user), void *user); + +int parse_one_of(const char *msg, const char *realval, const char * const *list, + size_t len, int *p_err); +bool parse_on_off(const char *msg, const char *realval, int *p_err); + +int parse_mapping_num_all(__u32 *keyp, const char *key); +int parse_mapping_gen(int *argcp, char ***argvp, + int (*key_cb)(__u32 *keyp, const char *key), + int (*mapping_cb)(__u32 key, char *value, void *data), + void *mapping_cb_data); +int parse_mapping(int *argcp, char ***argvp, bool allow_all, + int (*mapping_cb)(__u32 key, char *value, void *data), + void *mapping_cb_data); + +struct str_num_map { + const char *str; + unsigned int num; +}; + +int str_map_lookup_str(const struct str_num_map *map, const char *needle); +const char *str_map_lookup_uint(const struct str_num_map *map, + unsigned int val); +const char *str_map_lookup_u16(const struct str_num_map *map, uint16_t val); +const char *str_map_lookup_u8(const struct str_num_map *map, uint8_t val); + +unsigned int get_str_char_count(const char *str, int match); +int str_split_by_char(char *str, char **before, char **after, int match); + +#define INDENT_STR_MAXLEN 32 + +struct indent_mem { + int indent_level; + char indent_str[INDENT_STR_MAXLEN + 1]; +}; + +struct indent_mem *alloc_indent_mem(void); +void free_indent_mem(struct indent_mem *mem); +void inc_indent(struct indent_mem *mem); +void dec_indent(struct indent_mem *mem); +void print_indent(struct indent_mem *mem); + #endif /* __UTILS_H__ */ diff --git a/include/version.h b/include/version.h index 0088493..b511341 100644 --- a/include/version.h +++ b/include/version.h @@ -1 +1 @@ -static 
const char version[] = "5.8.0"; +static const char version[] = "5.12.0"; diff --git a/ip/ip.c b/ip/ip.c index ac44502..4cf09fc 100644 --- a/ip/ip.c +++ b/ip/ip.c @@ -24,6 +24,11 @@ #include "namespace.h" #include "color.h" #include "rt_names.h" +#include "bpf_util.h" + +#ifndef LIBDIR +#define LIBDIR "/usr/lib" +#endif int preferred_family = AF_UNSPEC; int human_readable; @@ -41,6 +46,17 @@ bool do_all; struct rtnl_handle rth = { .fd = -1 }; +const char *get_ip_lib_dir(void) +{ + const char *lib_dir; + + lib_dir = getenv("IP_LIB_DIR"); + if (!lib_dir) + lib_dir = LIBDIR "/ip"; + + return lib_dir; +} + static void usage(void) __attribute__((noreturn)); static void usage(void) @@ -48,10 +64,11 @@ static void usage(void) fprintf(stderr, "Usage: ip [ OPTIONS ] OBJECT { COMMAND | help }\n" " ip [ -force ] -batch filename\n" - "where OBJECT := { link | address | addrlabel | route | rule | neigh | ntable |\n" - " tunnel | tuntap | maddress | mroute | mrule | monitor | xfrm |\n" - " netns | l2tp | fou | macsec | tcp_metrics | token | netconf | ila |\n" - " vrf | sr | nexthop | mptcp }\n" + "where OBJECT := { address | addrlabel | fou | help | ila | l2tp | link |\n" + " macsec | maddress | monitor | mptcp | mroute | mrule |\n" + " neighbor | neighbour | netconf | netns | nexthop | ntable |\n" + " ntbl | route | rule | sr | tap | tcpmetrics |\n" + " token | tunnel | tuntap | vrf | xfrm }\n" " OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] | -r[esolve] |\n" " -h[uman-readable] | -iec | -j[son] | -p[retty] |\n" " -f[amily] { inet | inet6 | mpls | bridge | link } |\n" @@ -121,60 +138,36 @@ static int do_cmd(const char *argv0, int argc, char **argv) return EXIT_FAILURE; } -static int batch(const char *name) +static int ip_batch_cmd(int argc, char *argv[], void *data) { - char *line = NULL; - size_t len = 0; - int ret = EXIT_SUCCESS; - int orig_family = preferred_family; + const int *orig_family = data; - batch_mode = 1; + preferred_family = *orig_family; + return 
do_cmd(argv[0], argc, argv); +} - if (name && strcmp(name, "-") != 0) { - if (freopen(name, "r", stdin) == NULL) { - fprintf(stderr, - "Cannot open file \"%s\" for reading: %s\n", - name, strerror(errno)); - return EXIT_FAILURE; - } - } +static int batch(const char *name) +{ + int orig_family = preferred_family; + int ret; if (rtnl_open(&rth, 0) < 0) { fprintf(stderr, "Cannot open rtnetlink\n"); return EXIT_FAILURE; } - cmdlineno = 0; - while (getcmdline(&line, &len, stdin) != -1) { - char *largv[100]; - int largc; - - preferred_family = orig_family; - - largc = makeargs(line, largv, 100); - if (largc == 0) - continue; /* blank line */ - - if (do_cmd(largv[0], largc, largv)) { - fprintf(stderr, "Command failed %s:%d\n", - name, cmdlineno); - ret = EXIT_FAILURE; - if (!force) - break; - } - } - if (line) - free(line); + batch_mode = 1; + ret = do_batch(name, force, ip_batch_cmd, &orig_family); rtnl_close(&rth); return ret; } - int main(int argc, char **argv) { - char *basename; + const char *libbpf_version; char *batch_file = NULL; + char *basename; int color = 0; /* to run vrf exec without root, capabilities might be set, drop them @@ -255,7 +248,11 @@ int main(int argc, char **argv) ++timestamp; ++timestamp_short; } else if (matches(opt, "-Version") == 0) { - printf("ip utility, iproute2-%s\n", version); + printf("ip utility, iproute2-%s", version); + libbpf_version = get_libbpf_version(); + if (libbpf_version) + printf(", libbpf %s", libbpf_version); + printf("\n"); exit(0); } else if (matches(opt, "-force") == 0) { ++force; diff --git a/ip/ip_common.h b/ip/ip_common.h index d604f75..1fd2ed3 100644 --- a/ip/ip_common.h +++ b/ip/ip_common.h @@ -6,6 +6,8 @@ #include "json_print.h" +extern int use_iec; + struct link_filter { int ifindex; int family; @@ -27,6 +29,8 @@ struct link_filter { int target_nsid; }; +const char *get_ip_lib_dir(void); + int get_operstate(const char *name); int print_linkinfo(struct nlmsghdr *n, void *arg); int print_addrinfo(struct nlmsghdr 
*n, void *arg); @@ -35,6 +39,7 @@ int print_neigh(struct nlmsghdr *n, void *arg); int ipaddr_list_link(int argc, char **argv); void ipaddr_get_vf_rate(int, int *, int *, const char *); void iplink_usage(void) __attribute__((noreturn)); +void iplink_types_usage(void); void iproute_reset_filter(int ifindex); void ipmroute_reset_filter(int ifindex); diff --git a/ip/ipaddress.c b/ip/ipaddress.c index 2b4cb48..cfb24f5 100644 --- a/ip/ipaddress.c +++ b/ip/ipaddress.c @@ -73,12 +73,8 @@ static void usage(void) "CONFFLAG-LIST := [ CONFFLAG-LIST ] CONFFLAG\n" "CONFFLAG := [ home | nodad | mngtmpaddr | noprefixroute | autojoin ]\n" "LIFETIME := [ valid_lft LFT ] [ preferred_lft LFT ]\n" - "LFT := forever | SECONDS\n" - "TYPE := { vlan | veth | vcan | vxcan | dummy | ifb | macvlan | macvtap |\n" - " bridge | bond | ipoib | ip6tnl | ipip | sit | vxlan | lowpan |\n" - " gre | gretap | erspan | ip6gre | ip6gretap | ip6erspan | vti |\n" - " nlmon | can | bond_slave | ipvlan | geneve | bridge_slave |\n" - " hsr | macsec | netdevsim }\n"); + "LFT := forever | SECONDS\n"); + iplink_types_usage(); exit(-1); } @@ -744,7 +740,7 @@ static void __print_link_stats(FILE *fp, struct rtattr *tb[]) close_json_object(); } else { /* RX stats */ - fprintf(fp, " RX: bytes packets errors dropped overrun mcast %s%s", + fprintf(fp, " RX: bytes packets errors dropped missed mcast %s%s", s->rx_compressed ? 
"compressed" : "", _SL_); fprintf(fp, " "); @@ -752,7 +748,7 @@ static void __print_link_stats(FILE *fp, struct rtattr *tb[]) print_num(fp, 8, s->rx_packets); print_num(fp, 7, s->rx_errors); print_num(fp, 7, s->rx_dropped); - print_num(fp, 7, s->rx_over_errors); + print_num(fp, 7, s->rx_missed_errors); print_num(fp, 7, s->multicast); if (s->rx_compressed) print_num(fp, 7, s->rx_compressed); @@ -760,14 +756,14 @@ static void __print_link_stats(FILE *fp, struct rtattr *tb[]) /* RX error stats */ if (show_stats > 1) { fprintf(fp, "%s", _SL_); - fprintf(fp, " RX errors: length crc frame fifo missed%s%s", + fprintf(fp, " RX errors: length crc frame fifo overrun%s%s", s->rx_nohandler ? " nohandler" : "", _SL_); fprintf(fp, " "); print_num(fp, 8, s->rx_length_errors); print_num(fp, 7, s->rx_crc_errors); print_num(fp, 7, s->rx_frame_errors); print_num(fp, 7, s->rx_fifo_errors); - print_num(fp, 7, s->rx_missed_errors); + print_num(fp, 7, s->rx_over_errors); if (s->rx_nohandler) print_num(fp, 7, s->rx_nohandler); } @@ -874,6 +870,45 @@ static void print_link_event(FILE *f, __u32 event) } } +static void print_proto_down(FILE *f, struct rtattr *tb[]) +{ /* Emit "protodown on" when IFLA_PROTO_DOWN is set, then the names of the set protodown reason bits. */ + struct rtattr *preason[IFLA_PROTO_DOWN_REASON_MAX+1]; + + if (tb[IFLA_PROTO_DOWN]) { + if (rta_getattr_u8(tb[IFLA_PROTO_DOWN])) + print_bool(PRINT_ANY, + "proto_down", " protodown on ", true); + } + + if (tb[IFLA_PROTO_DOWN_REASON]) { + char buf[255]; + __u32 reason; + int i, start = 1; + + parse_rtattr_nested(preason, IFLA_PROTO_DOWN_REASON_MAX, + tb[IFLA_PROTO_DOWN_REASON]); + if (!preason[IFLA_PROTO_DOWN_REASON_VALUE]) /* the value lives in the nested table parsed above, not in tb[] */ + return; + + reason = rta_getattr_u32(preason[IFLA_PROTO_DOWN_REASON_VALUE]); /* 32-bit bitmask, one bit per reason */ + if (!reason) + return; + + open_json_array(PRINT_ANY, + is_json_context() ? "proto_down_reason" : "protodown_reason <"); + for (i = 0; reason; i++, reason >>= 1) { + if (reason & 0x1) { + if (protodown_reason_n2a(i, buf, sizeof(buf))) + break; + print_string(PRINT_ANY, NULL, + start ? 
"%s" : ",%s", buf); + start = 0; + } + } + close_json_array(PRINT_ANY, ">"); + } +} + int print_linkinfo(struct nlmsghdr *n, void *arg) { FILE *fp = (FILE *)arg; @@ -883,6 +918,7 @@ int print_linkinfo(struct nlmsghdr *n, void *arg) const char *name; unsigned int m_flag = 0; SPRINT_BUF(b1); + bool truncated_vfs = false; if (n->nlmsg_type != RTM_NEWLINK && n->nlmsg_type != RTM_DELLINK) return 0; @@ -1066,11 +1102,8 @@ int print_linkinfo(struct nlmsghdr *n, void *arg) print_int(PRINT_FP, NULL, " new-ifindex %d", id); } - if (tb[IFLA_PROTO_DOWN]) { - if (rta_getattr_u8(tb[IFLA_PROTO_DOWN])) - print_bool(PRINT_ANY, - "proto_down", " protodown on ", true); - } + if (tb[IFLA_PROTO_DOWN]) + print_proto_down(fp, tb); if (show_details) { if (tb[IFLA_PROMISCUITY]) @@ -1163,15 +1196,18 @@ int print_linkinfo(struct nlmsghdr *n, void *arg) if ((do_link || show_details) && tb[IFLA_VFINFO_LIST] && tb[IFLA_NUM_VF]) { struct rtattr *i, *vflist = tb[IFLA_VFINFO_LIST]; - int rem = RTA_PAYLOAD(vflist); + int rem = RTA_PAYLOAD(vflist), count = 0; open_json_array(PRINT_JSON, "vfinfo_list"); for (i = RTA_DATA(vflist); RTA_OK(i, rem); i = RTA_NEXT(i, rem)) { open_json_object(NULL); print_vfinfo(fp, ifi, i); close_json_object(); + count++; } close_json_array(PRINT_JSON, NULL); + if (count != rta_getattr_u32(tb[IFLA_NUM_VF])) + truncated_vfs = true; } if (tb[IFLA_PROP_LIST]) { @@ -1192,6 +1228,9 @@ int print_linkinfo(struct nlmsghdr *n, void *arg) print_string(PRINT_FP, NULL, "%s", "\n"); fflush(fp); + /* prettier here if stderr and stdout go to the same place */ + if (truncated_vfs) + fprintf(stderr, "Truncated VF list: %s\n", name); return 1; } diff --git a/ip/ipila.c b/ip/ipila.c index 739ee4e..475c35b 100644 --- a/ip/ipila.c +++ b/ip/ipila.c @@ -31,7 +31,8 @@ static void usage(void) "Usage: ip ila add loc_match LOCATOR_MATCH loc LOCATOR [ dev DEV ] OPTIONS\n" " ip ila del loc_match LOCATOR_MATCH [ loc LOCATOR ] [ dev DEV ]\n" " ip ila list\n" - "OPTIONS := [ csum-mode { adj-transport | 
neutral-map | neutral-map-auto | no-action } ]\n" + "OPTIONS := [ csum-mode { adj-transport | neutral-map |\n" + " neutral-map-auto | no-action } ]\n" " [ ident-type { luid | use-format } ]\n"); exit(-1); diff --git a/ip/iplink.c b/ip/iplink.c index 5ec33a9..faafd7e 100644 --- a/ip/iplink.c +++ b/ip/iplink.c @@ -34,9 +34,6 @@ #include "namespace.h" #define IPLINK_IOCTL_COMPAT 1 -#ifndef LIBDIR -#define LIBDIR "/usr/lib" -#endif #ifndef GSO_MAX_SIZE #define GSO_MAX_SIZE 65536 @@ -49,6 +46,19 @@ static void usage(void) __attribute__((noreturn)); static int iplink_have_newlink(void); +void iplink_types_usage(void) +{ + /* Remember to add new entry here if new type is added. */ + fprintf(stderr, + "TYPE := { bareudp | bond | bond_slave | bridge | bridge_slave |\n" + " dummy | erspan | geneve | gre | gretap | ifb |\n" + " ip6erspan | ip6gre | ip6gretap | ip6tnl |\n" + " ipip | ipoib | ipvlan | ipvtap |\n" + " macsec | macvlan | macvtap |\n" + " netdevsim | nlmon | rmnet | sit | team | team_slave |\n" + " vcan | veth | vlan | vrf | vti | vxcan | vxlan | xfrm }\n"); +} + void iplink_usage(void) { if (iplink_have_newlink()) { @@ -105,6 +115,7 @@ void iplink_usage(void) " [ nomaster ]\n" " [ addrgenmode { eui64 | none | stable_secret | random } ]\n" " [ protodown { on | off } ]\n" + " [ protodown_reason PREASON { on | off } ]\n" " [ gso_max_size BYTES ] | [ gso_max_segs PACKETS ]\n" "\n" " ip link show [ DEVICE | group GROUP ] [up] [master DEV] [vrf NAME] [type TYPE]\n" @@ -119,13 +130,8 @@ void iplink_usage(void) fprintf(stderr, "\n" " ip link help [ TYPE ]\n" - "\n" - "TYPE := { vlan | veth | vcan | vxcan | dummy | ifb | macvlan | macvtap |\n" - " bridge | bond | team | ipoib | ip6tnl | ipip | sit | vxlan |\n" - " gre | gretap | erspan | ip6gre | ip6gretap | ip6erspan |\n" - " vti | nlmon | team_slave | bond_slave | bridge_slave |\n" - " ipvlan | ipvtap | geneve | bareudp | vrf | macsec | netdevsim | rmnet |\n" - " xfrm }\n"); + "\n"); + iplink_types_usage(); } exit(-1); 
} @@ -156,7 +162,7 @@ struct link_util *get_link_kind(const char *id) if (strcmp(l->id, id) == 0) return l; - snprintf(buf, sizeof(buf), LIBDIR "/ip/link_%s.so", id); + snprintf(buf, sizeof(buf), "%s/link_%s.so", get_ip_lib_dir(), id); dlh = dlopen(buf, RTLD_LAZY); if (dlh == NULL) { /* look in current binary, only open once */ @@ -351,6 +357,7 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp, int len, argc = *argcp; char **argv = *argvp; struct rtattr *vfinfo; + int ret; tivt.min_tx_rate = -1; tivt.max_tx_rate = -1; @@ -463,12 +470,9 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp, struct ifla_vf_spoofchk ivs; NEXT_ARG(); - if (matches(*argv, "on") == 0) - ivs.setting = 1; - else if (matches(*argv, "off") == 0) - ivs.setting = 0; - else - return on_off("spoofchk", *argv); + ivs.setting = parse_on_off("spoofchk", *argv, &ret); + if (ret) + return ret; ivs.vf = vf; addattr_l(&req->n, sizeof(*req), IFLA_VF_SPOOFCHK, &ivs, sizeof(ivs)); @@ -477,12 +481,9 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp, struct ifla_vf_rss_query_en ivs; NEXT_ARG(); - if (matches(*argv, "on") == 0) - ivs.setting = 1; - else if (matches(*argv, "off") == 0) - ivs.setting = 0; - else - return on_off("query_rss", *argv); + ivs.setting = parse_on_off("query_rss", *argv, &ret); + if (ret) + return ret; ivs.vf = vf; addattr_l(&req->n, sizeof(*req), IFLA_VF_RSS_QUERY_EN, &ivs, sizeof(ivs)); @@ -491,12 +492,9 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp, struct ifla_vf_trust ivt; NEXT_ARG(); - if (matches(*argv, "on") == 0) - ivt.setting = 1; - else if (matches(*argv, "off") == 0) - ivt.setting = 0; - else - invarg("Invalid \"trust\" value\n", *argv); + ivt.setting = parse_on_off("trust", *argv, &ret); + if (ret) + return ret; ivt.vf = vf; addattr_l(&req->n, sizeof(*req), IFLA_VF_TRUST, &ivt, sizeof(ivt)); @@ -594,6 +592,7 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, char **type) int index = 0; int group = -1; 
int addr_len = 0; + int err; ret = argc; @@ -737,12 +736,9 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, char **type) int carrier; NEXT_ARG(); - if (strcmp(*argv, "on") == 0) - carrier = 1; - else if (strcmp(*argv, "off") == 0) - carrier = 0; - else - return on_off("carrier", *argv); + carrier = parse_on_off("carrier", *argv, &err); + if (err) + return err; addattr8(&req->n, sizeof(*req), IFLA_CARRIER, carrier); } else if (strcmp(*argv, "vf") == 0) { @@ -895,14 +891,33 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, char **type) unsigned int proto_down; NEXT_ARG(); - if (strcmp(*argv, "on") == 0) - proto_down = 1; - else if (strcmp(*argv, "off") == 0) - proto_down = 0; - else - return on_off("protodown", *argv); + proto_down = parse_on_off("protodown", *argv, &err); + if (err) + return err; addattr8(&req->n, sizeof(*req), IFLA_PROTO_DOWN, proto_down); + } else if (strcmp(*argv, "protodown_reason") == 0) { + struct rtattr *pr; + __u32 preason = 0, prvalue = 0, prmask = 0; + + NEXT_ARG(); + if (protodown_reason_a2n(&preason, *argv)) + invarg("invalid protodown reason\n", *argv); + NEXT_ARG(); + prmask = 1 << preason; + if (matches(*argv, "on") == 0) + prvalue |= prmask; + else if (matches(*argv, "off") == 0) + prvalue &= ~prmask; + else + return on_off("protodown_reason", *argv); + pr = addattr_nest(&req->n, sizeof(*req), + IFLA_PROTO_DOWN_REASON | NLA_F_NESTED); + addattr32(&req->n, sizeof(*req), + IFLA_PROTO_DOWN_REASON_MASK, prmask); + addattr32(&req->n, sizeof(*req), + IFLA_PROTO_DOWN_REASON_VALUE, prvalue); + addattr_nest_end(&req->n, pr); } else if (strcmp(*argv, "gso_max_size") == 0) { unsigned int max_size; diff --git a/ip/iplink_bareudp.c b/ip/iplink_bareudp.c index 860ec69..aa31110 100644 --- a/ip/iplink_bareudp.c +++ b/ip/iplink_bareudp.c @@ -22,9 +22,11 @@ static void print_explain(FILE *f) " [ srcportmin PORT ]\n" " [ [no]multiproto ]\n" "\n" - "Where: PORT := 0-65535\n" - " PROTO := NUMBER | ip | mpls\n" - " 
SRCPORTMIN := 0-65535\n" + "Where: PORT := UDP_PORT\n" + " PROTO := ETHERTYPE\n" + "\n" + "Note: ETHERTYPE can be given as number or as protocol name (\"ipv4\", \"ipv6\",\n" + " \"mpls_uc\", etc.).\n" ); } diff --git a/ip/iplink_bond.c b/ip/iplink_bond.c index 585b6be..d45845b 100644 --- a/ip/iplink_bond.c +++ b/ip/iplink_bond.c @@ -70,6 +70,7 @@ static const char *xmit_hash_policy_tbl[] = { "layer2+3", "encap2+3", "encap3+4", + "vlan+srcmac", NULL, }; @@ -148,7 +149,7 @@ static void print_explain(FILE *f) "ARP_ALL_TARGETS := any|all\n" "PRIMARY_RESELECT := always|better|failure\n" "FAIL_OVER_MAC := none|active|follow\n" - "XMIT_HASH_POLICY := layer2|layer2+3|layer3+4|encap2+3|encap3+4\n" + "XMIT_HASH_POLICY := layer2|layer2+3|layer3+4|encap2+3|encap3+4|vlan+srcmac\n" "LACP_RATE := slow|fast\n" "AD_SELECT := stable|bandwidth|count\n" ); diff --git a/ip/iplink_bridge.c b/ip/iplink_bridge.c index 3e81aa0..d12fd05 100644 --- a/ip/iplink_bridge.c +++ b/ip/iplink_bridge.c @@ -74,7 +74,7 @@ static void explain(void) void br_dump_bridge_id(const struct ifla_bridge_id *id, char *buf, size_t len) { - char eaddr[32]; + char eaddr[18]; ether_ntoa_r((const struct ether_addr *)id->addr, eaddr); snprintf(buf, len, "%.2x%.2x.%s", id->prio[0], id->prio[1], eaddr); diff --git a/ip/iplink_bridge_slave.c b/ip/iplink_bridge_slave.c index 79a1d2f..7178758 100644 --- a/ip/iplink_bridge_slave.c +++ b/ip/iplink_bridge_slave.c @@ -76,14 +76,6 @@ static void print_portstate(FILE *f, __u8 state) print_int(PRINT_ANY, "state_index", "state (%d) ", state); } -static void _print_onoff(FILE *f, char *json_flag, char *flag, __u8 val) -{ - if (is_json_context()) - print_bool(PRINT_JSON, flag, NULL, val); - else - fprintf(f, "%s %s ", flag, val ? 
"on" : "off"); -} - static void _print_timer(FILE *f, const char *attr, struct rtattr *timer) { struct timeval tv; @@ -145,27 +137,27 @@ static void bridge_slave_print_opt(struct link_util *lu, FILE *f, rta_getattr_u32(tb[IFLA_BRPORT_COST])); if (tb[IFLA_BRPORT_MODE]) - _print_onoff(f, "mode", "hairpin", + print_on_off(PRINT_ANY, "hairpin", "hairpin %s ", rta_getattr_u8(tb[IFLA_BRPORT_MODE])); if (tb[IFLA_BRPORT_GUARD]) - _print_onoff(f, "guard", "guard", + print_on_off(PRINT_ANY, "guard", "guard %s ", rta_getattr_u8(tb[IFLA_BRPORT_GUARD])); if (tb[IFLA_BRPORT_PROTECT]) - _print_onoff(f, "protect", "root_block", + print_on_off(PRINT_ANY, "root_block", "root_block %s ", rta_getattr_u8(tb[IFLA_BRPORT_PROTECT])); if (tb[IFLA_BRPORT_FAST_LEAVE]) - _print_onoff(f, "fast_leave", "fastleave", + print_on_off(PRINT_ANY, "fastleave", "fastleave %s ", rta_getattr_u8(tb[IFLA_BRPORT_FAST_LEAVE])); if (tb[IFLA_BRPORT_LEARNING]) - _print_onoff(f, "learning", "learning", + print_on_off(PRINT_ANY, "learning", "learning %s ", rta_getattr_u8(tb[IFLA_BRPORT_LEARNING])); if (tb[IFLA_BRPORT_UNICAST_FLOOD]) - _print_onoff(f, "unicast_flood", "flood", + print_on_off(PRINT_ANY, "flood", "flood %s ", rta_getattr_u8(tb[IFLA_BRPORT_UNICAST_FLOOD])); if (tb[IFLA_BRPORT_ID]) @@ -233,11 +225,11 @@ static void bridge_slave_print_opt(struct link_util *lu, FILE *f, rta_getattr_u8(tb[IFLA_BRPORT_CONFIG_PENDING])); if (tb[IFLA_BRPORT_PROXYARP]) - _print_onoff(f, "proxyarp", "proxy_arp", + print_on_off(PRINT_ANY, "proxy_arp", "proxy_arp %s ", rta_getattr_u8(tb[IFLA_BRPORT_PROXYARP])); if (tb[IFLA_BRPORT_PROXYARP_WIFI]) - _print_onoff(f, "proxyarp_wifi", "proxy_arp_wifi", + print_on_off(PRINT_ANY, "proxy_arp_wifi", "proxy_arp_wifi %s ", rta_getattr_u8(tb[IFLA_BRPORT_PROXYARP_WIFI])); if (tb[IFLA_BRPORT_MULTICAST_ROUTER]) @@ -255,15 +247,15 @@ static void bridge_slave_print_opt(struct link_util *lu, FILE *f, rta_getattr_u8(tb[IFLA_BRPORT_FAST_LEAVE]) ? 
"on" : "off"); if (tb[IFLA_BRPORT_MCAST_FLOOD]) - _print_onoff(f, "mcast_flood", "mcast_flood", + print_on_off(PRINT_ANY, "mcast_flood", "mcast_flood %s ", rta_getattr_u8(tb[IFLA_BRPORT_MCAST_FLOOD])); if (tb[IFLA_BRPORT_MCAST_TO_UCAST]) - _print_onoff(f, "mcast_to_unicast", "mcast_to_unicast", + print_on_off(PRINT_ANY, "mcast_to_unicast", "mcast_to_unicast %s ", rta_getattr_u8(tb[IFLA_BRPORT_MCAST_TO_UCAST])); if (tb[IFLA_BRPORT_NEIGH_SUPPRESS]) - _print_onoff(f, "neigh_suppress", "neigh_suppress", + print_on_off(PRINT_ANY, "neigh_suppress", "neigh_suppress %s ", rta_getattr_u8(tb[IFLA_BRPORT_NEIGH_SUPPRESS])); if (tb[IFLA_BRPORT_GROUP_FWD_MASK]) { @@ -279,11 +271,11 @@ static void bridge_slave_print_opt(struct link_util *lu, FILE *f, } if (tb[IFLA_BRPORT_VLAN_TUNNEL]) - _print_onoff(f, "vlan_tunnel", "vlan_tunnel", + print_on_off(PRINT_ANY, "vlan_tunnel", "vlan_tunnel %s ", rta_getattr_u8(tb[IFLA_BRPORT_VLAN_TUNNEL])); if (tb[IFLA_BRPORT_ISOLATED]) - _print_onoff(f, "isolated", "isolated", + print_on_off(PRINT_ANY, "isolated", "isolated %s ", rta_getattr_u8(tb[IFLA_BRPORT_ISOLATED])); if (tb[IFLA_BRPORT_BACKUP_PORT]) { @@ -297,15 +289,11 @@ static void bridge_slave_print_opt(struct link_util *lu, FILE *f, static void bridge_slave_parse_on_off(char *arg_name, char *arg_val, struct nlmsghdr *n, int type) { - __u8 val; - - if (strcmp(arg_val, "on") == 0) - val = 1; - else if (strcmp(arg_val, "off") == 0) - val = 0; - else - invarg("should be \"on\" or \"off\"", arg_name); + int ret; + __u8 val = parse_on_off(arg_name, arg_val, &ret); + if (ret) + exit(1); addattr8(n, 1024, type, val); } diff --git a/ip/iplink_can.c b/ip/iplink_can.c index 735ab94..6a26f3f 100644 --- a/ip/iplink_can.c +++ b/ip/iplink_can.c @@ -37,6 +37,7 @@ static void print_usage(FILE *f) "\t[ fd { on | off } ]\n" "\t[ fd-non-iso { on | off } ]\n" "\t[ presume-ack { on | off } ]\n" + "\t[ cc-len8-dlc { on | off } ]\n" "\n" "\t[ restart-ms TIME-MS ]\n" "\t[ restart ]\n" @@ -103,6 +104,7 @@ static 
void print_ctrlmode(FILE *f, __u32 cm) _PF(CAN_CTRLMODE_FD, "FD"); _PF(CAN_CTRLMODE_FD_NON_ISO, "FD-NON-ISO"); _PF(CAN_CTRLMODE_PRESUME_ACK, "PRESUME-ACK"); + _PF(CAN_CTRLMODE_CC_LEN8_DLC, "CC-LEN8-DLC"); #undef _PF if (cm) print_hex(PRINT_ANY, NULL, "%x", cm); @@ -211,6 +213,10 @@ static int can_parse_opt(struct link_util *lu, int argc, char **argv, NEXT_ARG(); set_ctrlmode("presume-ack", *argv, &cm, CAN_CTRLMODE_PRESUME_ACK); + } else if (matches(*argv, "cc-len8-dlc") == 0) { + NEXT_ARG(); + set_ctrlmode("cc-len8-dlc", *argv, &cm, + CAN_CTRLMODE_CC_LEN8_DLC); } else if (matches(*argv, "restart") == 0) { __u32 val = 1; diff --git a/ip/iplink_macvlan.c b/ip/iplink_macvlan.c index b966a61..79df17e 100644 --- a/ip/iplink_macvlan.c +++ b/ip/iplink_macvlan.c @@ -30,12 +30,13 @@ static void print_explain(struct link_util *lu, FILE *f) { fprintf(f, - "Usage: ... %s mode MODE [flag MODE_FLAG] MODE_OPTS\n" + "Usage: ... %s mode MODE [flag MODE_FLAG] MODE_OPTS [bcqueuelen BC_QUEUE_LEN]\n" "\n" "MODE: private | vepa | bridge | passthru | source\n" "MODE_FLAG: null | nopromisc\n" "MODE_OPTS: for mode \"source\":\n" - "\tmacaddr { { add | del } | set [ [ ... ] ] | flush }\n", + "\tmacaddr { { add | del } | set [ [ ... 
] ] | flush }\n" + "BC_QUEUE_LEN: Length of the rx queue for broadcast/multicast: [0-4294967295]\n", lu->id ); } @@ -62,6 +63,14 @@ static int flag_arg(const char *arg) return -1; } +static int bc_queue_len_arg(const char *arg) +{ + fprintf(stderr, + "Error: argument of \"bcqueuelen\" must be a positive integer [0-4294967295], not \"%s\"\n", + arg); + return -1; +} + static int macvlan_parse_opt(struct link_util *lu, int argc, char **argv, struct nlmsghdr *n) { @@ -150,6 +159,14 @@ static int macvlan_parse_opt(struct link_util *lu, int argc, char **argv, } else if (matches(*argv, "nopromisc") == 0) { flags |= MACVLAN_FLAG_NOPROMISC; has_flags = 1; + } else if (matches(*argv, "bcqueuelen") == 0) { + __u32 bc_queue_len; + NEXT_ARG(); + + if (get_u32(&bc_queue_len, *argv, 0)) { + return bc_queue_len_arg(*argv); + } + addattr32(n, 1024, IFLA_MACVLAN_BC_QUEUE_LEN, bc_queue_len); } else if (matches(*argv, "help") == 0) { explain(lu); return -1; @@ -212,6 +229,18 @@ static void macvlan_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[] if (flags & MACVLAN_FLAG_NOPROMISC) print_bool(PRINT_ANY, "nopromisc", "nopromisc ", true); + if (tb[IFLA_MACVLAN_BC_QUEUE_LEN] && + RTA_PAYLOAD(tb[IFLA_MACVLAN_BC_QUEUE_LEN]) >= sizeof(__u32)) { + __u32 bc_queue_len = rta_getattr_u32(tb[IFLA_MACVLAN_BC_QUEUE_LEN]); + print_luint(PRINT_ANY, "bcqueuelen", "bcqueuelen %lu ", bc_queue_len); + } + + if (tb[IFLA_MACVLAN_BC_QUEUE_LEN_USED] && + RTA_PAYLOAD(tb[IFLA_MACVLAN_BC_QUEUE_LEN_USED]) >= sizeof(__u32)) { + __u32 bc_queue_len = rta_getattr_u32(tb[IFLA_MACVLAN_BC_QUEUE_LEN_USED]); + print_luint(PRINT_ANY, "usedbcqueuelen", "usedbcqueuelen %lu ", bc_queue_len); + } + /* in source mode, there are more options to print */ if (mode != MACVLAN_MODE_SOURCE) diff --git a/ip/iplink_vlan.c b/ip/iplink_vlan.c index 1e6817f..1426f2a 100644 --- a/ip/iplink_vlan.c +++ b/ip/iplink_vlan.c @@ -49,36 +49,30 @@ static int on_off(const char *msg, const char *arg) return -1; } +static int 
parse_qos_mapping(__u32 key, char *value, void *data) +{ + struct nlmsghdr *n = data; + struct ifla_vlan_qos_mapping m = { + .from = key, + }; + + if (get_u32(&m.to, value, 0)) + return 1; + + return addattr_l(n, 1024, IFLA_VLAN_QOS_MAPPING, &m, sizeof(m)); +} + static int vlan_parse_qos_map(int *argcp, char ***argvp, struct nlmsghdr *n, int attrtype) { - int argc = *argcp; - char **argv = *argvp; - struct ifla_vlan_qos_mapping m; struct rtattr *tail; tail = addattr_nest(n, 1024, attrtype); - while (argc > 0) { - char *colon = strchr(*argv, ':'); - - if (!colon) - break; - *colon = '\0'; - - if (get_u32(&m.from, *argv, 0)) - return 1; - if (get_u32(&m.to, colon + 1, 0)) - return 1; - argc--, argv++; - - addattr_l(n, 1024, IFLA_VLAN_QOS_MAPPING, &m, sizeof(m)); - } + if (parse_mapping(argcp, argvp, false, &parse_qos_mapping, n)) + return 1; addattr_nest_end(n, tail); - - *argcp = argc; - *argvp = argv; return 0; } diff --git a/ip/ipmacsec.c b/ip/ipmacsec.c index 18289ec..bf48e8b 100644 --- a/ip/ipmacsec.c +++ b/ip/ipmacsec.c @@ -23,8 +23,6 @@ #include "ll_map.h" #include "libgenl.h" -static const char * const values_on_off[] = { "off", "on" }; - static const char * const validate_str[] = { [MACSEC_VALIDATE_DISABLED] = "disabled", [MACSEC_VALIDATE_CHECK] = "check", @@ -108,25 +106,6 @@ static void ipmacsec_usage(void) exit(-1); } -static int one_of(const char *msg, const char *realval, const char * const *list, - size_t len, int *index) -{ - int i; - - for (i = 0; i < len; i++) { - if (matches(realval, list[i]) == 0) { - *index = i; - return 0; - } - } - - fprintf(stderr, "Error: argument of \"%s\" must be one of ", msg); - for (i = 0; i < len; i++) - fprintf(stderr, "\"%s\", ", list[i]); - fprintf(stderr, "not \"%s\"\n", realval); - return -1; -} - static int get_an(__u8 *val, const char *arg) { int ret = get_u8(val, arg, 0); @@ -559,8 +538,7 @@ static int do_offload(enum cmd c, int argc, char **argv) if (argc == 0) ipmacsec_usage(); - ret = one_of("offload", *argv, 
offload_str, ARRAY_SIZE(offload_str), - (int *)&offload); + offload = parse_one_of("offload", *argv, offload_str, ARRAY_SIZE(offload_str), &ret); if (ret) ipmacsec_usage(); @@ -1334,8 +1312,7 @@ static int macsec_parse_opt(struct link_util *lu, int argc, char **argv, NEXT_ARG(); int i; - ret = one_of("encrypt", *argv, values_on_off, - ARRAY_SIZE(values_on_off), &i); + i = parse_on_off("encrypt", *argv, &ret); if (ret != 0) return ret; addattr8(n, MACSEC_BUFLEN, IFLA_MACSEC_ENCRYPT, i); @@ -1343,8 +1320,7 @@ static int macsec_parse_opt(struct link_util *lu, int argc, char **argv, NEXT_ARG(); int i; - ret = one_of("send_sci", *argv, values_on_off, - ARRAY_SIZE(values_on_off), &i); + i = parse_on_off("send_sci", *argv, &ret); if (ret != 0) return ret; send_sci = i; @@ -1354,8 +1330,7 @@ static int macsec_parse_opt(struct link_util *lu, int argc, char **argv, NEXT_ARG(); int i; - ret = one_of("end_station", *argv, values_on_off, - ARRAY_SIZE(values_on_off), &i); + i = parse_on_off("end_station", *argv, &ret); if (ret != 0) return ret; es = i; @@ -1364,8 +1339,7 @@ static int macsec_parse_opt(struct link_util *lu, int argc, char **argv, NEXT_ARG(); int i; - ret = one_of("scb", *argv, values_on_off, - ARRAY_SIZE(values_on_off), &i); + i = parse_on_off("scb", *argv, &ret); if (ret != 0) return ret; scb = i; @@ -1374,8 +1348,7 @@ static int macsec_parse_opt(struct link_util *lu, int argc, char **argv, NEXT_ARG(); int i; - ret = one_of("protect", *argv, values_on_off, - ARRAY_SIZE(values_on_off), &i); + i = parse_on_off("protect", *argv, &ret); if (ret != 0) return ret; addattr8(n, MACSEC_BUFLEN, IFLA_MACSEC_PROTECT, i); @@ -1383,8 +1356,7 @@ static int macsec_parse_opt(struct link_util *lu, int argc, char **argv, NEXT_ARG(); int i; - ret = one_of("replay", *argv, values_on_off, - ARRAY_SIZE(values_on_off), &i); + i = parse_on_off("replay", *argv, &ret); if (ret != 0) return ret; replay_protect = !!i; @@ -1395,9 +1367,8 @@ static int macsec_parse_opt(struct link_util *lu, 
int argc, char **argv, invarg("expected replay window size", *argv); } else if (strcmp(*argv, "validate") == 0) { NEXT_ARG(); - ret = one_of("validate", *argv, - validate_str, ARRAY_SIZE(validate_str), - (int *)&validate); + validate = parse_one_of("validate", *argv, validate_str, + ARRAY_SIZE(validate_str), &ret); if (ret != 0) return ret; addattr8(n, MACSEC_BUFLEN, @@ -1411,9 +1382,8 @@ static int macsec_parse_opt(struct link_util *lu, int argc, char **argv, invarg("expected an { 0..3 }", *argv); } else if (strcmp(*argv, "offload") == 0) { NEXT_ARG(); - ret = one_of("offload", *argv, - offload_str, ARRAY_SIZE(offload_str), - (int *)&offload); + offload = parse_one_of("offload", *argv, offload_str, + ARRAY_SIZE(offload_str), &ret); if (ret != 0) return ret; addattr8(n, MACSEC_BUFLEN, diff --git a/ip/ipmonitor.c b/ip/ipmonitor.c index 685be52..50aa013 100644 --- a/ip/ipmonitor.c +++ b/ip/ipmonitor.c @@ -30,9 +30,10 @@ int listen_all_nsid; static void usage(void) { fprintf(stderr, - "Usage: ip monitor [ all | LISTofOBJECTS ] [ FILE ] [ label ] [all-nsid] [dev DEVICE]\n" - "LISTofOBJECTS := link | address | route | mroute | prefix |\n" - " neigh | netconf | rule | nsid\n" + "Usage: ip monitor [ all | OBJECTS ] [ FILE ] [ label ] [ all-nsid ]\n" + " [ dev DEVICE ]\n" + "OBJECTS := address | link | mroute | neigh | netconf |\n" + " nexthop | nsid | prefix | route | rule\n" "FILE := file FILENAME\n"); exit(-1); } diff --git a/ip/ipneigh.c b/ip/ipneigh.c index 678b403..2d6b7f5 100644 --- a/ip/ipneigh.c +++ b/ip/ipneigh.c @@ -50,15 +50,15 @@ static void usage(void) { fprintf(stderr, "Usage: ip neigh { add | del | change | replace }\n" - " { ADDR [ lladdr LLADDR ] [ nud STATE ] | proxy ADDR } [ dev DEV ]\n" - " [ router ] [ extern_learn ] [ protocol PROTO ]\n" + " { ADDR [ lladdr LLADDR ] [ nud STATE ] proxy ADDR }\n" + " [ dev DEV ] [ router ] [ extern_learn ] [ protocol PROTO ]\n" "\n" " ip neigh { show | flush } [ proxy ] [ to PREFIX ] [ dev DEV ] [ nud STATE ]\n" " [ 
vrf NAME ]\n" " ip neigh get { ADDR | proxy ADDR } dev DEV\n" "\n" - "STATE := { permanent | noarp | stale | reachable | none |\n" - " incomplete | delay | probe | failed }\n"); + "STATE := { delay | failed | incomplete | noarp | none |\n" + " permanent | probe | reachable | stale }\n"); exit(-1); } diff --git a/ip/ipnetconf.c b/ip/ipnetconf.c index 0e946ca..bb0ebe1 100644 --- a/ip/ipnetconf.c +++ b/ip/ipnetconf.c @@ -41,14 +41,6 @@ static void usage(void) exit(-1); } -static void print_onoff(FILE *fp, const char *flag, __u32 val) -{ - if (is_json_context()) - print_bool(PRINT_JSON, flag, NULL, val); - else - fprintf(fp, "%s %s ", flag, val ? "on" : "off"); -} - static struct rtattr *netconf_rta(struct netconfmsg *ncm) { return (struct rtattr *)((char *)ncm @@ -117,8 +109,8 @@ int print_netconf(struct rtnl_ctrl_data *ctrl, struct nlmsghdr *n, void *arg) } if (tb[NETCONFA_FORWARDING]) - print_onoff(fp, "forwarding", - rta_getattr_u32(tb[NETCONFA_FORWARDING])); + print_on_off(PRINT_ANY, "forwarding", "forwarding %s ", + rta_getattr_u32(tb[NETCONFA_FORWARDING])); if (tb[NETCONFA_RP_FILTER]) { __u32 rp_filter = rta_getattr_u32(tb[NETCONFA_RP_FILTER]); @@ -133,19 +125,21 @@ int print_netconf(struct rtnl_ctrl_data *ctrl, struct nlmsghdr *n, void *arg) } if (tb[NETCONFA_MC_FORWARDING]) - print_onoff(fp, "mc_forwarding", - rta_getattr_u32(tb[NETCONFA_MC_FORWARDING])); + print_on_off(PRINT_ANY, "mc_forwarding", "mc_forwarding %s ", + rta_getattr_u32(tb[NETCONFA_MC_FORWARDING])); if (tb[NETCONFA_PROXY_NEIGH]) - print_onoff(fp, "proxy_neigh", - rta_getattr_u32(tb[NETCONFA_PROXY_NEIGH])); + print_on_off(PRINT_ANY, "proxy_neigh", "proxy_neigh %s ", + rta_getattr_u32(tb[NETCONFA_PROXY_NEIGH])); if (tb[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]) - print_onoff(fp, "ignore_routes_with_linkdown", - rta_getattr_u32(tb[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN])); + print_on_off(PRINT_ANY, "ignore_routes_with_linkdown", + "ignore_routes_with_linkdown %s ", + 
rta_getattr_u32(tb[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN])); if (tb[NETCONFA_INPUT]) - print_onoff(fp, "input", rta_getattr_u32(tb[NETCONFA_INPUT])); + print_on_off(PRINT_ANY, "input", "input %s ", + rta_getattr_u32(tb[NETCONFA_INPUT])); close_json_object(); print_string(PRINT_FP, NULL, "\n", NULL); diff --git a/ip/ipnetns.c b/ip/ipnetns.c index 14e8e08..1203534 100644 --- a/ip/ipnetns.c +++ b/ip/ipnetns.c @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #define _ATFILE_SOURCE +#include #include #include #include @@ -578,18 +579,18 @@ static int netns_pids(int argc, char **argv) { const char *name; char net_path[PATH_MAX]; - int netns; + int netns = -1, ret = -1; struct stat netst; DIR *dir; struct dirent *entry; if (argc < 1) { fprintf(stderr, "No netns name specified\n"); - return -1; + goto out; } if (argc > 1) { fprintf(stderr, "extra arguments specified\n"); - return -1; + goto out; } name = argv[0]; @@ -598,18 +599,18 @@ static int netns_pids(int argc, char **argv) if (netns < 0) { fprintf(stderr, "Cannot open network namespace: %s\n", strerror(errno)); - return -1; + goto out; } if (fstat(netns, &netst) < 0) { fprintf(stderr, "Stat of netns failed: %s\n", strerror(errno)); - return -1; + goto out; } dir = opendir("/proc/"); if (!dir) { fprintf(stderr, "Open of /proc failed: %s\n", strerror(errno)); - return -1; + goto out; } while ((entry = readdir(dir))) { char pid_net_path[PATH_MAX]; @@ -626,15 +627,19 @@ static int netns_pids(int argc, char **argv) printf("%s\n", entry->d_name); } } + ret = 0; closedir(dir); - return 0; +out: + if (netns >= 0) + close(netns); + return ret; } int netns_identify_pid(const char *pidstr, char *name, int len) { char net_path[PATH_MAX]; - int netns; + int netns = -1, ret = -1; struct stat netst; DIR *dir; struct dirent *entry; @@ -646,22 +651,24 @@ int netns_identify_pid(const char *pidstr, char *name, int len) if (netns < 0) { fprintf(stderr, "Cannot open network namespace: %s\n", strerror(errno)); - return -1; + goto 
out; } if (fstat(netns, &netst) < 0) { fprintf(stderr, "Stat of netns failed: %s\n", strerror(errno)); - return -1; + goto out; } dir = opendir(NETNS_RUN_DIR); if (!dir) { /* Succeed treat a missing directory as an empty directory */ - if (errno == ENOENT) - return 0; + if (errno == ENOENT) { + ret = 0; + goto out; + } fprintf(stderr, "Failed to open directory %s:%s\n", NETNS_RUN_DIR, strerror(errno)); - return -1; + goto out; } while ((entry = readdir(dir))) { @@ -684,8 +691,12 @@ int netns_identify_pid(const char *pidstr, char *name, int len) strlcpy(name, entry->d_name, len); } } + ret = 0; closedir(dir); - return 0; +out: + if (netns >= 0) + close(netns); + return ret; } @@ -801,6 +812,7 @@ static int netns_add(int argc, char **argv, bool create) const char *name; pid_t pid; int fd; + int lock; int made_netns_run_dir_mount = 0; if (create) { @@ -831,12 +843,37 @@ static int netns_add(int argc, char **argv, bool create) * namespace file in one namespace will unmount the network namespace * file in all namespaces allowing the network namespace to be freed * sooner. + * These setup steps need to happen only once, as if multiple ip processes + * try to attempt the same operation at the same time, the mountpoints will + * be recursively created multiple times, eventually causing the system + * to lock up. For example, this has been observed when multiple netns + * namespaces are created in parallel at boot. See: + * https://bugs.debian.org/949235 + * Try to take an exclusive file lock on the top level directory to ensure + * this cannot happen, but proceed nonetheless if it cannot happen for any + * reason. 
*/ + lock = open(NETNS_RUN_DIR, O_RDONLY|O_DIRECTORY, 0); + if (lock < 0) { + fprintf(stderr, "Cannot open netns runtime directory \"%s\": %s\n", + NETNS_RUN_DIR, strerror(errno)); + return -1; + } + if (flock(lock, LOCK_EX) < 0) { + fprintf(stderr, "Warning: could not flock netns runtime directory \"%s\": %s\n", + NETNS_RUN_DIR, strerror(errno)); + close(lock); + lock = -1; + } while (mount("", NETNS_RUN_DIR, "none", MS_SHARED | MS_REC, NULL)) { /* Fail unless we need to make the mount point */ if (errno != EINVAL || made_netns_run_dir_mount) { fprintf(stderr, "mount --make-shared %s failed: %s\n", NETNS_RUN_DIR, strerror(errno)); + if (lock != -1) { + flock(lock, LOCK_UN); + close(lock); + } return -1; } @@ -844,10 +881,18 @@ static int netns_add(int argc, char **argv, bool create) if (mount(NETNS_RUN_DIR, NETNS_RUN_DIR, "none", MS_BIND | MS_REC, NULL)) { fprintf(stderr, "mount --bind %s %s failed: %s\n", NETNS_RUN_DIR, NETNS_RUN_DIR, strerror(errno)); + if (lock != -1) { + flock(lock, LOCK_UN); + close(lock); + } return -1; } made_netns_run_dir_mount = 1; } + if (lock != -1) { + flock(lock, LOCK_UN); + close(lock); + } /* Create the filesystem state */ fd = open(netns_path, O_RDONLY|O_CREAT|O_EXCL, 0); diff --git a/ip/ipnexthop.c b/ip/ipnexthop.c index 22c6649..f0658a9 100644 --- a/ip/ipnexthop.c +++ b/ip/ipnexthop.c @@ -38,12 +38,12 @@ static void usage(void) fprintf(stderr, "Usage: ip nexthop { list | flush } [ protocol ID ] SELECTOR\n" " ip nexthop { add | replace } id ID NH [ protocol ID ]\n" - " ip nexthop { get| del } id ID\n" + " ip nexthop { get | del } id ID\n" "SELECTOR := [ id ID ] [ dev DEV ] [ vrf NAME ] [ master DEV ]\n" " [ groups ] [ fdb ]\n" "NH := { blackhole | [ via ADDRESS ] [ dev DEV ] [ onlink ]\n" - " [ encap ENCAPTYPE ENCAPHDR ] | group GROUP ] }\n" - "GROUP := [ id[,weight]>//... ]\n" + " [ encap ENCAPTYPE ENCAPHDR ] | group GROUP [ fdb ] }\n" + "GROUP := [ //... 
]\n" "ENCAPTYPE := [ mpls ]\n" "ENCAPHDR := [ MPLSLABEL ]\n"); exit(-1); @@ -263,8 +263,7 @@ int print_nexthop(struct nlmsghdr *n, void *arg) rtnl_rtprot_n2a(nhm->nh_protocol, b1, sizeof(b1))); } - if (tb[NHA_OIF]) - print_rt_flags(fp, nhm->nh_flags); + print_rt_flags(fp, nhm->nh_flags); if (tb[NHA_FDB]) print_null(PRINT_ANY, "fdb", "fdb", NULL); @@ -278,8 +277,9 @@ int print_nexthop(struct nlmsghdr *n, void *arg) static int add_nh_group_attr(struct nlmsghdr *n, int maxlen, char *argv) { - struct nexthop_grp *grps; + struct nexthop_grp *grps = NULL; int count = 0, i; + int err = -1; char *sep, *wsep; if (*argv != '\0') @@ -293,11 +293,11 @@ static int add_nh_group_attr(struct nlmsghdr *n, int maxlen, char *argv) } if (count == 0) - return -1; + goto out; grps = calloc(count, sizeof(*grps)); if (!grps) - return -1; + goto out; for (i = 0; i < count; ++i) { sep = strchr(argv, '/'); @@ -309,7 +309,7 @@ static int add_nh_group_attr(struct nlmsghdr *n, int maxlen, char *argv) *wsep = '\0'; if (get_unsigned(&grps[i].id, argv, 0)) - return -1; + goto out; if (wsep) { unsigned int w; @@ -325,7 +325,10 @@ static int add_nh_group_attr(struct nlmsghdr *n, int maxlen, char *argv) argv = sep + 1; } - return addattr_l(n, maxlen, NHA_GROUP, grps, count * sizeof(*grps)); + err = addattr_l(n, maxlen, NHA_GROUP, grps, count * sizeof(*grps)); +out: + free(grps); + return err; } static int ipnh_modify(int cmd, unsigned int flags, int argc, char **argv) diff --git a/ip/iproute.c b/ip/iproute.c index 05ec2c2..5853f02 100644 --- a/ip/iproute.c +++ b/ip/iproute.c @@ -362,6 +362,8 @@ void print_rt_flags(FILE *fp, unsigned int flags) print_string(PRINT_ANY, NULL, "%s ", "pervasive"); if (flags & RTNH_F_OFFLOAD) print_string(PRINT_ANY, NULL, "%s ", "offload"); + if (flags & RTNH_F_TRAP) + print_string(PRINT_ANY, NULL, "%s ", "trap"); if (flags & RTM_F_NOTIFY) print_string(PRINT_ANY, NULL, "%s ", "notify"); if (flags & RTNH_F_LINKDOWN) @@ -372,6 +374,8 @@ void print_rt_flags(FILE *fp, 
unsigned int flags) print_string(PRINT_ANY, NULL, "%s ", "rt_offload"); if (flags & RTM_F_TRAP) print_string(PRINT_ANY, NULL, "%s ", "rt_trap"); + if (flags & RTM_F_OFFLOAD_FAILED) + print_string(PRINT_ANY, NULL, "%s ", "rt_offload_failed"); close_json_array(PRINT_JSON, NULL); } @@ -792,9 +796,10 @@ int print_route(struct nlmsghdr *n, void *arg) "%s/%u", rt_addr_n2a_rta(family, tb[RTA_DST]), r->rtm_dst_len); } else { - format_host_rta_r(family, tb[RTA_DST], + const char *hostname = format_host_rta_r(family, tb[RTA_DST], b1, sizeof(b1)); - + if (hostname) + strncpy(b1, hostname, sizeof(b1) - 1); } } else if (r->rtm_dst_len) { snprintf(b1, sizeof(b1), "0/%d ", r->rtm_dst_len); @@ -814,8 +819,10 @@ int print_route(struct nlmsghdr *n, void *arg) rt_addr_n2a_rta(family, tb[RTA_SRC]), r->rtm_src_len); } else { - format_host_rta_r(family, tb[RTA_SRC], + const char *hostname = format_host_rta_r(family, tb[RTA_SRC], b1, sizeof(b1)); + if (hostname) + strncpy(b1, hostname, sizeof(b1) - 1); } print_color_string(PRINT_ANY, color, "from", "from %s ", b1); @@ -2067,7 +2074,18 @@ static int iproute_get(int argc, char **argv) if (addr.bytelen) addattr_l(&req.n, sizeof(req), RTA_DST, &addr.data, addr.bytelen); - req.r.rtm_dst_len = addr.bitlen; + if (req.r.rtm_family == AF_INET && addr.bitlen != 32) { + fprintf(stderr, + "Warning: /%u as prefix is invalid, only /32 (or none) is supported.\n", + addr.bitlen); + req.r.rtm_dst_len = 32; + } else if (req.r.rtm_family == AF_INET6 && addr.bitlen != 128) { + fprintf(stderr, + "Warning: /%u as prefix is invalid, only /128 (or none) is supported.\n", + addr.bitlen); + req.r.rtm_dst_len = 128; + } else + req.r.rtm_dst_len = addr.bitlen; address_found = true; } argc--; argv++; diff --git a/ip/iproute_lwtunnel.c b/ip/iproute_lwtunnel.c index 9b4f088..566fc7e 100644 --- a/ip/iproute_lwtunnel.c +++ b/ip/iproute_lwtunnel.c @@ -294,6 +294,11 @@ static void print_encap_seg6local(FILE *fp, struct rtattr *encap) 
rtnl_rttable_n2a(rta_getattr_u32(tb[SEG6_LOCAL_TABLE]), b1, sizeof(b1))); + if (tb[SEG6_LOCAL_VRFTABLE]) + print_string(PRINT_ANY, "vrftable", "vrftable %s ", + rtnl_rttable_n2a(rta_getattr_u32(tb[SEG6_LOCAL_VRFTABLE]), + b1, sizeof(b1))); + if (tb[SEG6_LOCAL_NH4]) { print_string(PRINT_ANY, "nh4", "nh4 %s ", rt_addr_n2a_rta(AF_INET, tb[SEG6_LOCAL_NH4])); @@ -860,9 +865,10 @@ static int lwt_parse_bpf(struct rtattr *rta, size_t len, static int parse_encap_seg6local(struct rtattr *rta, size_t len, int *argcp, char ***argvp) { - int segs_ok = 0, hmac_ok = 0, table_ok = 0, nh4_ok = 0, nh6_ok = 0; - int iif_ok = 0, oif_ok = 0, action_ok = 0, srh_ok = 0, bpf_ok = 0; - __u32 action = 0, table, iif, oif; + int segs_ok = 0, hmac_ok = 0, table_ok = 0, vrftable_ok = 0; + int nh4_ok = 0, nh6_ok = 0, iif_ok = 0, oif_ok = 0; + __u32 action = 0, table, vrftable, iif, oif; + int action_ok = 0, srh_ok = 0, bpf_ok = 0; struct ipv6_sr_hdr *srh; char **argv = *argvp; int argc = *argcp; @@ -885,8 +891,17 @@ static int parse_encap_seg6local(struct rtattr *rta, size_t len, int *argcp, NEXT_ARG(); if (table_ok++) duparg2("table", *argv); - rtnl_rttable_a2n(&table, *argv); + if (rtnl_rttable_a2n(&table, *argv)) + invarg("invalid table id\n", *argv); ret = rta_addattr32(rta, len, SEG6_LOCAL_TABLE, table); + } else if (strcmp(*argv, "vrftable") == 0) { + NEXT_ARG(); + if (vrftable_ok++) + duparg2("vrftable", *argv); + if (rtnl_rttable_a2n(&vrftable, *argv)) + invarg("invalid vrf table id\n", *argv); + ret = rta_addattr32(rta, len, SEG6_LOCAL_VRFTABLE, + vrftable); } else if (strcmp(*argv, "nh4") == 0) { NEXT_ARG(); if (nh4_ok++) diff --git a/ip/iprule.c b/ip/iprule.c index 9f5d998..4166073 100644 --- a/ip/iprule.c +++ b/ip/iprule.c @@ -44,7 +44,8 @@ static void usage(void) "Usage: ip rule { add | del } SELECTOR ACTION\n" " ip rule { flush | save | restore }\n" " ip rule [ list [ SELECTOR ]]\n" - "SELECTOR := [ not ] [ from PREFIX ] [ to PREFIX ] [ tos TOS ] [ fwmark FWMARK[/MASK] ]\n" + 
"SELECTOR := [ not ] [ from PREFIX ] [ to PREFIX ] [ tos TOS ]\n" + " [ fwmark FWMARK[/MASK] ]\n" " [ iif STRING ] [ oif STRING ] [ pref NUMBER ] [ l3mdev ]\n" " [ uidrange NUMBER-NUMBER ]\n" " [ ipproto PROTOCOL ]\n" diff --git a/ip/iptunnel.c b/ip/iptunnel.c index 696f3b9..2369ee0 100644 --- a/ip/iptunnel.c +++ b/ip/iptunnel.c @@ -34,7 +34,8 @@ static void usage(void) { fprintf(stderr, "Usage: ip tunnel { add | change | del | show | prl | 6rd } [ NAME ]\n" - " [ mode { ipip | gre | sit | isatap | vti } ] [ remote ADDR ] [ local ADDR ]\n" + " [ mode { gre | ipip | isatap | sit | vti } ]\n" + " [ remote ADDR ] [ local ADDR ]\n" " [ [i|o]seq ] [ [i|o]key KEY ] [ [i|o]csum ]\n" " [ prl-default ADDR ] [ prl-nodefault ADDR ] [ prl-delete ADDR ]\n" " [ 6rd-prefix ADDR ] [ 6rd-relay_prefix ADDR ] [ 6rd-reset ]\n" diff --git a/ip/iptuntap.c b/ip/iptuntap.c index 82e3849..e9cc7c0 100644 --- a/ip/iptuntap.c +++ b/ip/iptuntap.c @@ -541,14 +541,6 @@ static void print_mq(FILE *f, struct rtattr *tb[]) } } -static void print_onoff(FILE *f, const char *flag, __u8 val) -{ - if (is_json_context()) - print_bool(PRINT_JSON, flag, NULL, !!val); - else - fprintf(f, "%s %s ", flag, val ? 
"on" : "off"); -} - static void print_type(FILE *f, __u8 type) { SPRINT_BUF(buf); @@ -573,17 +565,19 @@ static void tun_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) print_type(f, rta_getattr_u8(tb[IFLA_TUN_TYPE])); if (tb[IFLA_TUN_PI]) - print_onoff(f, "pi", rta_getattr_u8(tb[IFLA_TUN_PI])); + print_on_off(PRINT_ANY, "pi", "pi %s ", + rta_getattr_u8(tb[IFLA_TUN_PI])); if (tb[IFLA_TUN_VNET_HDR]) { - print_onoff(f, "vnet_hdr", - rta_getattr_u8(tb[IFLA_TUN_VNET_HDR])); + print_on_off(PRINT_ANY, "vnet_hdr", "vnet_hdr %s ", + rta_getattr_u8(tb[IFLA_TUN_VNET_HDR])); } print_mq(f, tb); if (tb[IFLA_TUN_PERSIST]) - print_onoff(f, "persist", rta_getattr_u8(tb[IFLA_TUN_PERSIST])); + print_on_off(PRINT_ANY, "persist", "persist %s ", + rta_getattr_u8(tb[IFLA_TUN_PERSIST])); if (tb[IFLA_TUN_OWNER]) print_owner(f, rta_getattr_u32(tb[IFLA_TUN_OWNER])); diff --git a/ip/ipvrf.c b/ip/ipvrf.c index 28dd8e2..9157803 100644 --- a/ip/ipvrf.c +++ b/ip/ipvrf.c @@ -256,8 +256,8 @@ static int prog_load(int idx) BPF_EXIT_INSN(), }; - return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, prog, sizeof(prog), - "GPL", bpf_log_buf, sizeof(bpf_log_buf)); + return bpf_program_load(BPF_PROG_TYPE_CGROUP_SOCK, prog, sizeof(prog), + "GPL", bpf_log_buf, sizeof(bpf_log_buf)); } static int vrf_configure_cgroup(const char *path, int ifindex) @@ -278,8 +278,8 @@ static int vrf_configure_cgroup(const char *path, int ifindex) */ prog_fd = prog_load(ifindex); if (prog_fd < 0) { - fprintf(stderr, "Failed to load BPF prog: '%s'\n", - strerror(errno)); + fprintf(stderr, "Failed to load BPF prog: '%s'\n%s", + strerror(errno), bpf_log_buf); if (errno != EPERM) { fprintf(stderr, @@ -288,7 +288,7 @@ static int vrf_configure_cgroup(const char *path, int ifindex) goto out; } - if (bpf_prog_attach_fd(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE)) { + if (bpf_program_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE)) { fprintf(stderr, "Failed to attach prog to cgroup: '%s'\n", strerror(errno)); goto out; diff 
--git a/ip/ipxfrm.c b/ip/ipxfrm.c index cac8ba2..8a79403 100644 --- a/ip/ipxfrm.c +++ b/ip/ipxfrm.c @@ -649,6 +649,10 @@ static void xfrm_output_mark_print(struct rtattr *tb[], FILE *fp) __u32 output_mark = rta_getattr_u32(tb[XFRMA_OUTPUT_MARK]); fprintf(fp, "output-mark 0x%x", output_mark); + if (tb[XFRMA_SET_MARK_MASK]) { + __u32 mask = rta_getattr_u32(tb[XFRMA_SET_MARK_MASK]); + fprintf(fp, "/0x%x", mask); + } } int xfrm_parse_mark(struct xfrm_mark *mark, int *argcp, char ***argvp) @@ -912,6 +916,19 @@ static int xfrm_selector_iszero(struct xfrm_selector *s) return (memcmp(&s0, s, sizeof(s0)) == 0); } +static void xfrm_sec_ctx_print(FILE *fp, struct rtattr *attr) +{ + struct xfrm_user_sec_ctx *sctx; + + fprintf(fp, "\tsecurity context "); + + if (RTA_PAYLOAD(attr) < sizeof(*sctx)) + fprintf(fp, "(ERROR truncated)"); + + sctx = RTA_DATA(attr); + fprintf(fp, "%.*s %s", sctx->ctx_len, (char *)(sctx + 1), _SL_); +} + void xfrm_state_info_print(struct xfrm_usersa_info *xsinfo, struct rtattr *tb[], FILE *fp, const char *prefix, const char *title, bool nokeys) @@ -979,19 +996,8 @@ void xfrm_state_info_print(struct xfrm_usersa_info *xsinfo, xfrm_stats_print(&xsinfo->stats, fp, buf); } - if (tb[XFRMA_SEC_CTX]) { - struct xfrm_user_sec_ctx *sctx; - - fprintf(fp, "\tsecurity context "); - - if (RTA_PAYLOAD(tb[XFRMA_SEC_CTX]) < sizeof(*sctx)) - fprintf(fp, "(ERROR truncated)"); - - sctx = RTA_DATA(tb[XFRMA_SEC_CTX]); - - fprintf(fp, "%s %s", (char *)(sctx + 1), _SL_); - } - + if (tb[XFRMA_SEC_CTX]) + xfrm_sec_ctx_print(fp, tb[XFRMA_SEC_CTX]); } void xfrm_policy_info_print(struct xfrm_userpolicy_info *xpinfo, @@ -1002,19 +1008,8 @@ void xfrm_policy_info_print(struct xfrm_userpolicy_info *xpinfo, xfrm_selector_print(&xpinfo->sel, preferred_family, fp, title); - if (tb[XFRMA_SEC_CTX]) { - struct xfrm_user_sec_ctx *sctx; - - fprintf(fp, "\tsecurity context "); - - if (RTA_PAYLOAD(tb[XFRMA_SEC_CTX]) < sizeof(*sctx)) - fprintf(fp, "(ERROR truncated)"); - - sctx = 
RTA_DATA(tb[XFRMA_SEC_CTX]); - - fprintf(fp, "%s ", (char *)(sctx + 1)); - fprintf(fp, "%s", _SL_); - } + if (tb[XFRMA_SEC_CTX]) + xfrm_sec_ctx_print(fp, tb[XFRMA_SEC_CTX]); if (prefix) strlcat(buf, prefix, sizeof(buf)); diff --git a/ip/link_gre.c b/ip/link_gre.c index 0461e5d..6d4a8be 100644 --- a/ip/link_gre.c +++ b/ip/link_gre.c @@ -536,10 +536,10 @@ static void gre_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) if (erspan_dir == 0) print_string(PRINT_ANY, "erspan_dir", - "erspan_dir ingress ", NULL); + "erspan_dir %s ", "ingress"); else print_string(PRINT_ANY, "erspan_dir", - "erspan_dir egress ", NULL); + "erspan_dir %s ", "egress"); } if (tb[IFLA_GRE_ERSPAN_HWID]) { diff --git a/ip/link_gre6.c b/ip/link_gre6.c index 9d270f4..f33598a 100644 --- a/ip/link_gre6.c +++ b/ip/link_gre6.c @@ -594,10 +594,10 @@ static void gre_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) if (erspan_dir == 0) print_string(PRINT_ANY, "erspan_dir", - "erspan_dir ingress ", NULL); + "erspan_dir %s ", "ingress"); else print_string(PRINT_ANY, "erspan_dir", - "erspan_dir egress ", NULL); + "erspan_dir %s ", "egress"); } if (tb[IFLA_GRE_ERSPAN_HWID]) { diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c index ddf784c..a4f452f 100644 --- a/ip/xfrm_state.c +++ b/ip/xfrm_state.c @@ -62,7 +62,7 @@ static void usage(void) " [ flag FLAG-LIST ] [ sel SELECTOR ] [ LIMIT-LIST ] [ encap ENCAP ]\n" " [ coa ADDR[/PLEN] ] [ ctx CTX ] [ extra-flag EXTRA-FLAG-LIST ]\n" " [ offload [dev DEV] dir DIR ]\n" - " [ output-mark OUTPUT-MARK ]\n" + " [ output-mark OUTPUT-MARK [ mask MASK ] ]\n" " [ if_id IF_ID ]\n" "Usage: ip xfrm state allocspi ID [ mode MODE ] [ mark MARK [ mask MASK ] ]\n" " [ reqid REQID ] [ seq SEQ ] [ min SPI max SPI ]\n" @@ -328,7 +328,7 @@ static int xfrm_state_modify(int cmd, unsigned int flags, int argc, char **argv) struct xfrm_user_sec_ctx sctx; char str[CTX_BUF_SIZE]; } ctx = {}; - __u32 output_mark = 0; + struct xfrm_mark output_mark = {0, 0}; bool 
is_if_id_set = false; __u32 if_id = 0; @@ -448,8 +448,18 @@ static int xfrm_state_modify(int cmd, unsigned int flags, int argc, char **argv) } } else if (strcmp(*argv, "output-mark") == 0) { NEXT_ARG(); - if (get_u32(&output_mark, *argv, 0)) + if (get_u32(&output_mark.v, *argv, 0)) invarg("value after \"output-mark\" is invalid", *argv); + if (argc > 1) { + NEXT_ARG(); + if (strcmp(*argv, "mask") == 0) { + NEXT_ARG(); + if (get_u32(&output_mark.m, *argv, 0)) + invarg("mask value is invalid\n", *argv); + } else { + PREV_ARG(); + } + } } else if (strcmp(*argv, "if_id") == 0) { NEXT_ARG(); if (get_u32(&if_id, *argv, 0)) @@ -741,8 +751,11 @@ static int xfrm_state_modify(int cmd, unsigned int flags, int argc, char **argv) } } - if (output_mark) - addattr32(&req.n, sizeof(req.buf), XFRMA_OUTPUT_MARK, output_mark); + if (output_mark.v) + addattr32(&req.n, sizeof(req.buf), XFRMA_OUTPUT_MARK, output_mark.v); + + if (output_mark.m) + addattr32(&req.n, sizeof(req.buf), XFRMA_SET_MARK_MASK, output_mark.m); if (rtnl_open_byproto(&rth, 0, NETLINK_XFRM) < 0) exit(1); diff --git a/lib/Makefile b/lib/Makefile index 7cba185..6c98f9a 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -3,11 +3,20 @@ include ../config.mk CFLAGS += -fPIC -UTILOBJ = utils.o rt_names.o ll_map.o ll_types.o ll_proto.o ll_addr.o \ - inet_proto.o namespace.o json_writer.o json_print.o \ - names.o color.o bpf.o exec.o fs.o cg_map.o +UTILOBJ = utils.o utils_math.o rt_names.o ll_map.o ll_types.o ll_proto.o ll_addr.o \ + inet_proto.o namespace.o json_writer.o json_print.o json_print_math.o \ + names.o color.o bpf_legacy.o bpf_glue.o exec.o fs.o cg_map.o + +ifeq ($(HAVE_ELF),y) +ifeq ($(HAVE_LIBBPF),y) +UTILOBJ += bpf_libbpf.o +endif +endif NLOBJ=libgenl.o libnetlink.o +ifeq ($(HAVE_MNL),y) +NLOBJ += mnl_utils.o +endif all: libnetlink.a libutil.a diff --git a/lib/bpf_glue.c b/lib/bpf_glue.c new file mode 100644 index 0000000..eaa9504 --- /dev/null +++ b/lib/bpf_glue.c @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: 
GPL-2.0 */ +/* + * bpf_glue.c: BPF code to call both legacy and libbpf code + * Authors: Hangbin Liu + * + */ +#include + +#include "bpf_util.h" +#ifdef HAVE_LIBBPF +#include +#endif + +int bpf_program_load(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t size_insns, const char *license, char *log, + size_t size_log) +{ +#ifdef HAVE_LIBBPF + return bpf_load_program(type, insns, size_insns / sizeof(struct bpf_insn), + license, 0, log, size_log); +#else + return bpf_prog_load_dev(type, insns, size_insns, license, 0, log, size_log); +#endif +} + +int bpf_program_attach(int prog_fd, int target_fd, enum bpf_attach_type type) +{ +#ifdef HAVE_LIBBPF + return bpf_prog_attach(prog_fd, target_fd, type, 0); +#else + return bpf_prog_attach_fd(prog_fd, target_fd, type); +#endif +} + +#ifdef HAVE_LIBBPF +static const char *_libbpf_compile_version = LIBBPF_VERSION; +static char _libbpf_version[10] = {}; + +const char *get_libbpf_version(void) +{ + /* Start by copying compile-time version into buffer so we have a + * fallback value in case we are dynamically linked, or can't find a + * version in /proc/self/maps below. + */ + strncpy(_libbpf_version, _libbpf_compile_version, + sizeof(_libbpf_version)-1); +#ifdef LIBBPF_DYNAMIC + char buf[PATH_MAX], *s; + bool found = false; + FILE *fp; + + /* When dynamically linking against libbpf, we can't be sure that the + * version we discovered at compile time is actually the one we are + * using at runtime. This can lead to hard-to-debug errors, so we try to + * discover the correct version at runtime. + * + * The simple solution to this would be if libbpf itself exported a + * version in its API. But since it doesn't, we work around this by + * parsing the mappings of the binary at runtime, looking for the full + * filename of libbpf.so and using that. 
+ */ + fp = fopen("/proc/self/maps", "r"); + if (fp == NULL) + goto out; + + while ((s = fgets(buf, sizeof(buf), fp)) != NULL) { + if ((s = strstr(buf, "libbpf.so.")) != NULL) { + strncpy(_libbpf_version, s+10, sizeof(_libbpf_version)-1); + strtok(_libbpf_version, "\n"); + found = true; + break; + } + } + + fclose(fp); +out: + if (!found) + fprintf(stderr, "Couldn't find runtime libbpf version - falling back to compile-time value!\n"); +#endif /* LIBBPF_DYNAMIC */ + + _libbpf_version[sizeof(_libbpf_version)-1] = '\0'; + return _libbpf_version; +} +#else +const char *get_libbpf_version(void) +{ + return NULL; +} +#endif /* HAVE_LIBBPF */ diff --git a/lib/bpf_legacy.c b/lib/bpf_legacy.c new file mode 100644 index 0000000..7ec9ce9 --- /dev/null +++ b/lib/bpf_legacy.c @@ -0,0 +1,3335 @@ +/* + * bpf.c BPF common code + * + * This program is free software; you can distribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Authors: Daniel Borkmann + * Jiri Pirko + * Alexei Starovoitov + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_ELF +#include +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "utils.h" +#include "json_print.h" + +#include "bpf_util.h" +#include "bpf_elf.h" +#include "bpf_scm.h" + +struct bpf_prog_meta { + const char *type; + const char *subdir; + const char *section; + bool may_uds_export; +}; + +static const enum bpf_prog_type __bpf_types[] = { + BPF_PROG_TYPE_SCHED_CLS, + BPF_PROG_TYPE_SCHED_ACT, + BPF_PROG_TYPE_XDP, + BPF_PROG_TYPE_LWT_IN, + BPF_PROG_TYPE_LWT_OUT, + BPF_PROG_TYPE_LWT_XMIT, +}; + +static const struct bpf_prog_meta __bpf_prog_meta[] = { + [BPF_PROG_TYPE_SCHED_CLS] = { + .type = "cls", + .subdir = "tc", + .section = ELF_SECTION_CLASSIFIER, + .may_uds_export = true, + }, + [BPF_PROG_TYPE_SCHED_ACT] = { + .type = "act", + .subdir = "tc", + .section = ELF_SECTION_ACTION, + .may_uds_export = true, + }, + [BPF_PROG_TYPE_XDP] = { + .type = "xdp", + .subdir = "xdp", + .section = ELF_SECTION_PROG, + }, + [BPF_PROG_TYPE_LWT_IN] = { + .type = "lwt_in", + .subdir = "ip", + .section = ELF_SECTION_PROG, + }, + [BPF_PROG_TYPE_LWT_OUT] = { + .type = "lwt_out", + .subdir = "ip", + .section = ELF_SECTION_PROG, + }, + [BPF_PROG_TYPE_LWT_XMIT] = { + .type = "lwt_xmit", + .subdir = "ip", + .section = ELF_SECTION_PROG, + }, + [BPF_PROG_TYPE_LWT_SEG6LOCAL] = { + .type = "lwt_seg6local", + .subdir = "ip", + .section = ELF_SECTION_PROG, + }, +}; + +static const char *bpf_prog_to_subdir(enum bpf_prog_type type) +{ + assert(type < ARRAY_SIZE(__bpf_prog_meta) && + __bpf_prog_meta[type].subdir); + return __bpf_prog_meta[type].subdir; +} + +const char *bpf_prog_to_default_section(enum bpf_prog_type type) +{ + assert(type < ARRAY_SIZE(__bpf_prog_meta) && + __bpf_prog_meta[type].section); + return 
__bpf_prog_meta[type].section; +} + +#ifdef HAVE_ELF +static int bpf_obj_open(const char *path, enum bpf_prog_type type, + const char *sec, __u32 ifindex, bool verbose); +#else +static int bpf_obj_open(const char *path, enum bpf_prog_type type, + const char *sec, __u32 ifindex, bool verbose) +{ + fprintf(stderr, "No ELF library support compiled in.\n"); + errno = ENOSYS; + return -1; +} +#endif + +static inline __u64 bpf_ptr_to_u64(const void *ptr) +{ + return (__u64)(unsigned long)ptr; +} + +static int bpf(int cmd, union bpf_attr *attr, unsigned int size) +{ +#ifdef __NR_bpf + return syscall(__NR_bpf, cmd, attr, size); +#else + fprintf(stderr, "No bpf syscall, kernel headers too old?\n"); + errno = ENOSYS; + return -1; +#endif +} + +static int bpf_map_update(int fd, const void *key, const void *value, + uint64_t flags) +{ + union bpf_attr attr = {}; + + attr.map_fd = fd; + attr.key = bpf_ptr_to_u64(key); + attr.value = bpf_ptr_to_u64(value); + attr.flags = flags; + + return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); +} + +static int bpf_prog_fd_by_id(uint32_t id) +{ + union bpf_attr attr = {}; + + attr.prog_id = id; + + return bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr)); +} + +static int bpf_prog_info_by_fd(int fd, struct bpf_prog_info *info, + uint32_t *info_len) +{ + union bpf_attr attr = {}; + int ret; + + attr.info.bpf_fd = fd; + attr.info.info = bpf_ptr_to_u64(info); + attr.info.info_len = *info_len; + + *info_len = 0; + ret = bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)); + if (!ret) + *info_len = attr.info.info_len; + + return ret; +} + +int bpf_dump_prog_info(FILE *f, uint32_t id) +{ + struct bpf_prog_info info = {}; + uint32_t len = sizeof(info); + int fd, ret, dump_ok = 0; + SPRINT_BUF(tmp); + + open_json_object("prog"); + print_uint(PRINT_ANY, "id", "id %u ", id); + + fd = bpf_prog_fd_by_id(id); + if (fd < 0) + goto out; + + ret = bpf_prog_info_by_fd(fd, &info, &len); + if (!ret && len) { + int jited = !!info.jited_prog_len; + + 
print_string(PRINT_ANY, "tag", "tag %s ", + hexstring_n2a(info.tag, sizeof(info.tag), + tmp, sizeof(tmp))); + print_uint(PRINT_JSON, "jited", NULL, jited); + if (jited && !is_json_context()) + fprintf(f, "jited "); + dump_ok = 1; + } + + close(fd); +out: + close_json_object(); + return dump_ok; +} + +static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len, + char **bpf_string, bool *need_release, + const char separator) +{ + char sp; + + if (from_file) { + size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,"); + char *tmp_string, *pos, c_prev = ' '; + FILE *fp; + int c; + + tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len; + tmp_string = pos = calloc(1, tmp_len); + if (tmp_string == NULL) + return -ENOMEM; + + fp = fopen(arg, "r"); + if (fp == NULL) { + perror("Cannot fopen"); + free(tmp_string); + return -ENOENT; + } + + while ((c = fgetc(fp)) != EOF) { + switch (c) { + case '\n': + if (c_prev != ',') + *(pos++) = ','; + c_prev = ','; + break; + case ' ': + case '\t': + if (c_prev != ' ') + *(pos++) = c; + c_prev = ' '; + break; + default: + *(pos++) = c; + c_prev = c; + } + if (pos - tmp_string == tmp_len) + break; + } + + if (!feof(fp)) { + free(tmp_string); + fclose(fp); + return -E2BIG; + } + + fclose(fp); + *pos = 0; + + *need_release = true; + *bpf_string = tmp_string; + } else { + *need_release = false; + *bpf_string = arg; + } + + if (sscanf(*bpf_string, "%hu%c", bpf_len, &sp) != 2 || + sp != separator) { + if (*need_release) + free(*bpf_string); + return -EINVAL; + } + + return 0; +} + +static int bpf_ops_parse(int argc, char **argv, struct sock_filter *bpf_ops, + bool from_file) +{ + char *bpf_string, *token, separator = ','; + int ret = 0, i = 0; + bool need_release; + __u16 bpf_len = 0; + + if (argc < 1) + return -EINVAL; + if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string, + &need_release, separator)) + return -EINVAL; + if (bpf_len == 0 || bpf_len > BPF_MAXINSNS) { + ret = -EINVAL; + goto out; + } + + token = 
bpf_string; + while ((token = strchr(token, separator)) && (++token)[0]) { + if (i >= bpf_len) { + fprintf(stderr, "Real program length exceeds encoded length parameter!\n"); + ret = -EINVAL; + goto out; + } + + if (sscanf(token, "%hu %hhu %hhu %u,", + &bpf_ops[i].code, &bpf_ops[i].jt, + &bpf_ops[i].jf, &bpf_ops[i].k) != 4) { + fprintf(stderr, "Error at instruction %d!\n", i); + ret = -EINVAL; + goto out; + } + + i++; + } + + if (i != bpf_len) { + fprintf(stderr, "Parsed program length is less than encoded length parameter!\n"); + ret = -EINVAL; + goto out; + } + ret = bpf_len; +out: + if (need_release) + free(bpf_string); + + return ret; +} + +void bpf_print_ops(struct rtattr *bpf_ops, __u16 len) +{ + struct sock_filter *ops = RTA_DATA(bpf_ops); + int i; + + if (len == 0) + return; + + open_json_object("bytecode"); + print_uint(PRINT_ANY, "length", "bytecode \'%u,", len); + open_json_array(PRINT_JSON, "insns"); + + for (i = 0; i < len; i++) { + open_json_object(NULL); + print_hu(PRINT_ANY, "code", "%hu ", ops[i].code); + print_hhu(PRINT_ANY, "jt", "%hhu ", ops[i].jt); + print_hhu(PRINT_ANY, "jf", "%hhu ", ops[i].jf); + if (i == len - 1) + print_uint(PRINT_ANY, "k", "%u\'", ops[i].k); + else + print_uint(PRINT_ANY, "k", "%u,", ops[i].k); + close_json_object(); + } + + close_json_array(PRINT_JSON, NULL); + close_json_object(); +} + +static void bpf_map_pin_report(const struct bpf_elf_map *pin, + const struct bpf_elf_map *obj) +{ + fprintf(stderr, "Map specification differs from pinned file!\n"); + + if (obj->type != pin->type) + fprintf(stderr, " - Type: %u (obj) != %u (pin)\n", + obj->type, pin->type); + if (obj->size_key != pin->size_key) + fprintf(stderr, " - Size key: %u (obj) != %u (pin)\n", + obj->size_key, pin->size_key); + if (obj->size_value != pin->size_value) + fprintf(stderr, " - Size value: %u (obj) != %u (pin)\n", + obj->size_value, pin->size_value); + if (obj->max_elem != pin->max_elem) + fprintf(stderr, " - Max elems: %u (obj) != %u (pin)\n", + 
obj->max_elem, pin->max_elem); + if (obj->flags != pin->flags) + fprintf(stderr, " - Flags: %#x (obj) != %#x (pin)\n", + obj->flags, pin->flags); + + fprintf(stderr, "\n"); +} + +struct bpf_prog_data { + unsigned int type; + unsigned int jited; +}; + +struct bpf_map_ext { + struct bpf_prog_data owner; + unsigned int btf_id_key; + unsigned int btf_id_val; +}; + +static int bpf_derive_elf_map_from_fdinfo(int fd, struct bpf_elf_map *map, + struct bpf_map_ext *ext) +{ + unsigned int val, owner_type = 0, owner_jited = 0; + char file[PATH_MAX], buff[4096]; + FILE *fp; + + snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd); + memset(map, 0, sizeof(*map)); + + fp = fopen(file, "r"); + if (!fp) { + fprintf(stderr, "No procfs support?!\n"); + return -EIO; + } + + while (fgets(buff, sizeof(buff), fp)) { + if (sscanf(buff, "map_type:\t%u", &val) == 1) + map->type = val; + else if (sscanf(buff, "key_size:\t%u", &val) == 1) + map->size_key = val; + else if (sscanf(buff, "value_size:\t%u", &val) == 1) + map->size_value = val; + else if (sscanf(buff, "max_entries:\t%u", &val) == 1) + map->max_elem = val; + else if (sscanf(buff, "map_flags:\t%i", &val) == 1) + map->flags = val; + else if (sscanf(buff, "owner_prog_type:\t%i", &val) == 1) + owner_type = val; + else if (sscanf(buff, "owner_jited:\t%i", &val) == 1) + owner_jited = val; + } + + fclose(fp); + if (ext) { + memset(ext, 0, sizeof(*ext)); + ext->owner.type = owner_type; + ext->owner.jited = owner_jited; + } + + return 0; +} + +static int bpf_map_selfcheck_pinned(int fd, const struct bpf_elf_map *map, + struct bpf_map_ext *ext, int length, + enum bpf_prog_type type) +{ + struct bpf_elf_map tmp, zero = {}; + int ret; + + ret = bpf_derive_elf_map_from_fdinfo(fd, &tmp, ext); + if (ret < 0) + return ret; + + /* The decision to reject this is on kernel side eventually, but + * at least give the user a chance to know what's wrong. 
+ */ + if (ext->owner.type && ext->owner.type != type) + fprintf(stderr, "Program array map owner types differ: %u (obj) != %u (pin)\n", + type, ext->owner.type); + + if (!memcmp(&tmp, map, length)) { + return 0; + } else { + /* If kernel doesn't have eBPF-related fdinfo, we cannot do much, + * so just accept it. We know we do have an eBPF fd and in this + * case, everything is 0. It is guaranteed that no such map exists + * since map type of 0 is unloadable BPF_MAP_TYPE_UNSPEC. + */ + if (!memcmp(&tmp, &zero, length)) + return 0; + + bpf_map_pin_report(&tmp, map); + return -EINVAL; + } +} + +static int bpf_mnt_fs(const char *target) +{ + bool bind_done = false; + + while (mount("", target, "none", MS_PRIVATE | MS_REC, NULL)) { + if (errno != EINVAL || bind_done) { + fprintf(stderr, "mount --make-private %s failed: %s\n", + target, strerror(errno)); + return -1; + } + + if (mount(target, target, "none", MS_BIND, NULL)) { + fprintf(stderr, "mount --bind %s %s failed: %s\n", + target, target, strerror(errno)); + return -1; + } + + bind_done = true; + } + + if (mount("bpf", target, "bpf", 0, "mode=0700")) { + fprintf(stderr, "mount -t bpf bpf %s failed: %s\n", + target, strerror(errno)); + return -1; + } + + return 0; +} + +static int bpf_mnt_check_target(const char *target) +{ + int ret; + + ret = mkdir(target, S_IRWXU); + if (ret && errno != EEXIST) + fprintf(stderr, "mkdir %s failed: %s\n", target, + strerror(errno)); + + return ret; +} + +static int bpf_valid_mntpt(const char *mnt, unsigned long magic) +{ + struct statfs st_fs; + + if (statfs(mnt, &st_fs) < 0) + return -ENOENT; + if ((unsigned long)st_fs.f_type != magic) + return -ENOENT; + + return 0; +} + +static const char *bpf_find_mntpt_single(unsigned long magic, char *mnt, + int len, const char *mntpt) +{ + int ret; + + ret = bpf_valid_mntpt(mntpt, magic); + if (!ret) { + strlcpy(mnt, mntpt, len); + return mnt; + } + + return NULL; +} + +static const char *bpf_find_mntpt(const char *fstype, unsigned long 
magic, + char *mnt, int len, + const char * const *known_mnts) +{ + const char * const *ptr; + char type[100]; + FILE *fp; + + if (known_mnts) { + ptr = known_mnts; + while (*ptr) { + if (bpf_find_mntpt_single(magic, mnt, len, *ptr)) + return mnt; + ptr++; + } + } + + if (len != PATH_MAX) + return NULL; + + fp = fopen("/proc/mounts", "r"); + if (fp == NULL) + return NULL; + + while (fscanf(fp, "%*s %" textify(PATH_MAX) "s %99s %*s %*d %*d\n", + mnt, type) == 2) { + if (strcmp(type, fstype) == 0) + break; + } + + fclose(fp); + if (strcmp(type, fstype) != 0) + return NULL; + + return mnt; +} + +int bpf_trace_pipe(void) +{ + char tracefs_mnt[PATH_MAX] = TRACE_DIR_MNT; + static const char * const tracefs_known_mnts[] = { + TRACE_DIR_MNT, + "/sys/kernel/debug/tracing", + "/tracing", + "/trace", + 0, + }; + int fd_in, fd_out = STDERR_FILENO; + char tpipe[PATH_MAX]; + const char *mnt; + + mnt = bpf_find_mntpt("tracefs", TRACEFS_MAGIC, tracefs_mnt, + sizeof(tracefs_mnt), tracefs_known_mnts); + if (!mnt) { + fprintf(stderr, "tracefs not mounted?\n"); + return -1; + } + + snprintf(tpipe, sizeof(tpipe), "%s/trace_pipe", mnt); + + fd_in = open(tpipe, O_RDONLY); + if (fd_in < 0) + return -1; + + fprintf(stderr, "Running! 
Hang up with ^C!\n\n"); + while (1) { + static char buff[4096]; + ssize_t ret; + + ret = read(fd_in, buff, sizeof(buff)); + if (ret > 0 && write(fd_out, buff, ret) == ret) + continue; + break; + } + + close(fd_in); + return -1; +} + +static int bpf_gen_global(const char *bpf_sub_dir) +{ + char bpf_glo_dir[PATH_MAX]; + int ret; + + snprintf(bpf_glo_dir, sizeof(bpf_glo_dir), "%s/%s/", + bpf_sub_dir, BPF_DIR_GLOBALS); + + ret = mkdir(bpf_glo_dir, S_IRWXU); + if (ret && errno != EEXIST) { + fprintf(stderr, "mkdir %s failed: %s\n", bpf_glo_dir, + strerror(errno)); + return ret; + } + + return 0; +} + +static int bpf_gen_master(const char *base, const char *name) +{ + char bpf_sub_dir[PATH_MAX + NAME_MAX + 1]; + int ret; + + snprintf(bpf_sub_dir, sizeof(bpf_sub_dir), "%s%s/", base, name); + + ret = mkdir(bpf_sub_dir, S_IRWXU); + if (ret && errno != EEXIST) { + fprintf(stderr, "mkdir %s failed: %s\n", bpf_sub_dir, + strerror(errno)); + return ret; + } + + return bpf_gen_global(bpf_sub_dir); +} + +static int bpf_slave_via_bind_mnt(const char *full_name, + const char *full_link) +{ + int ret; + + ret = mkdir(full_name, S_IRWXU); + if (ret) { + assert(errno != EEXIST); + fprintf(stderr, "mkdir %s failed: %s\n", full_name, + strerror(errno)); + return ret; + } + + ret = mount(full_link, full_name, "none", MS_BIND, NULL); + if (ret) { + rmdir(full_name); + fprintf(stderr, "mount --bind %s %s failed: %s\n", + full_link, full_name, strerror(errno)); + } + + return ret; +} + +static int bpf_gen_slave(const char *base, const char *name, + const char *link) +{ + char bpf_lnk_dir[PATH_MAX + NAME_MAX + 1]; + char bpf_sub_dir[PATH_MAX + NAME_MAX]; + struct stat sb = {}; + int ret; + + snprintf(bpf_lnk_dir, sizeof(bpf_lnk_dir), "%s%s/", base, link); + snprintf(bpf_sub_dir, sizeof(bpf_sub_dir), "%s%s", base, name); + + ret = symlink(bpf_lnk_dir, bpf_sub_dir); + if (ret) { + if (errno != EEXIST) { + if (errno != EPERM) { + fprintf(stderr, "symlink %s failed: %s\n", + bpf_sub_dir, 
strerror(errno)); + return ret; + } + + return bpf_slave_via_bind_mnt(bpf_sub_dir, + bpf_lnk_dir); + } + + ret = lstat(bpf_sub_dir, &sb); + if (ret) { + fprintf(stderr, "lstat %s failed: %s\n", + bpf_sub_dir, strerror(errno)); + return ret; + } + + if ((sb.st_mode & S_IFMT) != S_IFLNK) + return bpf_gen_global(bpf_sub_dir); + } + + return 0; +} + +static int bpf_gen_hierarchy(const char *base) +{ + int ret, i; + + ret = bpf_gen_master(base, bpf_prog_to_subdir(__bpf_types[0])); + for (i = 1; i < ARRAY_SIZE(__bpf_types) && !ret; i++) + ret = bpf_gen_slave(base, + bpf_prog_to_subdir(__bpf_types[i]), + bpf_prog_to_subdir(__bpf_types[0])); + return ret; +} + +static const char *bpf_get_work_dir(enum bpf_prog_type type) +{ + static char bpf_tmp[PATH_MAX] = BPF_DIR_MNT; + static char bpf_wrk_dir[PATH_MAX]; + static const char *mnt; + static bool bpf_mnt_cached; + const char *mnt_env = getenv(BPF_ENV_MNT); + static const char * const bpf_known_mnts[] = { + BPF_DIR_MNT, + "/bpf", + 0, + }; + int ret; + + if (bpf_mnt_cached) { + const char *out = mnt; + + if (out && type) { + snprintf(bpf_tmp, sizeof(bpf_tmp), "%s%s/", + out, bpf_prog_to_subdir(type)); + out = bpf_tmp; + } + return out; + } + + if (mnt_env) + mnt = bpf_find_mntpt_single(BPF_FS_MAGIC, bpf_tmp, + sizeof(bpf_tmp), mnt_env); + else + mnt = bpf_find_mntpt("bpf", BPF_FS_MAGIC, bpf_tmp, + sizeof(bpf_tmp), bpf_known_mnts); + if (!mnt) { + mnt = mnt_env ? 
: BPF_DIR_MNT; + ret = bpf_mnt_check_target(mnt); + if (!ret) + ret = bpf_mnt_fs(mnt); + if (ret) { + mnt = NULL; + goto out; + } + } + + ret = snprintf(bpf_wrk_dir, sizeof(bpf_wrk_dir), "%s/", mnt); + if (ret < 0 || ret >= sizeof(bpf_wrk_dir)) { + mnt = NULL; + goto out; + } + + ret = bpf_gen_hierarchy(bpf_wrk_dir); + if (ret) { + mnt = NULL; + goto out; + } + + mnt = bpf_wrk_dir; +out: + bpf_mnt_cached = true; + return mnt; +} + +static int bpf_obj_get(const char *pathname, enum bpf_prog_type type) +{ + union bpf_attr attr = {}; + char tmp[PATH_MAX]; + + if (strlen(pathname) > 2 && pathname[0] == 'm' && + pathname[1] == ':' && bpf_get_work_dir(type)) { + snprintf(tmp, sizeof(tmp), "%s/%s", + bpf_get_work_dir(type), pathname + 2); + pathname = tmp; + } + + attr.pathname = bpf_ptr_to_u64(pathname); + + return bpf(BPF_OBJ_GET, &attr, sizeof(attr)); +} + +static int bpf_obj_pinned(const char *pathname, enum bpf_prog_type type) +{ + int prog_fd = bpf_obj_get(pathname, type); + + if (prog_fd < 0) + fprintf(stderr, "Couldn\'t retrieve pinned program \'%s\': %s\n", + pathname, strerror(errno)); + return prog_fd; +} + +static int bpf_do_parse(struct bpf_cfg_in *cfg, const bool *opt_tbl) +{ + const char *file, *section, *uds_name; + bool verbose = false; + int i, ret, argc; + char **argv; + + argv = cfg->argv; + argc = cfg->argc; + + if (opt_tbl[CBPF_BYTECODE] && + (matches(*argv, "bytecode") == 0 || + strcmp(*argv, "bc") == 0)) { + cfg->mode = CBPF_BYTECODE; + } else if (opt_tbl[CBPF_FILE] && + (matches(*argv, "bytecode-file") == 0 || + strcmp(*argv, "bcf") == 0)) { + cfg->mode = CBPF_FILE; + } else if (opt_tbl[EBPF_OBJECT] && + (matches(*argv, "object-file") == 0 || + strcmp(*argv, "obj") == 0)) { + cfg->mode = EBPF_OBJECT; + } else if (opt_tbl[EBPF_PINNED] && + (matches(*argv, "object-pinned") == 0 || + matches(*argv, "pinned") == 0 || + matches(*argv, "fd") == 0)) { + cfg->mode = EBPF_PINNED; + } else { + fprintf(stderr, "What mode is \"%s\"?\n", *argv); + return -1; + 
} + + NEXT_ARG(); + file = section = uds_name = NULL; + if (cfg->mode == EBPF_OBJECT || cfg->mode == EBPF_PINNED) { + file = *argv; + NEXT_ARG_FWD(); + + if (cfg->type == BPF_PROG_TYPE_UNSPEC) { + if (argc > 0 && matches(*argv, "type") == 0) { + NEXT_ARG(); + for (i = 0; i < ARRAY_SIZE(__bpf_prog_meta); + i++) { + if (!__bpf_prog_meta[i].type) + continue; + if (!matches(*argv, + __bpf_prog_meta[i].type)) { + cfg->type = i; + break; + } + } + + if (cfg->type == BPF_PROG_TYPE_UNSPEC) { + fprintf(stderr, "What type is \"%s\"?\n", + *argv); + return -1; + } + NEXT_ARG_FWD(); + } else { + cfg->type = BPF_PROG_TYPE_SCHED_CLS; + } + } + + section = bpf_prog_to_default_section(cfg->type); + if (argc > 0 && matches(*argv, "section") == 0) { + NEXT_ARG(); + section = *argv; + NEXT_ARG_FWD(); + } + + if (__bpf_prog_meta[cfg->type].may_uds_export) { + uds_name = getenv(BPF_ENV_UDS); + if (argc > 0 && !uds_name && + matches(*argv, "export") == 0) { + NEXT_ARG(); + uds_name = *argv; + NEXT_ARG_FWD(); + } + } + + if (argc > 0 && matches(*argv, "verbose") == 0) { + verbose = true; + NEXT_ARG_FWD(); + } + + PREV_ARG(); + } + + if (cfg->mode == CBPF_BYTECODE || cfg->mode == CBPF_FILE) { + ret = bpf_ops_parse(argc, argv, cfg->opcodes, + cfg->mode == CBPF_FILE); + cfg->n_opcodes = ret; + } else if (cfg->mode == EBPF_OBJECT) { + ret = 0; /* program will be loaded by load stage */ + } else if (cfg->mode == EBPF_PINNED) { + ret = bpf_obj_pinned(file, cfg->type); + cfg->prog_fd = ret; + } else { + return -1; + } + + cfg->object = file; + cfg->section = section; + cfg->uds = uds_name; + cfg->argc = argc; + cfg->argv = argv; + cfg->verbose = verbose; + + return ret; +} + +static int bpf_do_load(struct bpf_cfg_in *cfg) +{ + if (cfg->mode == EBPF_OBJECT) { +#ifdef HAVE_LIBBPF + return iproute2_load_libbpf(cfg); +#endif + cfg->prog_fd = bpf_obj_open(cfg->object, cfg->type, + cfg->section, cfg->ifindex, + cfg->verbose); + return cfg->prog_fd; + } + return 0; +} + +int bpf_load_common(struct 
bpf_cfg_in *cfg, const struct bpf_cfg_ops *ops, + void *nl) +{ + char annotation[256]; + int ret; + + ret = bpf_do_load(cfg); + if (ret < 0) + return ret; + + if (cfg->mode == CBPF_BYTECODE || cfg->mode == CBPF_FILE) + ops->cbpf_cb(nl, cfg->opcodes, cfg->n_opcodes); + if (cfg->mode == EBPF_OBJECT || cfg->mode == EBPF_PINNED) { + snprintf(annotation, sizeof(annotation), "%s:[%s]", + basename(cfg->object), cfg->mode == EBPF_PINNED ? + "*fsobj" : cfg->section); + ops->ebpf_cb(nl, cfg->prog_fd, annotation); + } + + return 0; +} + +int bpf_parse_common(struct bpf_cfg_in *cfg, const struct bpf_cfg_ops *ops) +{ + bool opt_tbl[BPF_MODE_MAX] = {}; + + if (ops->cbpf_cb) { + opt_tbl[CBPF_BYTECODE] = true; + opt_tbl[CBPF_FILE] = true; + } + + if (ops->ebpf_cb) { + opt_tbl[EBPF_OBJECT] = true; + opt_tbl[EBPF_PINNED] = true; + } + + return bpf_do_parse(cfg, opt_tbl); +} + +int bpf_parse_and_load_common(struct bpf_cfg_in *cfg, + const struct bpf_cfg_ops *ops, void *nl) +{ + int ret; + + ret = bpf_parse_common(cfg, ops); + if (ret < 0) + return ret; + + return bpf_load_common(cfg, ops, nl); +} + +int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv) +{ + const bool opt_tbl[BPF_MODE_MAX] = { + [EBPF_OBJECT] = true, + [EBPF_PINNED] = true, + }; + const struct bpf_elf_map test = { + .type = BPF_MAP_TYPE_PROG_ARRAY, + .size_key = sizeof(int), + .size_value = sizeof(int), + }; + struct bpf_cfg_in cfg = { + .type = BPF_PROG_TYPE_UNSPEC, + .argc = argc, + .argv = argv, + }; + struct bpf_map_ext ext = {}; + int ret, prog_fd, map_fd; + uint32_t map_key; + + ret = bpf_do_parse(&cfg, opt_tbl); + if (ret < 0) + return ret; + + ret = bpf_do_load(&cfg); + if (ret < 0) + return ret; + + prog_fd = cfg.prog_fd; + + if (key) { + map_key = *key; + } else { + ret = sscanf(cfg.section, "%*i/%i", &map_key); + if (ret != 1) { + fprintf(stderr, "Couldn\'t infer map key from section name! 
Please provide \'key\' argument!\n"); + ret = -EINVAL; + goto out_prog; + } + } + + map_fd = bpf_obj_get(map_path, cfg.type); + if (map_fd < 0) { + fprintf(stderr, "Couldn\'t retrieve pinned map \'%s\': %s\n", + map_path, strerror(errno)); + ret = map_fd; + goto out_prog; + } + + ret = bpf_map_selfcheck_pinned(map_fd, &test, &ext, + offsetof(struct bpf_elf_map, max_elem), + cfg.type); + if (ret < 0) { + fprintf(stderr, "Map \'%s\' self-check failed!\n", map_path); + goto out_map; + } + + ret = bpf_map_update(map_fd, &map_key, &prog_fd, BPF_ANY); + if (ret < 0) + fprintf(stderr, "Map update failed: %s\n", strerror(errno)); +out_map: + close(map_fd); +out_prog: + close(prog_fd); + return ret; +} + +int bpf_prog_attach_fd(int prog_fd, int target_fd, enum bpf_attach_type type) +{ + union bpf_attr attr = {}; + + attr.target_fd = target_fd; + attr.attach_bpf_fd = prog_fd; + attr.attach_type = type; + + return bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); +} + +int bpf_prog_detach_fd(int target_fd, enum bpf_attach_type type) +{ + union bpf_attr attr = {}; + + attr.target_fd = target_fd; + attr.attach_type = type; + + return bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); +} + +int bpf_prog_load_dev(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t size_insns, const char *license, __u32 ifindex, + char *log, size_t size_log) +{ + union bpf_attr attr = {}; + + attr.prog_type = type; + attr.insns = bpf_ptr_to_u64(insns); + attr.insn_cnt = size_insns / sizeof(struct bpf_insn); + attr.license = bpf_ptr_to_u64(license); + attr.prog_ifindex = ifindex; + + if (size_log > 0) { + attr.log_buf = bpf_ptr_to_u64(log); + attr.log_size = size_log; + attr.log_level = 1; + } + + return bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); +} + +#ifdef HAVE_ELF +struct bpf_elf_prog { + enum bpf_prog_type type; + struct bpf_insn *insns; + unsigned int insns_num; + size_t size; + const char *license; +}; + +struct bpf_hash_entry { + unsigned int pinning; + const char *subpath; + struct 
bpf_hash_entry *next; +}; + +struct bpf_config { + unsigned int jit_enabled; +}; + +struct bpf_btf { + const struct btf_header *hdr; + const void *raw; + const char *strings; + const struct btf_type **types; + int types_num; +}; + +struct bpf_elf_ctx { + struct bpf_config cfg; + Elf *elf_fd; + GElf_Ehdr elf_hdr; + Elf_Data *sym_tab; + Elf_Data *str_tab; + Elf_Data *btf_data; + char obj_uid[64]; + int obj_fd; + int btf_fd; + int map_fds[ELF_MAX_MAPS]; + struct bpf_elf_map maps[ELF_MAX_MAPS]; + struct bpf_map_ext maps_ext[ELF_MAX_MAPS]; + struct bpf_elf_prog prog_text; + struct bpf_btf btf; + int sym_num; + int map_num; + int map_len; + bool *sec_done; + int sec_maps; + int sec_text; + int sec_btf; + char license[ELF_MAX_LICENSE_LEN]; + enum bpf_prog_type type; + __u32 ifindex; + bool verbose; + bool noafalg; + struct bpf_elf_st stat; + struct bpf_hash_entry *ht[256]; + char *log; + size_t log_size; +}; + +struct bpf_elf_sec_data { + GElf_Shdr sec_hdr; + Elf_Data *sec_data; + const char *sec_name; +}; + +struct bpf_map_data { + int *fds; + const char *obj; + struct bpf_elf_st *st; + struct bpf_elf_map *ent; +}; + +static bool bpf_log_has_data(struct bpf_elf_ctx *ctx) +{ + return ctx->log && ctx->log[0]; +} + +static __check_format_string(2, 3) void +bpf_dump_error(struct bpf_elf_ctx *ctx, const char *format, ...) 
+{ + va_list vl; + + va_start(vl, format); + vfprintf(stderr, format, vl); + va_end(vl); + + if (bpf_log_has_data(ctx)) { + if (ctx->verbose) { + fprintf(stderr, "%s\n", ctx->log); + } else { + unsigned int off = 0, len = strlen(ctx->log); + + if (len > BPF_MAX_LOG) { + off = len - BPF_MAX_LOG; + fprintf(stderr, "Skipped %u bytes, use \'verb\' option for the full verbose log.\n[...]\n", + off); + } + fprintf(stderr, "%s\n", ctx->log + off); + } + + memset(ctx->log, 0, ctx->log_size); + } +} + +static int bpf_log_realloc(struct bpf_elf_ctx *ctx) +{ + const size_t log_max = UINT_MAX >> 8; + size_t log_size = ctx->log_size; + char *ptr; + + if (!ctx->log) { + log_size = 65536; + } else if (log_size < log_max) { + log_size <<= 1; + if (log_size > log_max) + log_size = log_max; + } else { + return -EINVAL; + } + + ptr = realloc(ctx->log, log_size); + if (!ptr) + return -ENOMEM; + + ptr[0] = 0; + ctx->log = ptr; + ctx->log_size = log_size; + + return 0; +} + +static int bpf_map_create(enum bpf_map_type type, uint32_t size_key, + uint32_t size_value, uint32_t max_elem, + uint32_t flags, int inner_fd, int btf_fd, + uint32_t ifindex, uint32_t btf_id_key, + uint32_t btf_id_val) +{ + union bpf_attr attr = {}; + + attr.map_type = type; + attr.key_size = size_key; + attr.value_size = inner_fd ? 
sizeof(int) : size_value; + attr.max_entries = max_elem; + attr.map_flags = flags; + attr.inner_map_fd = inner_fd; + attr.map_ifindex = ifindex; + attr.btf_fd = btf_fd; + attr.btf_key_type_id = btf_id_key; + attr.btf_value_type_id = btf_id_val; + + return bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); +} + +static int bpf_btf_load(void *btf, size_t size_btf, + char *log, size_t size_log) +{ + union bpf_attr attr = {}; + + attr.btf = bpf_ptr_to_u64(btf); + attr.btf_size = size_btf; + + if (size_log > 0) { + attr.btf_log_buf = bpf_ptr_to_u64(log); + attr.btf_log_size = size_log; + attr.btf_log_level = 1; + } + + return bpf(BPF_BTF_LOAD, &attr, sizeof(attr)); +} + +static int bpf_obj_pin(int fd, const char *pathname) +{ + union bpf_attr attr = {}; + + attr.pathname = bpf_ptr_to_u64(pathname); + attr.bpf_fd = fd; + + return bpf(BPF_OBJ_PIN, &attr, sizeof(attr)); +} + +static int bpf_obj_hash(const char *object, uint8_t *out, size_t len) +{ + struct sockaddr_alg alg = { + .salg_family = AF_ALG, + .salg_type = "hash", + .salg_name = "sha1", + }; + int ret, cfd, ofd, ffd; + struct stat stbuff; + ssize_t size; + + if (!object || len != 20) + return -EINVAL; + + cfd = socket(AF_ALG, SOCK_SEQPACKET, 0); + if (cfd < 0) + return cfd; + + ret = bind(cfd, (struct sockaddr *)&alg, sizeof(alg)); + if (ret < 0) + goto out_cfd; + + ofd = accept(cfd, NULL, 0); + if (ofd < 0) { + ret = ofd; + goto out_cfd; + } + + ffd = open(object, O_RDONLY); + if (ffd < 0) { + fprintf(stderr, "Error opening object %s: %s\n", + object, strerror(errno)); + ret = ffd; + goto out_ofd; + } + + ret = fstat(ffd, &stbuff); + if (ret < 0) { + fprintf(stderr, "Error doing fstat: %s\n", + strerror(errno)); + goto out_ffd; + } + + size = sendfile(ofd, ffd, NULL, stbuff.st_size); + if (size != stbuff.st_size) { + fprintf(stderr, "Error from sendfile (%zd vs %zu bytes): %s\n", + size, stbuff.st_size, strerror(errno)); + ret = -1; + goto out_ffd; + } + + size = read(ofd, out, len); + if (size != len) { + 
fprintf(stderr, "Error from read (%zd vs %zu bytes): %s\n", + size, len, strerror(errno)); + ret = -1; + } else { + ret = 0; + } +out_ffd: + close(ffd); +out_ofd: + close(ofd); +out_cfd: + close(cfd); + return ret; +} + +static void bpf_init_env(void) +{ + struct rlimit limit = { + .rlim_cur = RLIM_INFINITY, + .rlim_max = RLIM_INFINITY, + }; + + /* Don't bother in case we fail! */ + setrlimit(RLIMIT_MEMLOCK, &limit); + + if (!bpf_get_work_dir(BPF_PROG_TYPE_UNSPEC)) + fprintf(stderr, "Continuing without mounted eBPF fs. Too old kernel?\n"); +} + +static const char *bpf_custom_pinning(const struct bpf_elf_ctx *ctx, + uint32_t pinning) +{ + struct bpf_hash_entry *entry; + + entry = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)]; + while (entry && entry->pinning != pinning) + entry = entry->next; + + return entry ? entry->subpath : NULL; +} + +static bool bpf_no_pinning(const struct bpf_elf_ctx *ctx, + uint32_t pinning) +{ + switch (pinning) { + case PIN_OBJECT_NS: + case PIN_GLOBAL_NS: + return false; + case PIN_NONE: + return true; + default: + return !bpf_custom_pinning(ctx, pinning); + } +} + +static void bpf_make_pathname(char *pathname, size_t len, const char *name, + const struct bpf_elf_ctx *ctx, uint32_t pinning) +{ + switch (pinning) { + case PIN_OBJECT_NS: + snprintf(pathname, len, "%s/%s/%s", + bpf_get_work_dir(ctx->type), + ctx->obj_uid, name); + break; + case PIN_GLOBAL_NS: + snprintf(pathname, len, "%s/%s/%s", + bpf_get_work_dir(ctx->type), + BPF_DIR_GLOBALS, name); + break; + default: + snprintf(pathname, len, "%s/../%s/%s", + bpf_get_work_dir(ctx->type), + bpf_custom_pinning(ctx, pinning), name); + break; + } +} + +static int bpf_probe_pinned(const char *name, const struct bpf_elf_ctx *ctx, + uint32_t pinning) +{ + char pathname[PATH_MAX]; + + if (bpf_no_pinning(ctx, pinning) || !bpf_get_work_dir(ctx->type)) + return 0; + + bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning); + return bpf_obj_get(pathname, ctx->type); +} + +static int 
bpf_make_obj_path(const struct bpf_elf_ctx *ctx) +{ + char tmp[PATH_MAX]; + int ret; + + snprintf(tmp, sizeof(tmp), "%s/%s", bpf_get_work_dir(ctx->type), + ctx->obj_uid); + + ret = mkdir(tmp, S_IRWXU); + if (ret && errno != EEXIST) { + fprintf(stderr, "mkdir %s failed: %s\n", tmp, strerror(errno)); + return ret; + } + + return 0; +} + +static int bpf_make_custom_path(const struct bpf_elf_ctx *ctx, + const char *todo) +{ + char tmp[PATH_MAX], rem[PATH_MAX], *sub; + int ret; + + snprintf(tmp, sizeof(tmp), "%s/../", bpf_get_work_dir(ctx->type)); + snprintf(rem, sizeof(rem), "%s/", todo); + sub = strtok(rem, "/"); + + while (sub) { + if (strlen(tmp) + strlen(sub) + 2 > PATH_MAX) + return -EINVAL; + + strcat(tmp, sub); + strcat(tmp, "/"); + + ret = mkdir(tmp, S_IRWXU); + if (ret && errno != EEXIST) { + fprintf(stderr, "mkdir %s failed: %s\n", tmp, + strerror(errno)); + return ret; + } + + sub = strtok(NULL, "/"); + } + + return 0; +} + +static int bpf_place_pinned(int fd, const char *name, + const struct bpf_elf_ctx *ctx, uint32_t pinning) +{ + char pathname[PATH_MAX]; + const char *tmp; + int ret = 0; + + if (bpf_no_pinning(ctx, pinning) || !bpf_get_work_dir(ctx->type)) + return 0; + + if (pinning == PIN_OBJECT_NS) + ret = bpf_make_obj_path(ctx); + else if ((tmp = bpf_custom_pinning(ctx, pinning))) + ret = bpf_make_custom_path(ctx, tmp); + if (ret < 0) + return ret; + + bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning); + return bpf_obj_pin(fd, pathname); +} + +static void bpf_prog_report(int fd, const char *section, + const struct bpf_elf_prog *prog, + struct bpf_elf_ctx *ctx) +{ + unsigned int insns = prog->size / sizeof(struct bpf_insn); + + fprintf(stderr, "\nProg section \'%s\' %s%s (%d)!\n", section, + fd < 0 ? "rejected: " : "loaded", + fd < 0 ? strerror(errno) : "", + fd < 0 ? errno : fd); + + fprintf(stderr, " - Type: %u\n", prog->type); + fprintf(stderr, " - Instructions: %u (%u over limit)\n", + insns, insns > BPF_MAXINSNS ? 
insns - BPF_MAXINSNS : 0); + fprintf(stderr, " - License: %s\n\n", prog->license); + + bpf_dump_error(ctx, "Verifier analysis:\n\n"); +} + +static int bpf_prog_attach(const char *section, + const struct bpf_elf_prog *prog, + struct bpf_elf_ctx *ctx) +{ + int tries = 0, fd; +retry: + errno = 0; + fd = bpf_prog_load_dev(prog->type, prog->insns, prog->size, + prog->license, ctx->ifindex, + ctx->log, ctx->log_size); + if (fd < 0 || ctx->verbose) { + /* The verifier log is pretty chatty, sometimes so chatty + * on larger programs, that we could fail to dump everything + * into our buffer. Still, try to give a debuggable error + * log for the user, so enlarge it and re-fail. + */ + if (fd < 0 && (errno == ENOSPC || !ctx->log_size)) { + if (tries++ < 10 && !bpf_log_realloc(ctx)) + goto retry; + + fprintf(stderr, "Log buffer too small to dump verifier log %zu bytes (%d tries)!\n", + ctx->log_size, tries); + return fd; + } + + bpf_prog_report(fd, section, prog, ctx); + } + + return fd; +} + +static void bpf_map_report(int fd, const char *name, + const struct bpf_elf_map *map, + struct bpf_elf_ctx *ctx, int inner_fd) +{ + fprintf(stderr, "Map object \'%s\' %s%s (%d)!\n", name, + fd < 0 ? "rejected: " : "loaded", + fd < 0 ? strerror(errno) : "", + fd < 0 ? errno : fd); + + fprintf(stderr, " - Type: %u\n", map->type); + fprintf(stderr, " - Identifier: %u\n", map->id); + fprintf(stderr, " - Pinning: %u\n", map->pinning); + fprintf(stderr, " - Size key: %u\n", map->size_key); + fprintf(stderr, " - Size value: %u\n", + inner_fd ? 
(int)sizeof(int) : map->size_value); + fprintf(stderr, " - Max elems: %u\n", map->max_elem); + fprintf(stderr, " - Flags: %#x\n\n", map->flags); +} + +static int bpf_find_map_id(const struct bpf_elf_ctx *ctx, uint32_t id) +{ + int i; + + for (i = 0; i < ctx->map_num; i++) { + if (ctx->maps[i].id != id) + continue; + if (ctx->map_fds[i] < 0) + return -EINVAL; + + return ctx->map_fds[i]; + } + + return -ENOENT; +} + +static void bpf_report_map_in_map(int outer_fd, uint32_t idx) +{ + struct bpf_elf_map outer_map; + int ret; + + fprintf(stderr, "Cannot insert map into map! "); + + ret = bpf_derive_elf_map_from_fdinfo(outer_fd, &outer_map, NULL); + if (!ret) { + if (idx >= outer_map.max_elem && + outer_map.type == BPF_MAP_TYPE_ARRAY_OF_MAPS) { + fprintf(stderr, "Outer map has %u elements, index %u is invalid!\n", + outer_map.max_elem, idx); + return; + } + } + + fprintf(stderr, "Different map specs used for outer and inner map?\n"); +} + +static bool bpf_is_map_in_map_type(const struct bpf_elf_map *map) +{ + return map->type == BPF_MAP_TYPE_ARRAY_OF_MAPS || + map->type == BPF_MAP_TYPE_HASH_OF_MAPS; +} + +static bool bpf_map_offload_neutral(enum bpf_map_type type) +{ + return type == BPF_MAP_TYPE_PERF_EVENT_ARRAY; +} + +static int bpf_map_attach(const char *name, struct bpf_elf_ctx *ctx, + const struct bpf_elf_map *map, struct bpf_map_ext *ext, + int *have_map_in_map) +{ + int fd, ifindex, ret, map_inner_fd = 0; + bool retried = false; + +probe: + fd = bpf_probe_pinned(name, ctx, map->pinning); + if (fd > 0) { + ret = bpf_map_selfcheck_pinned(fd, map, ext, + offsetof(struct bpf_elf_map, + id), ctx->type); + if (ret < 0) { + close(fd); + fprintf(stderr, "Map \'%s\' self-check failed!\n", + name); + return ret; + } + if (ctx->verbose) + fprintf(stderr, "Map \'%s\' loaded as pinned!\n", + name); + return fd; + } + + if (have_map_in_map && bpf_is_map_in_map_type(map)) { + (*have_map_in_map)++; + if (map->inner_id) + return 0; + fprintf(stderr, "Map \'%s\' cannot be created 
since no inner map ID defined!\n", + name); + return -EINVAL; + } + + if (!have_map_in_map && bpf_is_map_in_map_type(map)) { + map_inner_fd = bpf_find_map_id(ctx, map->inner_id); + if (map_inner_fd < 0) { + fprintf(stderr, "Map \'%s\' cannot be loaded. Inner map with ID %u not found!\n", + name, map->inner_id); + return -EINVAL; + } + } + + ifindex = bpf_map_offload_neutral(map->type) ? 0 : ctx->ifindex; + errno = 0; + fd = bpf_map_create(map->type, map->size_key, map->size_value, + map->max_elem, map->flags, map_inner_fd, ctx->btf_fd, + ifindex, ext->btf_id_key, ext->btf_id_val); + + if (fd < 0 || ctx->verbose) { + bpf_map_report(fd, name, map, ctx, map_inner_fd); + if (fd < 0) + return fd; + } + + ret = bpf_place_pinned(fd, name, ctx, map->pinning); + if (ret < 0) { + close(fd); + if (!retried && errno == EEXIST) { + retried = true; + goto probe; + } + fprintf(stderr, "Could not pin %s map: %s\n", name, + strerror(errno)); + return ret; + } + + return fd; +} + +static const char *bpf_str_tab_name(const struct bpf_elf_ctx *ctx, + const GElf_Sym *sym) +{ + return ctx->str_tab->d_buf + sym->st_name; +} + +static int bpf_btf_find(struct bpf_elf_ctx *ctx, const char *name) +{ + const struct btf_type *type; + const char *res; + int id; + + for (id = 1; id < ctx->btf.types_num; id++) { + type = ctx->btf.types[id]; + if (type->name_off >= ctx->btf.hdr->str_len) + continue; + res = &ctx->btf.strings[type->name_off]; + if (!strcmp(res, name)) + return id; + } + + return -ENOENT; +} + +static int bpf_btf_find_kv(struct bpf_elf_ctx *ctx, const struct bpf_elf_map *map, + const char *name, uint32_t *id_key, uint32_t *id_val) +{ + const struct btf_member *key, *val; + const struct btf_type *type; + char btf_name[512]; + const char *res; + int id; + + snprintf(btf_name, sizeof(btf_name), "____btf_map_%s", name); + id = bpf_btf_find(ctx, btf_name); + if (id < 0) + return id; + + type = ctx->btf.types[id]; + if (BTF_INFO_KIND(type->info) != BTF_KIND_STRUCT) + return -EINVAL; + if 
(BTF_INFO_VLEN(type->info) != 2)
+		return -EINVAL;
+	/* key points just past the btf_type record, val at the next member. */
+	key = ((void *) type) + sizeof(*type);
+	val = key + 1;
+	if (!key->type || key->type >= ctx->btf.types_num ||
+	    !val->type || val->type >= ctx->btf.types_num)
+		return -EINVAL;
+
+	if (key->name_off >= ctx->btf.hdr->str_len ||
+	    val->name_off >= ctx->btf.hdr->str_len)
+		return -EINVAL;
+	/* First member must be named "key", second "value". */
+	res = &ctx->btf.strings[key->name_off];
+	if (strcmp(res, "key"))
+		return -EINVAL;
+
+	res = &ctx->btf.strings[val->name_off];
+	if (strcmp(res, "value"))
+		return -EINVAL;
+
+	*id_key = key->type;
+	*id_val = val->type;
+	return 0;
+}
+/*
+ * Store the BTF key/value type ids of map slot @which in maps_ext[which].
+ * Nothing is written when bpf_btf_find_kv() reports a failure.
+ */
+static void bpf_btf_annotate(struct bpf_elf_ctx *ctx, int which, const char *name)
+{
+	uint32_t id_key = 0, id_val = 0;
+
+	if (!bpf_btf_find_kv(ctx, &ctx->maps[which], name, &id_key, &id_val)) {
+		ctx->maps_ext[which].btf_id_key = id_key;
+		ctx->maps_ext[which].btf_id_val = id_val;
+	}
+}
+/*
+ * Find the global NOTYPE/OBJECT symbol in the maps section whose value
+ * divided by ctx->map_len equals @which. On a match the map slot is
+ * BTF-annotated and the symbol name is returned; NULL if nothing matches.
+ * Symbols that gelf_getsym() cannot fetch are skipped.
+ */
+static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which)
+{
+	const char *name;
+	GElf_Sym sym;
+	int i;
+
+	for (i = 0; i < ctx->sym_num; i++) {
+		int type;
+
+		if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
+			continue;
+
+		type = GELF_ST_TYPE(sym.st_info);
+		if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
+		    (type != STT_NOTYPE && type != STT_OBJECT) ||
+		    sym.st_shndx != ctx->sec_maps ||
+		    sym.st_value / ctx->map_len != which)
+			continue;
+
+		name = bpf_str_tab_name(ctx, &sym);
+		bpf_btf_annotate(ctx, which, name);
+		return name;
+	}
+
+	return NULL;
+}
+/*
+ * Attach all maps of the object in up to three passes:
+ *  1) attach every map; a slot for which bpf_map_attach() returned 0 is
+ *     stored as -1 so that pass 2 picks it up,
+ *  2) if have_map_in_map was raised, attach the slots still negative,
+ *  3) for each map with an id, no inner_id and inner_idx != -1, insert its
+ *     fd at key inner_idx into every map-in-map whose inner_id equals it.
+ * Returns 0 on success or the first negative error encountered.
+ */
+static int bpf_maps_attach_all(struct bpf_elf_ctx *ctx)
+{
+	int i, j, ret, fd, inner_fd, inner_idx, have_map_in_map = 0;
+	const char *map_name;
+
+	for (i = 0; i < ctx->map_num; i++) {
+		if (ctx->maps[i].pinning == PIN_OBJECT_NS &&
+		    ctx->noafalg) {
+			fprintf(stderr, "Missing kernel AF_ALG support for PIN_OBJECT_NS!\n");
+			return -ENOTSUP;
+		}
+
+		map_name = bpf_map_fetch_name(ctx, i);
+		if (!map_name)
+			return -EIO;
+
+		fd = bpf_map_attach(map_name, ctx, &ctx->maps[i],
+				    &ctx->maps_ext[i], &have_map_in_map);
+		if (fd < 0)
+
return fd;
+
+		ctx->map_fds[i] = !fd ? -1 : fd;
+	}
+
+	for (i = 0; have_map_in_map && i < ctx->map_num; i++) {
+		if (ctx->map_fds[i] >= 0)
+			continue;
+
+		map_name = bpf_map_fetch_name(ctx, i);
+		if (!map_name)
+			return -EIO;
+
+		fd = bpf_map_attach(map_name, ctx, &ctx->maps[i],
+				    &ctx->maps_ext[i], NULL);
+		if (fd < 0)
+			return fd;
+
+		ctx->map_fds[i] = fd;
+	}
+
+	for (i = 0; have_map_in_map && i < ctx->map_num; i++) {
+		if (!ctx->maps[i].id ||
+		    ctx->maps[i].inner_id ||
+		    ctx->maps[i].inner_idx == -1)
+			continue;
+
+		inner_fd = ctx->map_fds[i];
+		inner_idx = ctx->maps[i].inner_idx;
+
+		for (j = 0; j < ctx->map_num; j++) {
+			if (!bpf_is_map_in_map_type(&ctx->maps[j]))
+				continue;
+			if (ctx->maps[j].inner_id != ctx->maps[i].id)
+				continue;
+
+			ret = bpf_map_update(ctx->map_fds[j], &inner_idx,
+					     &inner_fd, BPF_ANY);
+			if (ret < 0) {
+				bpf_report_map_in_map(ctx->map_fds[j],
+						      inner_idx);
+				return ret;
+			}
+		}
+	}
+
+	return 0;
+}
+/*
+ * Count the symbols that are global, of type NOTYPE or OBJECT, and located
+ * in the maps section. Symbols gelf_getsym() cannot fetch are skipped.
+ */
+static int bpf_map_num_sym(struct bpf_elf_ctx *ctx)
+{
+	int i, num = 0;
+	GElf_Sym sym;
+
+	for (i = 0; i < ctx->sym_num; i++) {
+		int type;
+
+		if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
+			continue;
+
+		type = GELF_ST_TYPE(sym.st_info);
+		if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
+		    (type != STT_NOTYPE && type != STT_OBJECT) ||
+		    sym.st_shndx != ctx->sec_maps)
+			continue;
+		num++;
+	}
+
+	return num;
+}
+/*
+ * Fill @data with the header, name and data descriptor of section number
+ * @section; @data is zeroed first. Returns 0 on success, -EINVAL if the
+ * section cannot be obtained, -EIO if its header or data cannot be read
+ * (or elf_getdata() yields a further descriptor after the first one), and
+ * -ENOENT if the section has no name or a zero sh_size.
+ */
+static int bpf_fill_section_data(struct bpf_elf_ctx *ctx, int section,
+				 struct bpf_elf_sec_data *data)
+{
+	Elf_Data *sec_edata;
+	GElf_Shdr sec_hdr;
+	Elf_Scn *sec_fd;
+	char *sec_name;
+
+	memset(data, 0, sizeof(*data));
+
+	sec_fd = elf_getscn(ctx->elf_fd, section);
+	if (!sec_fd)
+		return -EINVAL;
+	if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr)
+		return -EIO;
+
+	sec_name = elf_strptr(ctx->elf_fd, ctx->elf_hdr.e_shstrndx,
+			      sec_hdr.sh_name);
+	if (!sec_name || !sec_hdr.sh_size)
+		return -ENOENT;
+
+	sec_edata = elf_getdata(sec_fd, NULL);
+	if (!sec_edata || elf_getdata(sec_fd, sec_edata))
+		return -EIO;
+
+
memcpy(&data->sec_hdr, &sec_hdr, sizeof(sec_hdr));
+
+	data->sec_name = sec_name;
+	data->sec_data = sec_edata;
+	return 0;
+}
+
+/* Leading members of a map record; bpf_fetch_maps_end() rejects any
+ * per-map record length smaller than this structure.
+ */
+struct bpf_elf_map_min {
+	__u32 type;
+	__u32 size_key;
+	__u32 size_value;
+	__u32 max_elem;
+};
+
+/* Copy the raw maps section into ctx->maps and remember its section index.
+ * At this stage ctx->map_num holds the section size in bytes, not a map
+ * count. Returns -ENOMEM if the section is larger than ctx->maps.
+ */
+static int bpf_fetch_maps_begin(struct bpf_elf_ctx *ctx, int section,
+				struct bpf_elf_sec_data *data)
+{
+	ctx->map_num = data->sec_data->d_size;
+	ctx->sec_maps = section;
+	ctx->sec_done[section] = true;
+
+	if (ctx->map_num > sizeof(ctx->maps)) {
+		fprintf(stderr, "Too many BPF maps in ELF section!\n");
+		return -ENOMEM;
+	}
+
+	memcpy(ctx->maps, data->sec_data->d_buf, ctx->map_num);
+	return 0;
+}
+
+/* Verify that every offset 0, map_len, 2 * map_len, ... below @end is the
+ * value of some global NOTYPE/OBJECT symbol in the maps section, and that
+ * the walk stops exactly at @end. Returns 0 if so, -1 otherwise.
+ */
+static int bpf_map_verify_all_offs(struct bpf_elf_ctx *ctx, int end)
+{
+	GElf_Sym sym;
+	int off, i;
+
+	for (off = 0; off < end; off += ctx->map_len) {
+		/* Order doesn't need to be linear here, hence we walk
+		 * the table again.
+		 */
+		for (i = 0; i < ctx->sym_num; i++) {
+			int type;
+
+			if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
+				continue;
+
+			type = GELF_ST_TYPE(sym.st_info);
+			if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
+			    (type != STT_NOTYPE && type != STT_OBJECT) ||
+			    sym.st_shndx != ctx->sec_maps)
+				continue;
+			if (sym.st_value == off)
+				break;
+		}
+
+		/* The scan ran off the end of the symbol table, so no map
+		 * symbol sits at this offset. Testing this after the loop
+		 * (rather than on the last iteration) also catches the case
+		 * where the final table entry is not a maps symbol at all.
+		 */
+		if (i == ctx->sym_num)
+			return -1;
+	}
+
+	return off == end ? 
0 : -1;
+}
+/*
+ * Finish parsing the maps section. The per-map record length is the
+ * section size (held in ctx->map_num on entry) divided by the number of
+ * map symbols. Records of exactly sizeof(struct bpf_elf_map) are used as
+ * is; shorter ones (but not below struct bpf_elf_map_min) are expanded
+ * into zero-padded full-size entries; anything else is rejected with
+ * -EINVAL. On success ctx->map_num holds the number of maps.
+ */
+static int bpf_fetch_maps_end(struct bpf_elf_ctx *ctx)
+{
+	struct bpf_elf_map fixup[ARRAY_SIZE(ctx->maps)] = {};
+	int i, sym_num = bpf_map_num_sym(ctx);
+	__u8 *buff;
+
+	if (sym_num == 0 || sym_num > ARRAY_SIZE(ctx->maps)) {
+		fprintf(stderr, "%u maps not supported in current map section!\n",
+			sym_num);
+		return -EINVAL;
+	}
+
+	if (ctx->map_num % sym_num != 0 ||
+	    ctx->map_num % sizeof(__u32) != 0) {
+		fprintf(stderr, "Number BPF map symbols are not multiple of struct bpf_elf_map!\n");
+		return -EINVAL;
+	}
+
+	ctx->map_len = ctx->map_num / sym_num;
+	if (bpf_map_verify_all_offs(ctx, ctx->map_num)) {
+		fprintf(stderr, "Different struct bpf_elf_map in use!\n");
+		return -EINVAL;
+	}
+
+	if (ctx->map_len == sizeof(struct bpf_elf_map)) {
+		ctx->map_num = sym_num;
+		return 0;
+	} else if (ctx->map_len > sizeof(struct bpf_elf_map)) {
+		fprintf(stderr, "struct bpf_elf_map not supported, coming from future version?\n");
+		return -EINVAL;
+	} else if (ctx->map_len < sizeof(struct bpf_elf_map_min)) {
+		fprintf(stderr, "struct bpf_elf_map too small, not supported!\n");
+		return -EINVAL;
+	}
+	/* Shorter records: copy each one into its own zeroed full-size slot. */
+	ctx->map_num = sym_num;
+	for (i = 0, buff = (void *)ctx->maps; i < ctx->map_num;
+	     i++, buff += ctx->map_len) {
+		/* The fixup leaves the rest of the members as zero, which
+		 * is fine currently, but option exist to set some other
+		 * default value as well when needed in future. 
+		 */
+		memcpy(&fixup[i], buff, ctx->map_len);
+	}
+
+	memcpy(ctx->maps, fixup, sizeof(fixup));
+	if (ctx->verbose)
+		printf("%zu bytes struct bpf_elf_map fixup performed due to size mismatch!\n",
+		       sizeof(struct bpf_elf_map) - ctx->map_len);
+	return 0;
+}
+
+/* Copy the license section into ctx->license and mark the section done.
+ * Returns -ENOMEM if the section is larger than the license buffer.
+ */
+static int bpf_fetch_license(struct bpf_elf_ctx *ctx, int section,
+			     struct bpf_elf_sec_data *data)
+{
+	if (data->sec_data->d_size > sizeof(ctx->license))
+		return -ENOMEM;
+
+	memcpy(ctx->license, data->sec_data->d_buf, data->sec_data->d_size);
+	ctx->sec_done[section] = true;
+	return 0;
+}
+
+/* Remember the symbol table and its entry count (sh_size / sh_entsize),
+ * and mark the section done. Returns -EINVAL for a zero entry size.
+ */
+static int bpf_fetch_symtab(struct bpf_elf_ctx *ctx, int section,
+			    struct bpf_elf_sec_data *data)
+{
+	/* sh_entsize is read from the object file; a zero value would make
+	 * the division below fault, so treat it as a malformed section.
+	 */
+	if (!data->sec_hdr.sh_entsize)
+		return -EINVAL;
+
+	ctx->sym_tab = data->sec_data;
+	ctx->sym_num = data->sec_hdr.sh_size / data->sec_hdr.sh_entsize;
+	ctx->sec_done[section] = true;
+	return 0;
+}
+
+/* Remember the string table data and mark the section done. */
+static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section,
+			    struct bpf_elf_sec_data *data)
+{
+	ctx->str_tab = data->sec_data;
+	ctx->sec_done[section] = true;
+	return 0;
+}
+
+/* Remember the index of the .text section and mark it done. */
+static int bpf_fetch_text(struct bpf_elf_ctx *ctx, int section,
+			  struct bpf_elf_sec_data *data)
+{
+	ctx->sec_text = section;
+	ctx->sec_done[section] = true;
+	return 0;
+}
+
+/* Print the outcome of loading the .BTF section to stderr: "rejected"
+ * with the errno text when @fd is negative, "loaded" with the fd otherwise,
+ * followed by the section length and the collected log.
+ */
+static void bpf_btf_report(int fd, struct bpf_elf_ctx *ctx)
+{
+	fprintf(stderr, "\nBTF debug data section \'.BTF\' %s%s (%d)!\n",
+		fd < 0 ? "rejected: " : "loaded",
+		fd < 0 ? strerror(errno) : "",
+		fd < 0 ? 
errno : fd);
+
+	fprintf(stderr, " - Length: %zu\n", ctx->btf_data->d_size);
+
+	bpf_dump_error(ctx, "Verifier analysis:\n\n");
+}
+/*
+ * Load the .BTF section through bpf_btf_load(). If loading fails with
+ * ENOSPC, or while no log buffer is configured, the log is grown with
+ * bpf_log_realloc() and the load retried, at most 10 times. A report is
+ * printed when loading failed or verbose mode is on and the log has data.
+ * Returns the value bpf_btf_load() produced (negative on failure).
+ */
+static int bpf_btf_attach(struct bpf_elf_ctx *ctx)
+{
+	int tries = 0, fd;
+retry:
+	errno = 0;
+	fd = bpf_btf_load(ctx->btf_data->d_buf, ctx->btf_data->d_size,
+			  ctx->log, ctx->log_size);
+	if (fd < 0 || ctx->verbose) {
+		if (fd < 0 && (errno == ENOSPC || !ctx->log_size)) {
+			if (tries++ < 10 && !bpf_log_realloc(ctx))
+				goto retry;
+
+			fprintf(stderr, "Log buffer too small to dump verifier log %zu bytes (%d tries)!\n",
+				ctx->log_size, tries);
+			return fd;
+		}
+
+		if (bpf_log_has_data(ctx))
+			bpf_btf_report(fd, ctx);
+	}
+
+	return fd;
+}
+/* Remember the .BTF section data and index, and mark the section done. */
+static int bpf_fetch_btf_begin(struct bpf_elf_ctx *ctx, int section,
+			       struct bpf_elf_sec_data *data)
+{
+	ctx->btf_data = data->sec_data;
+	ctx->sec_btf = section;
+	ctx->sec_done[section] = true;
+	return 0;
+}
+/*
+ * Validate the BTF header at the start of the .BTF data: magic, version,
+ * no flags, and a type region followed directly by a string region that
+ * together end exactly at the end of the data, with type_off a multiple of
+ * four. The string region must be non-empty, within BTF_MAX_NAME_OFFSET,
+ * and begin and end with a NUL byte. On success ctx->btf.hdr, .raw and
+ * .strings are set. Returns 0 or -EINVAL.
+ * NOTE(review): d_size is not compared against sizeof(*hdr) before the
+ * header fields are read - confirm callers guarantee a full header.
+ */
+static int bpf_btf_check_header(struct bpf_elf_ctx *ctx)
+{
+	const struct btf_header *hdr = ctx->btf_data->d_buf;
+	const char *str_start, *str_end;
+	unsigned int data_len;
+
+	if (hdr->magic != BTF_MAGIC) {
+		fprintf(stderr, "Object has wrong BTF magic: %x, expected: %x!\n",
+			hdr->magic, BTF_MAGIC);
+		return -EINVAL;
+	}
+
+	if (hdr->version != BTF_VERSION) {
+		fprintf(stderr, "Object has wrong BTF version: %u, expected: %u!\n",
+			hdr->version, BTF_VERSION);
+		return -EINVAL;
+	}
+
+	if (hdr->flags) {
+		fprintf(stderr, "Object has unsupported BTF flags %x!\n",
+			hdr->flags);
+		return -EINVAL;
+	}
+	/* Offsets in the header are relative to the end of the header. */
+	data_len = ctx->btf_data->d_size - sizeof(*hdr);
+	if (data_len < hdr->type_off ||
+	    data_len < hdr->str_off ||
+	    data_len < hdr->type_len + hdr->str_len ||
+	    hdr->type_off >= hdr->str_off ||
+	    hdr->type_off + hdr->type_len != hdr->str_off ||
+	    hdr->str_off + hdr->str_len != data_len ||
+	    (hdr->type_off & (sizeof(uint32_t) - 1))) {
+		fprintf(stderr, "Object has malformed BTF data!\n");
+		return -EINVAL;
+	}
+
+	ctx->btf.hdr = hdr;
+	ctx->btf.raw = hdr + 1;
+
+	str_start = 
ctx->btf.raw + hdr->str_off;
+	str_end = str_start + hdr->str_len;
+	if (!hdr->str_len ||
+	    hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
+	    str_start[0] || str_end[-1]) {
+		fprintf(stderr, "Object has malformed BTF string data!\n");
+		return -EINVAL;
+	}
+
+	ctx->btf.strings = str_start;
+	return 0;
+}
+/*
+ * Append @type to ctx->btf.types, growing the array by one slot. If the
+ * reallocation fails, the existing array is freed, the count reset to 0
+ * and -ENOMEM returned.
+ */
+static int bpf_btf_register_type(struct bpf_elf_ctx *ctx,
+				 const struct btf_type *type)
+{
+	int cur = ctx->btf.types_num, num = cur + 1;
+	const struct btf_type **types;
+
+	types = realloc(ctx->btf.types, num * sizeof(type));
+	if (!types) {
+		free(ctx->btf.types);
+		ctx->btf.types = NULL;
+		ctx->btf.types_num = 0;
+		return -ENOMEM;
+	}
+
+	ctx->btf.types = types;
+	ctx->btf.types[cur] = type;
+	ctx->btf.types_num = num;
+	return 0;
+}
+/* Zero-initialised record registered ahead of the types read from the file. */
+static struct btf_type btf_type_void;
+/*
+ * Index the BTF type region: register btf_type_void first, then walk the
+ * region from type_off to str_off record by record, skipping the trailing
+ * data each kind carries, and register a pointer to every record. Returns
+ * 0 on success, -EINVAL on an unhandled kind, or the registration error.
+ */
+static int bpf_btf_prep_type_data(struct bpf_elf_ctx *ctx)
+{
+	const void *type_cur = ctx->btf.raw + ctx->btf.hdr->type_off;
+	const void *type_end = ctx->btf.raw + ctx->btf.hdr->str_off;
+	const struct btf_type *type;
+	uint16_t var_len;
+	int ret, kind;
+
+	ret = bpf_btf_register_type(ctx, &btf_type_void);
+	if (ret < 0)
+		return ret;
+
+	while (type_cur < type_end) {
+		type = type_cur;
+		type_cur += sizeof(*type);
+
+		var_len = BTF_INFO_VLEN(type->info);
+		kind = BTF_INFO_KIND(type->info);
+		/* Advance past the kind-specific data following the record. */
+		switch (kind) {
+		case BTF_KIND_INT:
+			type_cur += sizeof(int);
+			break;
+		case BTF_KIND_ARRAY:
+			type_cur += sizeof(struct btf_array);
+			break;
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION:
+			type_cur += var_len * sizeof(struct btf_member);
+			break;
+		case BTF_KIND_ENUM:
+			type_cur += var_len * sizeof(struct btf_enum);
+			break;
+		case BTF_KIND_FUNC_PROTO:
+			type_cur += var_len * sizeof(struct btf_param);
+			break;
+		case BTF_KIND_TYPEDEF:
+		case BTF_KIND_PTR:
+		case BTF_KIND_FWD:
+		case BTF_KIND_VOLATILE:
+		case BTF_KIND_CONST:
+		case BTF_KIND_RESTRICT:
+		case BTF_KIND_FUNC:
+			break;
+		default:
+			fprintf(stderr, "Object has unknown BTF type: %u!\n", kind);
+			return -EINVAL;
+		}
+
+		ret = 
bpf_btf_register_type(ctx, type);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+/* Validate the BTF header and, if that succeeds, index the type records. */
+static int bpf_btf_prep_data(struct bpf_elf_ctx *ctx)
+{
+	int ret = bpf_btf_check_header(ctx);
+
+	if (!ret)
+		return bpf_btf_prep_type_data(ctx);
+	return ret;
+}
+/*
+ * Load the BTF data and keep the resulting fd in ctx->btf_fd. If loading
+ * fails nothing is stored; if indexing the data fails afterwards, the fd
+ * is closed and btf_fd reset to 0. No error is reported to the caller.
+ */
+static void bpf_fetch_btf_end(struct bpf_elf_ctx *ctx)
+{
+	int fd = bpf_btf_attach(ctx);
+
+	if (fd < 0)
+		return;
+	ctx->btf_fd = fd;
+	if (bpf_btf_prep_data(ctx) < 0) {
+		close(ctx->btf_fd);
+		ctx->btf_fd = 0;
+	}
+}
+/* True once symbol table, string table and a maps section were all seen. */
+static bool bpf_has_map_data(const struct bpf_elf_ctx *ctx)
+{
+	return ctx->sym_tab && ctx->str_tab && ctx->sec_maps;
+}
+/* True if a .BTF section index was recorded. */
+static bool bpf_has_btf_data(const struct bpf_elf_ctx *ctx)
+{
+	return ctx->sec_btf;
+}
+/* True if a .text section index was recorded. */
+static bool bpf_has_call_data(const struct bpf_elf_ctx *ctx)
+{
+	return ctx->sec_text;
+}
+/*
+ * Walk all sections and hand the maps, license, .text (only when
+ * @check_text_sec is set), .symtab, .strtab and .BTF sections to their
+ * handlers; sections that cannot be read are skipped. Afterwards BTF is
+ * loaded if present, and maps are fixed up and attached if present.
+ * NOTE(review): when no map data is present, the value returned is
+ * whatever the last loop iteration left in ret (possibly a negative
+ * bpf_fill_section_data() result, or the initial -1) - confirm intended.
+ */
+static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx, bool check_text_sec)
+{
+	struct bpf_elf_sec_data data;
+	int i, ret = -1;
+
+	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
+		ret = bpf_fill_section_data(ctx, i, &data);
+		if (ret < 0)
+			continue;
+
+		if (data.sec_hdr.sh_type == SHT_PROGBITS &&
+		    !strcmp(data.sec_name, ELF_SECTION_MAPS))
+			ret = bpf_fetch_maps_begin(ctx, i, &data);
+		else if (data.sec_hdr.sh_type == SHT_PROGBITS &&
+			 !strcmp(data.sec_name, ELF_SECTION_LICENSE))
+			ret = bpf_fetch_license(ctx, i, &data);
+		else if (data.sec_hdr.sh_type == SHT_PROGBITS &&
+			 (data.sec_hdr.sh_flags & SHF_EXECINSTR) &&
+			 !strcmp(data.sec_name, ".text") &&
+			 check_text_sec)
+			ret = bpf_fetch_text(ctx, i, &data);
+		else if (data.sec_hdr.sh_type == SHT_SYMTAB &&
+			 !strcmp(data.sec_name, ".symtab"))
+			ret = bpf_fetch_symtab(ctx, i, &data);
+		else if (data.sec_hdr.sh_type == SHT_STRTAB &&
+			 !strcmp(data.sec_name, ".strtab"))
+			ret = bpf_fetch_strtab(ctx, i, &data);
+		else if (data.sec_hdr.sh_type == SHT_PROGBITS &&
+			 !strcmp(data.sec_name, ".BTF"))
+			ret = bpf_fetch_btf_begin(ctx, i, &data);
+		if (ret < 0) {
+			fprintf(stderr, "Error parsing section %d! 
Perhaps check with readelf -a?\n", + i); + return ret; + } + } + + if (bpf_has_btf_data(ctx)) + bpf_fetch_btf_end(ctx); + if (bpf_has_map_data(ctx)) { + ret = bpf_fetch_maps_end(ctx); + if (ret < 0) { + fprintf(stderr, "Error fixing up map structure, incompatible struct bpf_elf_map used?\n"); + return ret; + } + + ret = bpf_maps_attach_all(ctx); + if (ret < 0) { + fprintf(stderr, "Error loading maps into kernel!\n"); + return ret; + } + } + + return ret; +} + +static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section, + bool *sseen) +{ + struct bpf_elf_sec_data data; + struct bpf_elf_prog prog; + int ret, i, fd = -1; + + for (i = 1; i < ctx->elf_hdr.e_shnum; i++) { + if (ctx->sec_done[i]) + continue; + + ret = bpf_fill_section_data(ctx, i, &data); + if (ret < 0 || + !(data.sec_hdr.sh_type == SHT_PROGBITS && + (data.sec_hdr.sh_flags & SHF_EXECINSTR) && + !strcmp(data.sec_name, section))) + continue; + + *sseen = true; + + memset(&prog, 0, sizeof(prog)); + prog.type = ctx->type; + prog.license = ctx->license; + prog.size = data.sec_data->d_size; + prog.insns_num = prog.size / sizeof(struct bpf_insn); + prog.insns = data.sec_data->d_buf; + + fd = bpf_prog_attach(section, &prog, ctx); + if (fd < 0) + return fd; + + ctx->sec_done[i] = true; + break; + } + + return fd; +} + +struct bpf_relo_props { + struct bpf_tail_call { + unsigned int total; + unsigned int jited; + } tc; + int main_num; +}; + +static int bpf_apply_relo_map(struct bpf_elf_ctx *ctx, struct bpf_elf_prog *prog, + GElf_Rel *relo, GElf_Sym *sym, + struct bpf_relo_props *props) +{ + unsigned int insn_off = relo->r_offset / sizeof(struct bpf_insn); + unsigned int map_idx = sym->st_value / ctx->map_len; + + if (insn_off >= prog->insns_num) + return -EINVAL; + if (prog->insns[insn_off].code != (BPF_LD | BPF_IMM | BPF_DW)) { + fprintf(stderr, "ELF contains relo data for non ld64 instruction at offset %u! 
Compiler bug?!\n", + insn_off); + return -EINVAL; + } + + if (map_idx >= ARRAY_SIZE(ctx->map_fds)) + return -EINVAL; + if (!ctx->map_fds[map_idx]) + return -EINVAL; + if (ctx->maps[map_idx].type == BPF_MAP_TYPE_PROG_ARRAY) { + props->tc.total++; + if (ctx->maps_ext[map_idx].owner.jited || + (ctx->maps_ext[map_idx].owner.type == 0 && + ctx->cfg.jit_enabled)) + props->tc.jited++; + } + + prog->insns[insn_off].src_reg = BPF_PSEUDO_MAP_FD; + prog->insns[insn_off].imm = ctx->map_fds[map_idx]; + return 0; +} + +static int bpf_apply_relo_call(struct bpf_elf_ctx *ctx, struct bpf_elf_prog *prog, + GElf_Rel *relo, GElf_Sym *sym, + struct bpf_relo_props *props) +{ + unsigned int insn_off = relo->r_offset / sizeof(struct bpf_insn); + struct bpf_elf_prog *prog_text = &ctx->prog_text; + + if (insn_off >= prog->insns_num) + return -EINVAL; + if (prog->insns[insn_off].code != (BPF_JMP | BPF_CALL) && + prog->insns[insn_off].src_reg != BPF_PSEUDO_CALL) { + fprintf(stderr, "ELF contains relo data for non call instruction at offset %u! 
Compiler bug?!\n", + insn_off); + return -EINVAL; + } + + if (!props->main_num) { + struct bpf_insn *insns = realloc(prog->insns, + prog->size + prog_text->size); + if (!insns) + return -ENOMEM; + + memcpy(insns + prog->insns_num, prog_text->insns, + prog_text->size); + props->main_num = prog->insns_num; + prog->insns = insns; + prog->insns_num += prog_text->insns_num; + prog->size += prog_text->size; + } + + prog->insns[insn_off].imm += props->main_num - insn_off; + return 0; +} + +static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx, + struct bpf_elf_sec_data *data_relo, + struct bpf_elf_prog *prog, + struct bpf_relo_props *props) +{ + GElf_Shdr *rhdr = &data_relo->sec_hdr; + int relo_ent, relo_num = rhdr->sh_size / rhdr->sh_entsize; + + for (relo_ent = 0; relo_ent < relo_num; relo_ent++) { + GElf_Rel relo; + GElf_Sym sym; + int ret = -EIO; + + if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo) + return -EIO; + if (gelf_getsym(ctx->sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym) + return -EIO; + + if (sym.st_shndx == ctx->sec_maps) + ret = bpf_apply_relo_map(ctx, prog, &relo, &sym, props); + else if (sym.st_shndx == ctx->sec_text) + ret = bpf_apply_relo_call(ctx, prog, &relo, &sym, props); + else + fprintf(stderr, "ELF contains non-{map,call} related relo data in entry %u pointing to section %u! 
Compiler bug?!\n",
+				relo_ent, sym.st_shndx);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+/*
+ * Find the first SHT_REL section whose target (sh_info) is an executable
+ * PROGBITS section named @section, copy that section's instructions into
+ * a malloc'ed buffer in @prog, apply the relocations and load the program.
+ * When the target is the .text section itself, nothing is loaded: the
+ * relocated copy stays in @prog and 0 is returned. Otherwise the buffer is
+ * freed after bpf_prog_attach() and its result returned. *lderr is set on
+ * allocation, relocation or load failure; *sseen (if non-NULL) once a
+ * matching section was found. Returns -1 if no section matched.
+ */
+static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section,
+			       bool *lderr, bool *sseen, struct bpf_elf_prog *prog)
+{
+	struct bpf_elf_sec_data data_relo, data_insn;
+	int ret, idx, i, fd = -1;
+
+	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
+		struct bpf_relo_props props = {};
+
+		ret = bpf_fill_section_data(ctx, i, &data_relo);
+		if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL)
+			continue;
+
+		idx = data_relo.sec_hdr.sh_info;
+
+		ret = bpf_fill_section_data(ctx, idx, &data_insn);
+		if (ret < 0 ||
+		    !(data_insn.sec_hdr.sh_type == SHT_PROGBITS &&
+		      (data_insn.sec_hdr.sh_flags & SHF_EXECINSTR) &&
+		      !strcmp(data_insn.sec_name, section)))
+			continue;
+		if (sseen)
+			*sseen = true;
+
+		memset(prog, 0, sizeof(*prog));
+		prog->type = ctx->type;
+		prog->license = ctx->license;
+		prog->size = data_insn.sec_data->d_size;
+		prog->insns_num = prog->size / sizeof(struct bpf_insn);
+		prog->insns = malloc(prog->size);
+		if (!prog->insns) {
+			*lderr = true;
+			return -ENOMEM;
+		}
+
+		memcpy(prog->insns, data_insn.sec_data->d_buf, prog->size);
+
+		ret = bpf_apply_relo_data(ctx, &data_relo, prog, &props);
+		if (ret < 0) {
+			*lderr = true;
+			if (ctx->sec_text != idx)
+				free(prog->insns);
+			return ret;
+		}
+		if (ctx->sec_text == idx) {
+			fd = 0;
+			goto out;
+		}
+
+		fd = bpf_prog_attach(section, prog, ctx);
+		free(prog->insns);
+		if (fd < 0) {
+			*lderr = true;
+			if (props.tc.total) {
+				if (ctx->cfg.jit_enabled &&
+				    props.tc.total != props.tc.jited)
+					fprintf(stderr, "JIT enabled, but only %u/%u tail call maps in the program have JITed owner!\n",
+						props.tc.jited, props.tc.total);
+				if (!ctx->cfg.jit_enabled &&
+				    props.tc.jited)
+					fprintf(stderr, "JIT disabled, but %u/%u tail call maps in the program have JITed owner!\n",
+						props.tc.jited, props.tc.total);
+			}
+			return fd;
+		}
+out:
+		ctx->sec_done[i] = true;
+		ctx->sec_done[idx] = true;
+		break;
+	}
+
+	return fd;
+}
+
+static int bpf_fetch_prog_sec(struct bpf_elf_ctx *ctx, const char *section)
+{
+	bool lderr = false, sseen = false;
+	struct bpf_elf_prog prog;
+	int ret = -1;
+	/* Relocate .text first so later call relocations can append it. */
+	if (bpf_has_call_data(ctx)) {
+		ret = bpf_fetch_prog_relo(ctx, ".text", &lderr, NULL,
+					  &ctx->prog_text);
+		if (ret < 0)
+			return ret;
+	}
+	/* Try the relocated path; fall back to a plain load only if that
+	 * path found nothing and did not itself hit a load error.
+	 */
+	if (bpf_has_map_data(ctx) || bpf_has_call_data(ctx))
+		ret = bpf_fetch_prog_relo(ctx, section, &lderr, &sseen, &prog);
+	if (ret < 0 && !lderr)
+		ret = bpf_fetch_prog(ctx, section, &sseen);
+	if (ret < 0 && !sseen)
+		fprintf(stderr, "Program section \'%s\' not found in ELF file!\n",
+			section);
+	return ret;
+}
+/*
+ * Return the index of the first attached map (non-zero fd) that has the
+ * given @id and is of type BPF_MAP_TYPE_PROG_ARRAY, or -1 if there is none.
+ */
+static int bpf_find_map_by_id(struct bpf_elf_ctx *ctx, uint32_t id)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++)
+		if (ctx->map_fds[i] && ctx->maps[i].id == id &&
+		    ctx->maps[i].type == BPF_MAP_TYPE_PROG_ARRAY)
+			return i;
+	return -1;
+}
+/* Program/map fd pair plus the properties read back for each via fdinfo. */
+struct bpf_jited_aux {
+	int prog_fd;
+	int map_fd;
+	struct bpf_prog_data prog;
+	struct bpf_map_ext map;
+};
+/*
+ * Read "prog_type" and "prog_jited" for @fd from /proc/<pid>/fdinfo/<fd>
+ * into @prog, which is zeroed first. Returns 0, or -EIO if the file
+ * cannot be opened.
+ */
+static int bpf_derive_prog_from_fdinfo(int fd, struct bpf_prog_data *prog)
+{
+	char file[PATH_MAX], buff[4096];
+	unsigned int val;
+	FILE *fp;
+
+	snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
+	memset(prog, 0, sizeof(*prog));
+
+	fp = fopen(file, "r");
+	if (!fp) {
+		fprintf(stderr, "No procfs support?!\n");
+		return -EIO;
+	}
+
+	while (fgets(buff, sizeof(buff), fp)) {
+		if (sscanf(buff, "prog_type:\t%u", &val) == 1)
+			prog->type = val;
+		else if (sscanf(buff, "prog_jited:\t%u", &val) == 1)
+			prog->jited = val;
+	}
+
+	fclose(fp);
+	return 0;
+}
+/*
+ * Fill aux->map from the map fd and, if that succeeds, aux->prog from the
+ * program fd. Returns 0 or the first non-zero helper result.
+ */
+static int bpf_tail_call_get_aux(struct bpf_jited_aux *aux)
+{
+	struct bpf_elf_map tmp;
+	int ret;
+
+	ret = bpf_derive_elf_map_from_fdinfo(aux->map_fd, &tmp, &aux->map);
+	if (!ret)
+		ret = bpf_derive_prog_from_fdinfo(aux->prog_fd, &aux->prog);
+
+	return ret;
+}
+/*
+ * For every not-yet-processed section whose name parses as
+ * "<map id>/<key>" and whose map id names an attached prog array, load the
+ * section as a program and store its fd in that map under <key>. When the
+ * map update fails, the cause is diagnosed where possible (key out of
+ * bounds, prog type or JIT state differing from the map owner) and -errno
+ * is returned; a failed program load returns -EIO. Returns 0 on success.
+ */
+static int bpf_fill_prog_arrays(struct bpf_elf_ctx *ctx)
+{
+	struct bpf_elf_sec_data data;
+	uint32_t map_id, key_id;
+	int fd, i, ret, idx;
+
+	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
+		if (ctx->sec_done[i])
+			continue;
+
+		ret = bpf_fill_section_data(ctx, i, &data);
+		if (ret < 0)
+			continue;
+
+		ret = sscanf(data.sec_name, "%i/%i", &map_id, &key_id);
+		if (ret != 2)
+			continue;
+
+		idx = bpf_find_map_by_id(ctx, map_id);
+		if (idx < 0)
+			continue;
+
+		fd = bpf_fetch_prog_sec(ctx, data.sec_name);
+		if (fd < 0)
+			return -EIO;
+
+		ret = bpf_map_update(ctx->map_fds[idx], &key_id,
+				     &fd, BPF_ANY);
+		if (ret < 0) {
+			struct bpf_jited_aux aux = {};
+
+			ret = -errno;
+			if (errno == E2BIG) {
+				fprintf(stderr, "Tail call key %u for map %u out of bounds?\n",
+					key_id, map_id);
+				return ret;
+			}
+
+			aux.map_fd = ctx->map_fds[idx];
+			aux.prog_fd = fd;
+
+			if (bpf_tail_call_get_aux(&aux))
+				return ret;
+			if (!aux.map.owner.type)
+				return ret;
+
+			if (aux.prog.type != aux.map.owner.type)
+				fprintf(stderr, "Tail call map owned by prog type %u, but prog type is %u!\n",
+					aux.map.owner.type, aux.prog.type);
+			if (aux.prog.jited != aux.map.owner.jited)
+				fprintf(stderr, "Tail call map %s jited, but prog %s!\n",
+					aux.map.owner.jited ? "is" : "not",
+					aux.prog.jited ? "is" : "not");
+			return ret;
+		}
+
+		ctx->sec_done[i] = true;
+	}
+
+	return 0;
+}
+/*
+ * Record the device and inode number of the opened object file in
+ * ctx->stat. ctx->stat is zeroed first and stays zeroed if fstat() fails.
+ */
+static void bpf_save_finfo(struct bpf_elf_ctx *ctx)
+{
+	struct stat st;
+	int ret;
+
+	memset(&ctx->stat, 0, sizeof(ctx->stat));
+
+	ret = fstat(ctx->obj_fd, &st);
+	if (ret < 0) {
+		fprintf(stderr, "Stat of elf file failed: %s\n",
+			strerror(errno));
+		return;
+	}
+
+	ctx->stat.st_dev = st.st_dev;
+	ctx->stat.st_ino = st.st_ino;
+}
+/*
+ * Read the next "<id> <path>" entry from @fp, skipping blank lines and
+ * lines starting with '#'. Returns 1 with @id and @path filled in, 0 at
+ * end of file, or -1 for a line that does not parse, in which case the
+ * offending text is copied to @path. @path must be able to hold a full
+ * line (callers in this file pass a PATH_MAX buffer).
+ */
+static int bpf_read_pin_mapping(FILE *fp, uint32_t *id, char *path)
+{
+	char buff[PATH_MAX];
+
+	while (fgets(buff, sizeof(buff), fp)) {
+		char *ptr = buff;
+
+		while (*ptr == ' ' || *ptr == '\t')
+			ptr++;
+
+		if (*ptr == '#' || *ptr == '\n' || *ptr == 0)
+			continue;
+
+		if (sscanf(ptr, "%i %s\n", id, path) != 2 &&
+		    sscanf(ptr, "%i %s #", id, path) != 2) {
+			strcpy(path, ptr);
+			return -1;
+		}
+
+		return 1;
+	}
+
+	return 0;
+}
+/* True for the pinning ids PIN_NONE, PIN_OBJECT_NS and PIN_GLOBAL_NS. */
+static bool bpf_pinning_reserved(uint32_t pinning)
+{
+	switch (pinning) {
+	case PIN_NONE:
+	case PIN_OBJECT_NS:
+	case PIN_GLOBAL_NS:
+		return true;
+	default:
+		return false;
+	}
+}
+/*
+ * Load the pinning database @db_file into the hash table ctx->ht, keyed
+ * by pinning id. A missing file is silently ignored; parsing stops at the
+ * first corrupted line; reserved ids and entries that cannot be allocated
+ * are skipped with a message.
+ */
+static void bpf_hash_init(struct bpf_elf_ctx *ctx, const char *db_file)
+{
+	struct bpf_hash_entry *entry;
+	char subpath[PATH_MAX] = {};
+	uint32_t pinning;
+	FILE *fp;
+	int ret;
+
+	fp = fopen(db_file, "r");
+	if (!fp)
+		return;
+
+	while ((ret = bpf_read_pin_mapping(fp, &pinning, subpath))) {
+		if (ret == -1) {
+			fprintf(stderr, "Database %s is corrupted at: %s\n",
+				db_file, subpath);
+			fclose(fp);
+			return;
+		}
+
+		if (bpf_pinning_reserved(pinning)) {
+			fprintf(stderr, "Database %s, id %u is reserved - ignoring!\n",
+				db_file, pinning);
+			continue;
+		}
+
+		entry = malloc(sizeof(*entry));
+		if (!entry) {
+			fprintf(stderr, "No memory left for db entry!\n");
+			continue;
+		}
+
+		entry->pinning = pinning;
+		entry->subpath = strdup(subpath);
+		if (!entry->subpath) {
+			fprintf(stderr, "No memory left for db entry!\n");
+			free(entry);
+			continue;
+		}
+		/* Push onto the head of the bucket's chain. */
+		entry->next = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)];
+		ctx->ht[pinning & 
(ARRAY_SIZE(ctx->ht) - 1)] = entry;
+	}
+
+	fclose(fp);
+}
+
+/* Free every entry (and its subpath string) of the pinning hash table. */
+static void bpf_hash_destroy(struct bpf_elf_ctx *ctx)
+{
+	struct bpf_hash_entry *entry;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ctx->ht); i++) {
+		while ((entry = ctx->ht[i]) != NULL) {
+			ctx->ht[i] = entry->next;
+			free((char *)entry->subpath);
+			free(entry);
+		}
+	}
+}
+
+/* Check that the ELF header describes a relocatable object for machine
+ * EM_NONE or EM_BPF with the current ELF version, and that its byte order
+ * matches the host's. Returns 0, -EINVAL for a wrong format, or -EIO for
+ * an endianness mismatch.
+ */
+static int bpf_elf_check_ehdr(const struct bpf_elf_ctx *ctx)
+{
+	if (ctx->elf_hdr.e_type != ET_REL ||
+	    (ctx->elf_hdr.e_machine != EM_NONE &&
+	     ctx->elf_hdr.e_machine != EM_BPF) ||
+	    ctx->elf_hdr.e_version != EV_CURRENT) {
+		fprintf(stderr, "ELF format error, ELF file not for eBPF?\n");
+		return -EINVAL;
+	}
+
+	switch (ctx->elf_hdr.e_ident[EI_DATA]) {
+	default:
+		fprintf(stderr, "ELF format error, wrong endianness info?\n");
+		return -EINVAL;
+	case ELFDATA2LSB:
+		if (htons(1) == 1) {
+			fprintf(stderr,
+				"We are big endian, eBPF object is little endian!\n");
+			return -EIO;
+		}
+		break;
+	case ELFDATA2MSB:
+		if (htons(1) != 1) {
+			fprintf(stderr,
+				"We are little endian, eBPF object is big endian!\n");
+			return -EIO;
+		}
+		break;
+	}
+
+	return 0;
+}
+
+/* Read /proc/sys/net/core/bpf_jit_enable into ctx->cfg.jit_enabled. The
+ * setting is left untouched if the file cannot be opened or read.
+ */
+static void bpf_get_cfg(struct bpf_elf_ctx *ctx)
+{
+	static const char *path_jit = "/proc/sys/net/core/bpf_jit_enable";
+	int fd;
+
+	fd = open(path_jit, O_RDONLY);
+	if (fd >= 0) {
+		char tmp[16] = {};
+
+		/* Read one byte less than the buffer holds so that the
+		 * zero-initialised last byte always terminates the string
+		 * handed to atoi().
+		 */
+		if (read(fd, tmp, sizeof(tmp) - 1) > 0)
+			ctx->cfg.jit_enabled = atoi(tmp);
+		close(fd);
+	}
+}
+
+/* Initialise @ctx for the object file @pathname: reset the context, read
+ * the JIT setting, hash the object (noafalg is set if hashing fails), open
+ * the file and its ELF descriptor, validate the ELF header, allocate the
+ * per-section "done" flags, set up the log buffer in verbose mode, and
+ * load the pinning database. Returns 0 on success or a negative value,
+ * in which case everything acquired so far has been released again.
+ */
+static int bpf_elf_ctx_init(struct bpf_elf_ctx *ctx, const char *pathname,
+			    enum bpf_prog_type type, __u32 ifindex,
+			    bool verbose)
+{
+	uint8_t tmp[20];
+	int ret;
+
+	if (elf_version(EV_CURRENT) == EV_NONE)
+		return -EINVAL;
+
+	bpf_init_env();
+
+	memset(ctx, 0, sizeof(*ctx));
+	bpf_get_cfg(ctx);
+
+	ret = bpf_obj_hash(pathname, tmp, sizeof(tmp));
+	if (ret)
+		ctx->noafalg = true;
+	else
+		hexstring_n2a(tmp, sizeof(tmp), ctx->obj_uid,
+			      sizeof(ctx->obj_uid));
+
+	ctx->verbose = verbose;
+	ctx->type = type;
+	ctx->ifindex = ifindex;
+
+	ctx->obj_fd = open(pathname, O_RDONLY);
+	if 
(ctx->obj_fd < 0)
+		return ctx->obj_fd;
+
+	ctx->elf_fd = elf_begin(ctx->obj_fd, ELF_C_READ, NULL);
+	if (!ctx->elf_fd) {
+		ret = -EINVAL;
+		goto out_fd;
+	}
+
+	/* From here on an ELF descriptor exists, so every failure has to
+	 * go through out_elf to release it (this check used to jump to
+	 * out_fd and leaked the descriptor).
+	 */
+	if (elf_kind(ctx->elf_fd) != ELF_K_ELF) {
+		ret = -EINVAL;
+		goto out_elf;
+	}
+
+	if (gelf_getehdr(ctx->elf_fd, &ctx->elf_hdr) !=
+	    &ctx->elf_hdr) {
+		ret = -EIO;
+		goto out_elf;
+	}
+
+	ret = bpf_elf_check_ehdr(ctx);
+	if (ret < 0)
+		goto out_elf;
+
+	ctx->sec_done = calloc(ctx->elf_hdr.e_shnum,
+			       sizeof(*(ctx->sec_done)));
+	if (!ctx->sec_done) {
+		ret = -ENOMEM;
+		goto out_elf;
+	}
+
+	if (ctx->verbose && bpf_log_realloc(ctx)) {
+		ret = -ENOMEM;
+		goto out_free;
+	}
+
+	bpf_save_finfo(ctx);
+	bpf_hash_init(ctx, CONFDIR "/bpf_pinning");
+
+	return 0;
+out_free:
+	free(ctx->sec_done);
+out_elf:
+	elf_end(ctx->elf_fd);
+out_fd:
+	close(ctx->obj_fd);
+	return ret;
+}
+
+/* Count the leading non-zero entries of ctx->map_fds. */
+static int bpf_maps_count(struct bpf_elf_ctx *ctx)
+{
+	int i, count = 0;
+
+	for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) {
+		if (!ctx->map_fds[i])
+			break;
+		count++;
+	}
+
+	return count;
+}
+
+/* Close every non-zero map fd and the BTF fd, and free the BTF type index.
+ * The fields themselves are not reset.
+ */
+static void bpf_maps_teardown(struct bpf_elf_ctx *ctx)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) {
+		if (ctx->map_fds[i])
+			close(ctx->map_fds[i]);
+	}
+
+	if (ctx->btf_fd)
+		close(ctx->btf_fd);
+	free(ctx->btf.types);
+}
+
+/* Release what bpf_elf_ctx_init() and later parsing acquired. Map and BTF
+ * fds are closed only when @failure is set; otherwise they stay open.
+ */
+static void bpf_elf_ctx_destroy(struct bpf_elf_ctx *ctx, bool failure)
+{
+	if (failure)
+		bpf_maps_teardown(ctx);
+
+	bpf_hash_destroy(ctx);
+
+	free(ctx->prog_text.insns);
+	free(ctx->sec_done);
+	free(ctx->log);
+
+	elf_end(ctx->elf_fd);
+	close(ctx->obj_fd);
+}
+
+/* The single loader context shared by the functions in this file. */
+static struct bpf_elf_ctx __ctx;
+
+/* Load program section @section from object file @pathname: initialise
+ * the context, process the ancillary sections (.text is treated as call
+ * data unless it is the requested section itself), load the program and
+ * fill the program arrays. Returns the program fd, or a negative value
+ * after releasing the resources acquired up to the failure.
+ */
+static int bpf_obj_open(const char *pathname, enum bpf_prog_type type,
+			const char *section, __u32 ifindex, bool verbose)
+{
+	struct bpf_elf_ctx *ctx = &__ctx;
+	int fd = 0, ret;
+
+	ret = bpf_elf_ctx_init(ctx, pathname, type, ifindex, verbose);
+	if (ret < 0) {
+		fprintf(stderr, "Cannot initialize ELF context!\n");
+		return ret;
+	}
+
+	ret = bpf_fetch_ancillary(ctx, strcmp(section, ".text"));
+	if (ret 
< 0) {
+		fprintf(stderr, "Error fetching ELF ancillary data!\n");
+		goto out;
+	}
+
+	fd = bpf_fetch_prog_sec(ctx, section);
+	if (fd < 0) {
+		fprintf(stderr, "Error fetching program/map!\n");
+		ret = fd;
+		goto out;
+	}
+
+	ret = bpf_fill_prog_arrays(ctx);
+	if (ret < 0)
+		fprintf(stderr, "Error filling program arrays!\n");
+out:
+	bpf_elf_ctx_destroy(ctx, ret < 0);
+	if (ret < 0) {
+		if (fd)
+			close(fd);
+		return ret;
+	}
+
+	return fd;
+}
+
+/* Send @entries map fds together with their map descriptions over the
+ * connected socket @fd, in batches of at most BPF_SCM_MAX_FDS per message.
+ * Returns 0 on success, or the failing sendmsg() result (-1 if that
+ * result was 0).
+ */
+static int
+bpf_map_set_send(int fd, struct sockaddr_un *addr, unsigned int addr_len,
+		 const struct bpf_map_data *aux, unsigned int entries)
+{
+	struct bpf_map_set_msg msg = {
+		.aux.uds_ver = BPF_SCM_AUX_VER,
+		.aux.num_ent = entries,
+	};
+	int *cmsg_buf, min_fd;
+	char *amsg_buf;
+	int i;
+
+	strlcpy(msg.aux.obj_name, aux->obj, sizeof(msg.aux.obj_name));
+	memcpy(&msg.aux.obj_st, aux->st, sizeof(msg.aux.obj_st));
+
+	cmsg_buf = bpf_map_set_init(&msg, addr, addr_len);
+	amsg_buf = (char *)msg.aux.ent;
+
+	for (i = 0; i < entries; i += min_fd) {
+		int ret;
+
+		min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i);
+		bpf_map_set_init_single(&msg, min_fd);
+
+		memcpy(cmsg_buf, &aux->fds[i], sizeof(aux->fds[0]) * min_fd);
+		memcpy(amsg_buf, &aux->ent[i], sizeof(aux->ent[0]) * min_fd);
+
+		ret = sendmsg(fd, &msg.hdr, 0);
+		if (ret <= 0)
+			return ret ? : -1;
+	}
+
+	return 0;
+}
+
+/* Receive map fds and their descriptions from socket @fd into @fds and
+ * @aux, both of which have room for @entries items. Messages are read
+ * until either @entries items or the number announced by the sender
+ * (aux->num_ent) have arrived. Returns 0 on success, the failing
+ * recvmsg() result (-1 if that result was 0), -EINVAL for a missing or
+ * unexpected control message or an fd count that does not fit, -EIO if
+ * the control data was truncated, and -ENOSYS on a version mismatch.
+ */
+static int
+bpf_map_set_recv(int fd, int *fds, struct bpf_map_aux *aux,
+		 unsigned int entries)
+{
+	struct bpf_map_set_msg msg;
+	int *cmsg_buf, min_fd;
+	char *amsg_buf, *mmsg_buf;
+	unsigned int needed = 1;
+	int i;
+
+	cmsg_buf = bpf_map_set_init(&msg, NULL, 0);
+	amsg_buf = (char *)msg.aux.ent;
+	mmsg_buf = (char *)&msg.aux;
+
+	for (i = 0; i < min(entries, needed); i += min_fd) {
+		struct cmsghdr *cmsg;
+		int ret;
+
+		/* Ask for at most one message's worth of fds, using the same
+		 * per-message cap as the sender, and never more than the
+		 * room that is left.
+		 */
+		min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i);
+		bpf_map_set_init_single(&msg, min_fd);
+
+		ret = recvmsg(fd, &msg.hdr, 0);
+		if (ret <= 0)
+			return ret ? : -1;
+
+		cmsg = CMSG_FIRSTHDR(&msg.hdr);
+		if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS)
+			return -EINVAL;
+		if (msg.hdr.msg_flags & MSG_CTRUNC)
+			return -EIO;
+		if (msg.aux.uds_ver != BPF_SCM_AUX_VER)
+			return -ENOSYS;
+
+		/* The count comes from the peer: it must fit into what is
+		 * left of fds[] / aux->ent[] after the i items already
+		 * stored, not merely into the arrays as a whole.
+		 */
+		min_fd = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof(fd);
+		if (min_fd <= 0 || min_fd > entries - i)
+			return -EINVAL;
+
+		memcpy(&fds[i], cmsg_buf, sizeof(fds[0]) * min_fd);
+		memcpy(&aux->ent[i], amsg_buf, sizeof(aux->ent[0]) * min_fd);
+		memcpy(aux, mmsg_buf, offsetof(struct bpf_map_aux, ent));
+
+		needed = aux->num_ent;
+	}
+
+	return 0;
+}
+
+/* Connect to the UNIX datagram socket at @path and send it all map fds of
+ * the loader context, labelled with object name @obj. The maps are torn
+ * down afterwards whether or not sending succeeded. Returns 0 on success
+ * or a negative value.
+ */
+int bpf_send_map_fds(const char *path, const char *obj)
+{
+	struct bpf_elf_ctx *ctx = &__ctx;
+	struct sockaddr_un addr = { .sun_family = AF_UNIX };
+	struct bpf_map_data bpf_aux = {
+		.fds = ctx->map_fds,
+		.ent = ctx->maps,
+		.st = &ctx->stat,
+		.obj = obj,
+	};
+	int fd, ret = -1;
+
+	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+	if (fd < 0) {
+		fprintf(stderr, "Cannot open socket: %s\n",
+			strerror(errno));
+		goto out;
+	}
+
+	strlcpy(addr.sun_path, path, sizeof(addr.sun_path));
+
+	ret = connect(fd, (struct sockaddr *)&addr, sizeof(addr));
+	if (ret < 0) {
+		fprintf(stderr, "Cannot connect to %s: %s\n",
+			path, strerror(errno));
+		goto out;
+	}
+
+	ret = bpf_map_set_send(fd, &addr, sizeof(addr), &bpf_aux,
+			       bpf_maps_count(ctx));
+	if (ret < 0)
+		fprintf(stderr, "Cannot send fds to %s: %s\n",
+			path, strerror(errno));
+
+	bpf_maps_teardown(ctx);
+out:
+	if (fd >= 0)
+		close(fd);
+	return ret;
+}
+
+/* Bind a UNIX datagram socket to @path and receive up to @entries map fds
+ * and their descriptions into @fds and @aux. The socket path is unlinked
+ * once receiving has been attempted. Returns 0 on success or a negative
+ * value.
+ */
+int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
+		     unsigned int entries)
+{
+	struct sockaddr_un addr = { .sun_family = AF_UNIX };
+	int fd, ret = -1;
+
+	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+	if (fd < 0) {
+		fprintf(stderr, "Cannot open socket: %s\n",
+			strerror(errno));
+		goto out;
+	}
+
+	strlcpy(addr.sun_path, path, sizeof(addr.sun_path));
+
+	ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
+	if (ret < 0) {
+		fprintf(stderr, "Cannot bind to socket: %s\n",
+			strerror(errno));
+
goto out; + } + + ret = bpf_map_set_recv(fd, fds, aux, entries); + if (ret < 0) + fprintf(stderr, "Cannot recv fds from %s: %s\n", + path, strerror(errno)); + + unlink(addr.sun_path); + +out: + if (fd >= 0) + close(fd); + return ret; +} + +#ifdef HAVE_LIBBPF +/* The following functions are wrapper functions for libbpf code to be + * compatible with the legacy format. So all the functions have prefix + * with iproute2_ + */ +int iproute2_bpf_elf_ctx_init(struct bpf_cfg_in *cfg) +{ + struct bpf_elf_ctx *ctx = &__ctx; + + return bpf_elf_ctx_init(ctx, cfg->object, cfg->type, cfg->ifindex, cfg->verbose); +} + +int iproute2_bpf_fetch_ancillary(void) +{ + struct bpf_elf_ctx *ctx = &__ctx; + struct bpf_elf_sec_data data; + int i, ret = 0; + + for (i = 1; i < ctx->elf_hdr.e_shnum; i++) { + ret = bpf_fill_section_data(ctx, i, &data); + if (ret < 0) + continue; + + if (data.sec_hdr.sh_type == SHT_PROGBITS && + !strcmp(data.sec_name, ELF_SECTION_MAPS)) + ret = bpf_fetch_maps_begin(ctx, i, &data); + else if (data.sec_hdr.sh_type == SHT_SYMTAB && + !strcmp(data.sec_name, ".symtab")) + ret = bpf_fetch_symtab(ctx, i, &data); + else if (data.sec_hdr.sh_type == SHT_STRTAB && + !strcmp(data.sec_name, ".strtab")) + ret = bpf_fetch_strtab(ctx, i, &data); + if (ret < 0) { + fprintf(stderr, "Error parsing section %d! 
Perhaps check with readelf -a?\n", + i); + return ret; + } + } + + if (bpf_has_map_data(ctx)) { + ret = bpf_fetch_maps_end(ctx); + if (ret < 0) { + fprintf(stderr, "Error fixing up map structure, incompatible struct bpf_elf_map used?\n"); + return ret; + } + } + + return ret; +} + +int iproute2_get_root_path(char *root_path, size_t len) +{ + struct bpf_elf_ctx *ctx = &__ctx; + int ret = 0; + + snprintf(root_path, len, "%s/%s", + bpf_get_work_dir(ctx->type), BPF_DIR_GLOBALS); + + ret = mkdir(root_path, S_IRWXU); + if (ret && errno != EEXIST) { + fprintf(stderr, "mkdir %s failed: %s\n", root_path, strerror(errno)); + return ret; + } + + return 0; +} + +bool iproute2_is_pin_map(const char *libbpf_map_name, char *pathname) +{ + struct bpf_elf_ctx *ctx = &__ctx; + const char *map_name, *tmp; + unsigned int pinning; + int i, ret = 0; + + for (i = 0; i < ctx->map_num; i++) { + if (ctx->maps[i].pinning == PIN_OBJECT_NS && + ctx->noafalg) { + fprintf(stderr, "Missing kernel AF_ALG support for PIN_OBJECT_NS!\n"); + return false; + } + + map_name = bpf_map_fetch_name(ctx, i); + if (!map_name) { + return false; + } + + if (strcmp(libbpf_map_name, map_name)) + continue; + + pinning = ctx->maps[i].pinning; + + if (bpf_no_pinning(ctx, pinning) || !bpf_get_work_dir(ctx->type)) + return false; + + if (pinning == PIN_OBJECT_NS) + ret = bpf_make_obj_path(ctx); + else if ((tmp = bpf_custom_pinning(ctx, pinning))) + ret = bpf_make_custom_path(ctx, tmp); + if (ret < 0) + return false; + + bpf_make_pathname(pathname, PATH_MAX, map_name, ctx, pinning); + + return true; + } + + return false; +} + +bool iproute2_is_map_in_map(const char *libbpf_map_name, struct bpf_elf_map *imap, + struct bpf_elf_map *omap, char *omap_name) +{ + struct bpf_elf_ctx *ctx = &__ctx; + const char *inner_map_name, *outer_map_name; + int i, j; + + for (i = 0; i < ctx->map_num; i++) { + inner_map_name = bpf_map_fetch_name(ctx, i); + if (!inner_map_name) { + return false; + } + + if (strcmp(libbpf_map_name, 
inner_map_name)) + continue; + + if (!ctx->maps[i].id || + ctx->maps[i].inner_id || + ctx->maps[i].inner_idx == -1) + continue; + + *imap = ctx->maps[i]; + + for (j = 0; j < ctx->map_num; j++) { + if (!bpf_is_map_in_map_type(&ctx->maps[j])) + continue; + if (ctx->maps[j].inner_id != ctx->maps[i].id) + continue; + + *omap = ctx->maps[j]; + outer_map_name = bpf_map_fetch_name(ctx, j); + memcpy(omap_name, outer_map_name, strlen(outer_map_name) + 1); + + return true; + } + } + + return false; +} + +int iproute2_find_map_name_by_id(unsigned int map_id, char *name) +{ + struct bpf_elf_ctx *ctx = &__ctx; + const char *map_name; + int i, idx = -1; + + for (i = 0; i < ctx->map_num; i++) { + if (ctx->maps[i].id == map_id && + ctx->maps[i].type == BPF_MAP_TYPE_PROG_ARRAY) { + idx = i; + break; + } + } + + if (idx < 0) + return -1; + + map_name = bpf_map_fetch_name(ctx, idx); + if (!map_name) + return -1; + + memcpy(name, map_name, strlen(map_name) + 1); + return 0; +} +#endif /* HAVE_LIBBPF */ +#endif /* HAVE_ELF */ diff --git a/lib/bpf_libbpf.c b/lib/bpf_libbpf.c new file mode 100644 index 0000000..864f8c3 --- /dev/null +++ b/lib/bpf_libbpf.c @@ -0,0 +1,349 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * bpf_libbpf.c BPF code relay on libbpf + * Authors: Hangbin Liu + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include "bpf_util.h" + +static int verbose_print(enum libbpf_print_level level, const char *format, va_list args) +{ + return vfprintf(stderr, format, args); +} + +static int silent_print(enum libbpf_print_level level, const char *format, va_list args) +{ + if (level > LIBBPF_WARN) + return 0; + + /* Skip warning from bpf_object__init_user_maps() for legacy maps */ + if (strstr(format, "has unrecognized, non-zero options")) + return 0; + + return vfprintf(stderr, format, args); +} + +static const char *get_bpf_program__section_name(const struct bpf_program *prog) 
+{ +#ifdef HAVE_LIBBPF_SECTION_NAME + return bpf_program__section_name(prog); +#else + return bpf_program__title(prog, false); +#endif +} + +static int create_map(const char *name, struct bpf_elf_map *map, + __u32 ifindex, int inner_fd) +{ + struct bpf_create_map_attr map_attr = {}; + + map_attr.name = name; + map_attr.map_type = map->type; + map_attr.map_flags = map->flags; + map_attr.key_size = map->size_key; + map_attr.value_size = map->size_value; + map_attr.max_entries = map->max_elem; + map_attr.map_ifindex = ifindex; + map_attr.inner_map_fd = inner_fd; + + return bpf_create_map_xattr(&map_attr); +} + +static int create_map_in_map(struct bpf_object *obj, struct bpf_map *map, + struct bpf_elf_map *elf_map, int inner_fd, + bool *reuse_pin_map) +{ + char pathname[PATH_MAX]; + const char *map_name; + bool pin_map = false; + int map_fd, ret = 0; + + map_name = bpf_map__name(map); + + if (iproute2_is_pin_map(map_name, pathname)) { + pin_map = true; + + /* Check if there already has a pinned map */ + map_fd = bpf_obj_get(pathname); + if (map_fd > 0) { + if (reuse_pin_map) + *reuse_pin_map = true; + close(map_fd); + return bpf_map__set_pin_path(map, pathname); + } + } + + map_fd = create_map(map_name, elf_map, bpf_map__ifindex(map), inner_fd); + if (map_fd < 0) { + fprintf(stderr, "create map %s failed\n", map_name); + return map_fd; + } + + ret = bpf_map__reuse_fd(map, map_fd); + if (ret < 0) { + fprintf(stderr, "map %s reuse fd failed\n", map_name); + goto err_out; + } + + if (pin_map) { + ret = bpf_map__set_pin_path(map, pathname); + if (ret < 0) + goto err_out; + } + + return 0; +err_out: + close(map_fd); + return ret; +} + +static int +handle_legacy_map_in_map(struct bpf_object *obj, struct bpf_map *inner_map, + const char *inner_map_name) +{ + int inner_fd, outer_fd, inner_idx, ret = 0; + struct bpf_elf_map imap, omap; + struct bpf_map *outer_map; + /* What's the size limit of map name? 
*/ + char outer_map_name[128]; + bool reuse_pin_map = false; + + /* Deal with map-in-map */ + if (iproute2_is_map_in_map(inner_map_name, &imap, &omap, outer_map_name)) { + ret = create_map_in_map(obj, inner_map, &imap, -1, NULL); + if (ret < 0) + return ret; + + inner_fd = bpf_map__fd(inner_map); + outer_map = bpf_object__find_map_by_name(obj, outer_map_name); + ret = create_map_in_map(obj, outer_map, &omap, inner_fd, &reuse_pin_map); + if (ret < 0) + return ret; + + if (!reuse_pin_map) { + inner_idx = imap.inner_idx; + outer_fd = bpf_map__fd(outer_map); + ret = bpf_map_update_elem(outer_fd, &inner_idx, &inner_fd, 0); + if (ret < 0) + fprintf(stderr, "Cannot update inner_idx into outer_map\n"); + } + } + + return ret; +} + +static int find_legacy_tail_calls(struct bpf_program *prog, struct bpf_object *obj) +{ + unsigned int map_id, key_id; + const char *sec_name; + struct bpf_map *map; + char map_name[128]; + int ret; + + /* Handle iproute2 tail call */ + sec_name = get_bpf_program__section_name(prog); + ret = sscanf(sec_name, "%i/%i", &map_id, &key_id); + if (ret != 2) + return -1; + + ret = iproute2_find_map_name_by_id(map_id, map_name); + if (ret < 0) { + fprintf(stderr, "unable to find map id %u for tail call\n", map_id); + return ret; + } + + map = bpf_object__find_map_by_name(obj, map_name); + if (!map) + return -1; + + /* Save the map here for later updating */ + bpf_program__set_priv(prog, map, NULL); + + return 0; +} + +static int update_legacy_tail_call_maps(struct bpf_object *obj) +{ + int prog_fd, map_fd, ret = 0; + unsigned int map_id, key_id; + struct bpf_program *prog; + const char *sec_name; + struct bpf_map *map; + + bpf_object__for_each_program(prog, obj) { + map = bpf_program__priv(prog); + if (!map) + continue; + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) + continue; + + sec_name = get_bpf_program__section_name(prog); + ret = sscanf(sec_name, "%i/%i", &map_id, &key_id); + if (ret != 2) + continue; + + map_fd = bpf_map__fd(map); + ret 
= bpf_map_update_elem(map_fd, &key_id, &prog_fd, 0); + if (ret < 0) { + fprintf(stderr, "Cannot update map key for tail call!\n"); + return ret; + } + } + + return 0; +} + +static int handle_legacy_maps(struct bpf_object *obj) +{ + char pathname[PATH_MAX]; + struct bpf_map *map; + const char *map_name; + int map_fd, ret = 0; + + bpf_object__for_each_map(map, obj) { + map_name = bpf_map__name(map); + + ret = handle_legacy_map_in_map(obj, map, map_name); + if (ret) + return ret; + + /* If it is a iproute2 legacy pin maps, just set pin path + * and let bpf_object__load() to deal with the map creation. + * We need to ignore map-in-maps which have pinned maps manually + */ + map_fd = bpf_map__fd(map); + if (map_fd < 0 && iproute2_is_pin_map(map_name, pathname)) { + ret = bpf_map__set_pin_path(map, pathname); + if (ret) { + fprintf(stderr, "map '%s': couldn't set pin path.\n", map_name); + break; + } + } + + } + + return ret; +} + +static int load_bpf_object(struct bpf_cfg_in *cfg) +{ + struct bpf_program *p, *prog = NULL; + struct bpf_object *obj; + char root_path[PATH_MAX]; + struct bpf_map *map; + int prog_fd, ret = 0; + + ret = iproute2_get_root_path(root_path, PATH_MAX); + if (ret) + return ret; + + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts, + .relaxed_maps = true, + .pin_root_path = root_path, + ); + + obj = bpf_object__open_file(cfg->object, &open_opts); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + return -ENOENT; + } + + bpf_object__for_each_program(p, obj) { + /* Only load the programs that will either be subsequently + * attached or inserted into a tail call map */ + if (find_legacy_tail_calls(p, obj) < 0 && cfg->section && + strcmp(get_bpf_program__section_name(p), cfg->section)) { + ret = bpf_program__set_autoload(p, false); + if (ret) + return -EINVAL; + continue; + } + + bpf_program__set_type(p, cfg->type); + bpf_program__set_ifindex(p, cfg->ifindex); + if (!prog) + prog = p; + } + + 
bpf_object__for_each_map(map, obj) { + if (!bpf_map__is_offload_neutral(map)) + bpf_map__set_ifindex(map, cfg->ifindex); + } + + if (!prog) { + fprintf(stderr, "object file doesn't contain sec %s\n", cfg->section); + return -ENOENT; + } + + /* Handle iproute2 legacy pin maps and map-in-maps */ + ret = handle_legacy_maps(obj); + if (ret) + goto unload_obj; + + ret = bpf_object__load(obj); + if (ret) + goto unload_obj; + + ret = update_legacy_tail_call_maps(obj); + if (ret) + goto unload_obj; + + prog_fd = fcntl(bpf_program__fd(prog), F_DUPFD_CLOEXEC, 1); + if (prog_fd < 0) + ret = -errno; + else + cfg->prog_fd = prog_fd; + +unload_obj: + /* Close obj as we don't need it */ + bpf_object__close(obj); + return ret; +} + +/* Load ebpf and return prog fd */ +int iproute2_load_libbpf(struct bpf_cfg_in *cfg) +{ + int ret = 0; + + if (cfg->verbose) + libbpf_set_print(verbose_print); + else + libbpf_set_print(silent_print); + + ret = iproute2_bpf_elf_ctx_init(cfg); + if (ret < 0) { + fprintf(stderr, "Cannot initialize ELF context!\n"); + return ret; + } + + ret = iproute2_bpf_fetch_ancillary(); + if (ret < 0) { + fprintf(stderr, "Error fetching ELF ancillary data!\n"); + return ret; + } + + ret = load_bpf_object(cfg); + if (ret) + return ret; + + return cfg->prog_fd; +} diff --git a/lib/fs.c b/lib/fs.c index 4b90a70..ee0b130 100644 --- a/lib/fs.c +++ b/lib/fs.c @@ -157,7 +157,8 @@ __u64 get_cgroup2_id(const char *path) memcpy(cg_id.bytes, fhp->f_handle, sizeof(__u64)); out: - close(mnt_fd); + if (mnt_fd >= 0) + close(mnt_fd); free(mnt); return cg_id.id; @@ -179,16 +180,16 @@ char *get_cgroup2_path(__u64 id, bool full) char *path = NULL; char fd_path[64]; int link_len; - char *mnt; + char *mnt = NULL; if (!id) { fprintf(stderr, "Invalid cgroup2 ID\n"); - return NULL; + goto out; } mnt = find_cgroup2_mount(false); if (!mnt) - return NULL; + goto out; mnt_fd = open(mnt, O_RDONLY); if (mnt_fd < 0) { @@ -225,8 +226,10 @@ char *get_cgroup2_path(__u64 id, bool full) "Failed to 
allocate memory for cgroup2 path\n"); out: - close(fd); - close(mnt_fd); + if (fd >= 0) + close(fd); + if (mnt_fd >= 0) + close(mnt_fd); free(mnt); return path; @@ -253,7 +256,7 @@ int make_path(const char *path, mode_t mode) *delim = '\0'; rc = mkdir(dir, mode); - if (mkdir(dir, mode) != 0 && errno != EEXIST) { + if (rc && errno != EEXIST) { fprintf(stderr, "mkdir failed for %s: %s\n", dir, strerror(errno)); goto out; diff --git a/lib/json_print.c b/lib/json_print.c index fe0705b..994a2f8 100644 --- a/lib/json_print.c +++ b/lib/json_print.c @@ -17,9 +17,6 @@ static json_writer_t *_jw; -#define _IS_JSON_CONTEXT(type) ((type & PRINT_JSON || type & PRINT_ANY) && _jw) -#define _IS_FP_CONTEXT(type) (!_jw && (type & PRINT_FP || type & PRINT_ANY)) - static void __new_json_obj(int json, bool have_array) { if (json) { @@ -191,11 +188,12 @@ int print_color_string(enum output_type type, * a value to it, you will need to use "is_json_context()" to have different * branch for json and regular output. grep -r "print_bool" for example */ -int print_color_bool(enum output_type type, - enum color_attr color, - const char *key, - const char *fmt, - bool value) +static int __print_color_bool(enum output_type type, + enum color_attr color, + const char *key, + const char *fmt, + bool value, + const char *str) { int ret = 0; @@ -205,13 +203,32 @@ int print_color_bool(enum output_type type, else jsonw_bool(_jw, value); } else if (_IS_FP_CONTEXT(type)) { - ret = color_fprintf(stdout, color, fmt, - value ? "true" : "false"); + ret = color_fprintf(stdout, color, fmt, str); } return ret; } +int print_color_bool(enum output_type type, + enum color_attr color, + const char *key, + const char *fmt, + bool value) +{ + return __print_color_bool(type, color, key, fmt, value, + value ? 
"true" : "false"); +} + +int print_color_on_off(enum output_type type, + enum color_attr color, + const char *key, + const char *fmt, + bool value) +{ + return __print_color_bool(type, color, key, fmt, value, + value ? "on" : "off"); +} + /* * In JSON context uses hardcode %#x format: 42 -> 0x2a */ @@ -288,3 +305,36 @@ void print_nl(void) if (!_jw) printf("%s", _SL_); } + +int print_color_rate(bool use_iec, enum output_type type, enum color_attr color, + const char *key, const char *fmt, unsigned long long rate) +{ + unsigned long kilo = use_iec ? 1024 : 1000; + const char *str = use_iec ? "i" : ""; + static char *units[5] = {"", "K", "M", "G", "T"}; + char *buf; + int rc; + int i; + + if (_IS_JSON_CONTEXT(type)) + return print_color_lluint(type, color, key, "%llu", rate); + + rate <<= 3; /* bytes/sec -> bits/sec */ + + for (i = 0; i < ARRAY_SIZE(units) - 1; i++) { + if (rate < kilo) + break; + if (((rate % kilo) != 0) && rate < 1000*kilo) + break; + rate /= kilo; + } + + rc = asprintf(&buf, "%.0f%s%sbit", (double)rate, units[i], + i > 0 ? 
str : ""); + if (rc < 0) + return -1; + + rc = print_color_string(type, color, key, fmt, buf); + free(buf); + return rc; +} diff --git a/lib/json_print_math.c b/lib/json_print_math.c new file mode 100644 index 0000000..f4d5049 --- /dev/null +++ b/lib/json_print_math.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include +#include +#include + +#include "utils.h" +#include "json_print.h" + +char *sprint_size(__u32 sz, char *buf) +{ + long kilo = 1024; + long mega = kilo * kilo; + size_t len = SPRINT_BSIZE - 1; + double tmp = sz; + + if (sz >= mega && fabs(mega * rint(tmp / mega) - sz) < 1024) + snprintf(buf, len, "%gMb", rint(tmp / mega)); + else if (sz >= kilo && fabs(kilo * rint(tmp / kilo) - sz) < 16) + snprintf(buf, len, "%gKb", rint(tmp / kilo)); + else + snprintf(buf, len, "%ub", sz); + + return buf; +} + +int print_color_size(enum output_type type, enum color_attr color, + const char *key, const char *fmt, __u32 sz) +{ + SPRINT_BUF(buf); + + if (_IS_JSON_CONTEXT(type)) + return print_color_uint(type, color, key, "%u", sz); + + sprint_size(sz, buf); + return print_color_string(type, color, key, fmt, buf); +} diff --git a/lib/libnetlink.c b/lib/libnetlink.c index e02d629..c958aa5 100644 --- a/lib/libnetlink.c +++ b/lib/libnetlink.c @@ -28,8 +28,11 @@ #include #include "libnetlink.h" +#include "utils.h" +#ifndef __aligned #define __aligned(x) __attribute__((aligned(x))) +#endif #ifndef SOL_NETLINK #define SOL_NETLINK 270 @@ -1440,3 +1443,75 @@ int __parse_rtattr_nested_compat(struct rtattr *tb[], int max, memset(tb, 0, sizeof(struct rtattr *) * (max + 1)); return 0; } + +static const char *get_nla_type_str(unsigned int attr) +{ + switch (attr) { +#define C(x) case NL_ATTR_TYPE_ ## x: return #x + C(U8); + C(U16); + C(U32); + C(U64); + C(STRING); + C(FLAG); + C(NESTED); + C(NESTED_ARRAY); + C(NUL_STRING); + C(BINARY); + C(S8); + C(S16); + C(S32); + C(S64); + C(BITFIELD32); + default: + return "unknown"; + } +} + +void nl_print_policy(const struct 
rtattr *attr, FILE *fp) +{ + const struct rtattr *pos; + + rtattr_for_each_nested(pos, attr) { + const struct rtattr *attr; + + fprintf(fp, " policy[%u]:", pos->rta_type & ~NLA_F_NESTED); + + rtattr_for_each_nested(attr, pos) { + struct rtattr *tp[NL_POLICY_TYPE_ATTR_MAX + 1]; + + parse_rtattr_nested(tp, ARRAY_SIZE(tp) - 1, attr); + + if (tp[NL_POLICY_TYPE_ATTR_TYPE]) + fprintf(fp, "attr[%u]: type=%s", + attr->rta_type & ~NLA_F_NESTED, + get_nla_type_str(rta_getattr_u32(tp[NL_POLICY_TYPE_ATTR_TYPE]))); + + if (tp[NL_POLICY_TYPE_ATTR_POLICY_IDX]) + fprintf(fp, " policy:%u", + rta_getattr_u32(tp[NL_POLICY_TYPE_ATTR_POLICY_IDX])); + + if (tp[NL_POLICY_TYPE_ATTR_POLICY_MAXTYPE]) + fprintf(fp, " maxattr:%u", + rta_getattr_u32(tp[NL_POLICY_TYPE_ATTR_POLICY_MAXTYPE])); + + if (tp[NL_POLICY_TYPE_ATTR_MIN_VALUE_S] && tp[NL_POLICY_TYPE_ATTR_MAX_VALUE_S]) + fprintf(fp, " range:[%lld,%lld]", + (signed long long)rta_getattr_u64(tp[NL_POLICY_TYPE_ATTR_MIN_VALUE_S]), + (signed long long)rta_getattr_u64(tp[NL_POLICY_TYPE_ATTR_MAX_VALUE_S])); + + if (tp[NL_POLICY_TYPE_ATTR_MIN_VALUE_U] && tp[NL_POLICY_TYPE_ATTR_MAX_VALUE_U]) + fprintf(fp, " range:[%llu,%llu]", + (unsigned long long)rta_getattr_u64(tp[NL_POLICY_TYPE_ATTR_MIN_VALUE_U]), + (unsigned long long)rta_getattr_u64(tp[NL_POLICY_TYPE_ATTR_MAX_VALUE_U])); + + if (tp[NL_POLICY_TYPE_ATTR_MIN_LENGTH]) + fprintf(fp, " min len:%u", + rta_getattr_u32(tp[NL_POLICY_TYPE_ATTR_MIN_LENGTH])); + + if (tp[NL_POLICY_TYPE_ATTR_MAX_LENGTH]) + fprintf(fp, " max len:%u", + rta_getattr_u32(tp[NL_POLICY_TYPE_ATTR_MAX_LENGTH])); + } + } +} diff --git a/lib/ll_proto.c b/lib/ll_proto.c index 2a0c1cb..7817931 100644 --- a/lib/ll_proto.c +++ b/lib/ll_proto.c @@ -80,6 +80,7 @@ __PF(8021Q,802.1Q) __PF(8021AD,802.1ad) __PF(MPLS_UC,mpls_uc) __PF(MPLS_MC,mpls_mc) +__PF(TEB,teb) { 0x8100, "802.1Q" }, { 0x88cc, "LLDP" }, diff --git a/lib/mnl_utils.c b/lib/mnl_utils.c new file mode 100644 index 0000000..4f69945 --- /dev/null +++ b/lib/mnl_utils.c @@ -0,0 
+1,231 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * mnl_utils.c Helpers for working with libmnl. + */ + +#include +#include +#include +#include +#include + +#include "libnetlink.h" +#include "mnl_utils.h" +#include "utils.h" + +struct mnl_socket *mnlu_socket_open(int bus) +{ + struct mnl_socket *nl; + int one = 1; + + nl = mnl_socket_open(bus); + if (nl == NULL) + return NULL; + + mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &one, sizeof(one)); + mnl_socket_setsockopt(nl, NETLINK_EXT_ACK, &one, sizeof(one)); + + if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0) + goto err_bind; + + return nl; + +err_bind: + mnl_socket_close(nl); + return NULL; +} + +struct nlmsghdr *mnlu_msg_prepare(void *buf, uint32_t nlmsg_type, uint16_t flags, + void *extra_header, size_t extra_header_size) +{ + struct nlmsghdr *nlh; + void *eh; + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = nlmsg_type; + nlh->nlmsg_flags = flags; + nlh->nlmsg_seq = time(NULL); + + eh = mnl_nlmsg_put_extra_header(nlh, extra_header_size); + memcpy(eh, extra_header, extra_header_size); + + return nlh; +} + +static int mnlu_cb_noop(const struct nlmsghdr *nlh, void *data) +{ + return MNL_CB_OK; +} + +static int mnlu_cb_error(const struct nlmsghdr *nlh, void *data) +{ + const struct nlmsgerr *err = mnl_nlmsg_get_payload(nlh); + + /* Netlink subsystems returns the errno value with different signess */ + if (err->error < 0) + errno = -err->error; + else + errno = err->error; + + if (nl_dump_ext_ack(nlh, NULL)) + return MNL_CB_ERROR; + + return err->error == 0 ? 
MNL_CB_STOP : MNL_CB_ERROR; +} + +static int mnlu_cb_stop(const struct nlmsghdr *nlh, void *data) +{ + int len = *(int *)NLMSG_DATA(nlh); + + if (len < 0) { + errno = -len; + nl_dump_ext_ack_done(nlh, len); + return MNL_CB_ERROR; + } + return MNL_CB_STOP; +} + +static mnl_cb_t mnlu_cb_array[NLMSG_MIN_TYPE] = { + [NLMSG_NOOP] = mnlu_cb_noop, + [NLMSG_ERROR] = mnlu_cb_error, + [NLMSG_DONE] = mnlu_cb_stop, + [NLMSG_OVERRUN] = mnlu_cb_noop, +}; + +int mnlu_socket_recv_run(struct mnl_socket *nl, unsigned int seq, void *buf, size_t buf_size, + mnl_cb_t cb, void *data) +{ + unsigned int portid = mnl_socket_get_portid(nl); + int err; + + do { + err = mnl_socket_recvfrom(nl, buf, buf_size); + if (err <= 0) + break; + err = mnl_cb_run2(buf, err, seq, portid, + cb, data, mnlu_cb_array, + ARRAY_SIZE(mnlu_cb_array)); + } while (err > 0); + + return err; +} + +static int get_family_id_attr_cb(const struct nlattr *attr, void *data) +{ + int type = mnl_attr_get_type(attr); + const struct nlattr **tb = data; + + if (mnl_attr_type_valid(attr, CTRL_ATTR_MAX) < 0) + return MNL_CB_ERROR; + + if (type == CTRL_ATTR_FAMILY_ID && + mnl_attr_validate(attr, MNL_TYPE_U16) < 0) + return MNL_CB_ERROR; + tb[type] = attr; + return MNL_CB_OK; +} + +static int get_family_id_cb(const struct nlmsghdr *nlh, void *data) +{ + struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh); + struct nlattr *tb[CTRL_ATTR_MAX + 1] = {}; + uint32_t *p_id = data; + + mnl_attr_parse(nlh, sizeof(*genl), get_family_id_attr_cb, tb); + if (!tb[CTRL_ATTR_FAMILY_ID]) + return MNL_CB_ERROR; + *p_id = mnl_attr_get_u16(tb[CTRL_ATTR_FAMILY_ID]); + return MNL_CB_OK; +} + +static int family_get(struct mnlu_gen_socket *nlg, const char *family_name) +{ + struct genlmsghdr hdr = {}; + struct nlmsghdr *nlh; + int err; + + hdr.cmd = CTRL_CMD_GETFAMILY; + hdr.version = 0x1; + + nlh = mnlu_msg_prepare(nlg->buf, GENL_ID_CTRL, + NLM_F_REQUEST | NLM_F_ACK, + &hdr, sizeof(hdr)); + + mnl_attr_put_strz(nlh, CTRL_ATTR_FAMILY_NAME, family_name); 
+ + err = mnl_socket_sendto(nlg->nl, nlh, nlh->nlmsg_len); + if (err < 0) + return err; + + err = mnlu_socket_recv_run(nlg->nl, nlh->nlmsg_seq, nlg->buf, + MNL_SOCKET_BUFFER_SIZE, + get_family_id_cb, &nlg->family); + return err; +} + +int mnlu_gen_socket_open(struct mnlu_gen_socket *nlg, const char *family_name, + uint8_t version) +{ + int err; + + nlg->buf = malloc(MNL_SOCKET_BUFFER_SIZE); + if (!nlg->buf) + goto err_buf_alloc; + + nlg->nl = mnlu_socket_open(NETLINK_GENERIC); + if (!nlg->nl) + goto err_socket_open; + + err = family_get(nlg, family_name); + if (err) + goto err_socket; + + return 0; + +err_socket: + mnl_socket_close(nlg->nl); +err_socket_open: + free(nlg->buf); +err_buf_alloc: + return -1; +} + +void mnlu_gen_socket_close(struct mnlu_gen_socket *nlg) +{ + mnl_socket_close(nlg->nl); + free(nlg->buf); +} + +struct nlmsghdr *mnlu_gen_socket_cmd_prepare(struct mnlu_gen_socket *nlg, + uint8_t cmd, uint16_t flags) +{ + struct genlmsghdr hdr = {}; + struct nlmsghdr *nlh; + + hdr.cmd = cmd; + hdr.version = nlg->version; + nlh = mnlu_msg_prepare(nlg->buf, nlg->family, flags, &hdr, sizeof(hdr)); + nlg->seq = nlh->nlmsg_seq; + return nlh; +} + +int mnlu_gen_socket_sndrcv(struct mnlu_gen_socket *nlg, const struct nlmsghdr *nlh, + mnl_cb_t data_cb, void *data) +{ + int err; + + err = mnl_socket_sendto(nlg->nl, nlh, nlh->nlmsg_len); + if (err < 0) { + perror("Failed to send data"); + return -errno; + } + + err = mnlu_socket_recv_run(nlg->nl, nlh->nlmsg_seq, nlg->buf, + MNL_SOCKET_BUFFER_SIZE, + data_cb, data); + if (err < 0) { + fprintf(stderr, "kernel answers: %s\n", strerror(errno)); + return -errno; + } + return 0; +} diff --git a/lib/namespace.c b/lib/namespace.c index 06ae0a4..45a7ded 100644 --- a/lib/namespace.c +++ b/lib/namespace.c @@ -122,8 +122,14 @@ int netns_foreach(int (*func)(char *nsname, void *arg), void *arg) struct dirent *entry; dir = opendir(NETNS_RUN_DIR); - if (!dir) + if (!dir) { + if (errno == ENOENT) + return 0; + + fprintf(stderr, 
"Failed to open directory %s: %s\n", + NETNS_RUN_DIR, strerror(errno)); return -1; + } while ((entry = readdir(dir)) != NULL) { if (strcmp(entry->d_name, ".") == 0) diff --git a/lib/rt_names.c b/lib/rt_names.c index c40d2e7..b976471 100644 --- a/lib/rt_names.c +++ b/lib/rt_names.c @@ -479,18 +479,30 @@ static void rtnl_rtdsfield_initialize(void) const char *rtnl_dsfield_n2a(int id, char *buf, int len) { + const char *name; + if (id < 0 || id >= 256) { snprintf(buf, len, "%d", id); return buf; } + if (!numeric) { + name = rtnl_dsfield_get_name(id); + if (name != NULL) + return name; + } + snprintf(buf, len, "0x%02x", id); + return buf; +} + +const char *rtnl_dsfield_get_name(int id) +{ + if (id < 0 || id >= 256) + return NULL; if (!rtnl_rtdsfield_tab[id]) { if (!rtnl_rtdsfield_init) rtnl_rtdsfield_initialize(); } - if (!numeric && rtnl_rtdsfield_tab[id]) - return rtnl_rtdsfield_tab[id]; - snprintf(buf, len, "0x%02x", id); - return buf; + return rtnl_rtdsfield_tab[id]; } @@ -682,3 +694,95 @@ int nl_proto_a2n(__u32 *id, const char *arg) *id = res; return 0; } + +#define PROTODOWN_REASON_NUM_BITS 32 +static char *protodown_reason_tab[PROTODOWN_REASON_NUM_BITS] = { +}; + +static int protodown_reason_init; + +static void protodown_reason_initialize(void) +{ + struct dirent *de; + DIR *d; + + protodown_reason_init = 1; + + d = opendir(CONFDIR "/protodown_reasons.d"); + if (!d) + return; + + while ((de = readdir(d)) != NULL) { + char path[PATH_MAX]; + size_t len; + + if (*de->d_name == '.') + continue; + + /* only consider filenames ending in '.conf' */ + len = strlen(de->d_name); + if (len <= 5) + continue; + if (strcmp(de->d_name + len - 5, ".conf")) + continue; + + snprintf(path, sizeof(path), CONFDIR "/protodown_reasons.d/%s", + de->d_name); + rtnl_tab_initialize(path, protodown_reason_tab, + PROTODOWN_REASON_NUM_BITS); + } + closedir(d); +} + +int protodown_reason_n2a(int id, char *buf, int len) +{ + if (id < 0 || id >= PROTODOWN_REASON_NUM_BITS) + return -1; + + if 
(numeric) { + snprintf(buf, len, "%d", id); + return 0; + } + + if (!protodown_reason_init) + protodown_reason_initialize(); + + if (protodown_reason_tab[id]) + snprintf(buf, len, "%s", protodown_reason_tab[id]); + else + snprintf(buf, len, "%d", id); + + return 0; +} + +int protodown_reason_a2n(__u32 *id, const char *arg) +{ + static char *cache; + static unsigned long res; + char *end; + int i; + + if (cache && strcmp(cache, arg) == 0) { + *id = res; + return 0; + } + + if (!protodown_reason_init) + protodown_reason_initialize(); + + for (i = 0; i < PROTODOWN_REASON_NUM_BITS; i++) { + if (protodown_reason_tab[i] && + strcmp(protodown_reason_tab[i], arg) == 0) { + cache = protodown_reason_tab[i]; + res = i; + *id = res; + return 0; + } + } + + res = strtoul(arg, &end, 0); + if (!end || end == arg || *end || res >= PROTODOWN_REASON_NUM_BITS) + return -1; + *id = res; + return 0; +} diff --git a/lib/utils.c b/lib/utils.c index c98021d..93ae0c5 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -1695,3 +1695,251 @@ char *sprint_time64(__s64 time, char *buf) print_time64(buf, SPRINT_BSIZE-1, time); return buf; } + +int do_batch(const char *name, bool force, + int (*cmd)(int argc, char *argv[], void *data), void *data) +{ + char *line = NULL; + size_t len = 0; + int ret = EXIT_SUCCESS; + + if (name && strcmp(name, "-") != 0) { + if (freopen(name, "r", stdin) == NULL) { + fprintf(stderr, + "Cannot open file \"%s\" for reading: %s\n", + name, strerror(errno)); + return EXIT_FAILURE; + } + } + + cmdlineno = 0; + while (getcmdline(&line, &len, stdin) != -1) { + char *largv[100]; + int largc; + + largc = makeargs(line, largv, 100); + if (!largc) + continue; /* blank line */ + + if (cmd(largc, largv, data)) { + fprintf(stderr, "Command failed %s:%d\n", + name, cmdlineno); + ret = EXIT_FAILURE; + if (!force) + break; + } + } + + if (line) + free(line); + + return ret; +} + +int parse_one_of(const char *msg, const char *realval, const char * const *list, + size_t len, int *p_err) 
+{ + int i; + + for (i = 0; i < len; i++) { + if (list[i] && matches(realval, list[i]) == 0) { + *p_err = 0; + return i; + } + } + + fprintf(stderr, "Error: argument of \"%s\" must be one of ", msg); + for (i = 0; i < len; i++) + if (list[i]) + fprintf(stderr, "\"%s\", ", list[i]); + fprintf(stderr, "not \"%s\"\n", realval); + *p_err = -EINVAL; + return 0; +} + +bool parse_on_off(const char *msg, const char *realval, int *p_err) +{ + static const char * const values_on_off[] = { "off", "on" }; + + return parse_one_of(msg, realval, values_on_off, ARRAY_SIZE(values_on_off), p_err); +} + +int parse_mapping_gen(int *argcp, char ***argvp, + int (*key_cb)(__u32 *keyp, const char *key), + int (*mapping_cb)(__u32 key, char *value, void *data), + void *mapping_cb_data) +{ + int argc = *argcp; + char **argv = *argvp; + int ret = 0; + + while (argc > 0) { + char *colon = strchr(*argv, ':'); + __u32 key; + + if (!colon) + break; + *colon = '\0'; + + if (key_cb(&key, *argv)) { + ret = 1; + break; + } + if (mapping_cb(key, colon + 1, mapping_cb_data)) { + ret = 1; + break; + } + + argc--, argv++; + } + + *argcp = argc; + *argvp = argv; + return ret; +} + +static int parse_mapping_num(__u32 *keyp, const char *key) +{ + return get_u32(keyp, key, 0); +} + +int parse_mapping_num_all(__u32 *keyp, const char *key) +{ + if (matches(key, "all") == 0) { + *keyp = (__u32) -1; + return 0; + } + return parse_mapping_num(keyp, key); +} + +int parse_mapping(int *argcp, char ***argvp, bool allow_all, + int (*mapping_cb)(__u32 key, char *value, void *data), + void *mapping_cb_data) +{ + if (allow_all) + return parse_mapping_gen(argcp, argvp, parse_mapping_num_all, + mapping_cb, mapping_cb_data); + else + return parse_mapping_gen(argcp, argvp, parse_mapping_num, + mapping_cb, mapping_cb_data); +} + +int str_map_lookup_str(const struct str_num_map *map, const char *needle) +{ + if (!needle) + return -EINVAL; + + /* Process array which is NULL terminated by the string. 
*/ + while (map && map->str) { + if (strcmp(map->str, needle) == 0) + return map->num; + + map++; + } + return -EINVAL; +} + +const char *str_map_lookup_uint(const struct str_num_map *map, unsigned int val) +{ + unsigned int num = val; + + while (map && map->str) { + if (num == map->num) + return map->str; + + map++; + } + return NULL; +} + +const char *str_map_lookup_u16(const struct str_num_map *map, uint16_t val) +{ + unsigned int num = val; + + while (map && map->str) { + if (num == map->num) + return map->str; + + map++; + } + return NULL; +} + +const char *str_map_lookup_u8(const struct str_num_map *map, uint8_t val) +{ + unsigned int num = val; + + while (map && map->str) { + if (num == map->num) + return map->str; + + map++; + } + return NULL; +} + +unsigned int get_str_char_count(const char *str, int match) +{ + unsigned int count = 0; + const char *pos = str; + + while ((pos = strchr(pos, match))) { + count++; + pos++; + } + return count; +} + +int str_split_by_char(char *str, char **before, char **after, int match) +{ + char *slash; + + slash = strrchr(str, match); + if (!slash) + return -EINVAL; + *slash = '\0'; + *before = str; + *after = slash + 1; + return 0; +} + +struct indent_mem *alloc_indent_mem(void) +{ + struct indent_mem *mem = malloc(sizeof(*mem)); + + if (!mem) + return NULL; + strcpy(mem->indent_str, ""); + mem->indent_level = 0; + return mem; +} + +void free_indent_mem(struct indent_mem *mem) +{ + free(mem); +} + +#define INDENT_STR_STEP 2 + +void inc_indent(struct indent_mem *mem) +{ + if (mem->indent_level + INDENT_STR_STEP > INDENT_STR_MAXLEN) + return; + mem->indent_level += INDENT_STR_STEP; + memset(mem->indent_str, ' ', sizeof(mem->indent_str)); + mem->indent_str[mem->indent_level] = '\0'; +} + +void dec_indent(struct indent_mem *mem) +{ + if (mem->indent_level - INDENT_STR_STEP < 0) + return; + mem->indent_level -= INDENT_STR_STEP; + mem->indent_str[mem->indent_level] = '\0'; +} + +void print_indent(struct indent_mem *mem) +{ + if 
(mem->indent_level) + printf("%s", mem->indent_str); +} diff --git a/lib/utils_math.c b/lib/utils_math.c new file mode 100644 index 0000000..9ef3dd6 --- /dev/null +++ b/lib/utils_math.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include +#include +#include +#include +#include + +#include "utils.h" + +/* See http://physics.nist.gov/cuu/Units/binary.html */ +static const struct rate_suffix { + const char *name; + double scale; +} suffixes[] = { + { "bit", 1. }, + { "Kibit", 1024. }, + { "kbit", 1000. }, + { "mibit", 1024.*1024. }, + { "mbit", 1000000. }, + { "gibit", 1024.*1024.*1024. }, + { "gbit", 1000000000. }, + { "tibit", 1024.*1024.*1024.*1024. }, + { "tbit", 1000000000000. }, + { "Bps", 8. }, + { "KiBps", 8.*1024. }, + { "KBps", 8000. }, + { "MiBps", 8.*1024*1024. }, + { "MBps", 8000000. }, + { "GiBps", 8.*1024.*1024.*1024. }, + { "GBps", 8000000000. }, + { "TiBps", 8.*1024.*1024.*1024.*1024. }, + { "TBps", 8000000000000. }, + { NULL } +}; + +int get_rate(unsigned int *rate, const char *str) +{ + char *p; + double bps = strtod(str, &p); + const struct rate_suffix *s; + + if (p == str) + return -1; + + for (s = suffixes; s->name; ++s) { + if (strcasecmp(s->name, p) == 0) { + bps *= s->scale; + p += strlen(p); + break; + } + } + + if (*p) + return -1; /* unknown suffix */ + + bps /= 8; /* -> bytes per second */ + *rate = bps; + /* detect if an overflow happened */ + if (*rate != floor(bps)) + return -1; + return 0; +} + +int get_rate64(__u64 *rate, const char *str) +{ + char *p; + double bps = strtod(str, &p); + const struct rate_suffix *s; + + if (p == str) + return -1; + + for (s = suffixes; s->name; ++s) { + if (strcasecmp(s->name, p) == 0) { + bps *= s->scale; + p += strlen(p); + break; + } + } + + if (*p) + return -1; /* unknown suffix */ + + bps /= 8; /* -> bytes per second */ + *rate = bps; + return 0; +} + +int get_size(unsigned int *size, const char *str) +{ + double sz; + char *p; + + sz = strtod(str, &p); + if (p == str) + return -1; 
+ + if (*p) { + if (strcasecmp(p, "kb") == 0 || strcasecmp(p, "k") == 0) + sz *= 1024; + else if (strcasecmp(p, "gb") == 0 || strcasecmp(p, "g") == 0) + sz *= 1024*1024*1024; + else if (strcasecmp(p, "gbit") == 0) + sz *= 1024*1024*1024/8; + else if (strcasecmp(p, "mb") == 0 || strcasecmp(p, "m") == 0) + sz *= 1024*1024; + else if (strcasecmp(p, "mbit") == 0) + sz *= 1024*1024/8; + else if (strcasecmp(p, "kbit") == 0) + sz *= 1024/8; + else if (strcasecmp(p, "b") != 0) + return -1; + } + + *size = sz; + + /* detect if an overflow happened */ + if (*size != floor(sz)) + return -1; + + return 0; +} diff --git a/man/man8/bridge.8 b/man/man8/bridge.8 index b060057..9d8663b 100644 --- a/man/man8/bridge.8 +++ b/man/man8/bridge.8 @@ -116,6 +116,8 @@ bridge \- show / manipulate bridge addresses and devices .I PORT .B grp .IR GROUP " [ " +.B src +.IR SOURCE " ] [ " .BR permanent " | " temp " ] [ " .B vid .IR VID " ] " @@ -395,7 +397,8 @@ bridge FDB. .TP .BR "flood on " or " flood off " -Controls whether a given port will flood unicast traffic for which there is no FDB entry. By default this flag is on. +Controls whether unicast traffic for which there is no FDB entry will be +flooded towards this given port. By default this flag is on. .TP .B hwmode @@ -411,8 +414,8 @@ switch. .TP .BR "mcast_flood on " or " mcast_flood off " -Controls whether a given port will flood multicast traffic for which -there is no MDB entry. By default this flag is on. +Controls whether multicast traffic for which there is no MDB entry will be +flooded towards this given port. By default this flag is on. .TP .BR "mcast_to_unicast on " or " mcast_to_unicast off " @@ -512,7 +515,14 @@ the Ethernet MAC address. the interface to which this address is associated. .B local -- is a local permanent fdb entry +- is a local permanent fdb entry, which means that the bridge will not forward +frames with this destination MAC address and VLAN ID, but terminate them +locally. 
This flag is default unless "static" or "dynamic" are explicitly +specified. +.sp + +.B permanent +- this is a synonym for "local" .sp .B static @@ -524,11 +534,21 @@ the interface to which this address is associated. .sp .B self -- the address is associated with the port drivers fdb. Usually hardware. +- the operation is fulfilled directly by the driver for the specified network +device. If the network device belongs to a master like a bridge, then the +bridge is bypassed and not notified of this operation (and if the device does +notify the bridge, it is driver-specific behavior and not mandated by this +flag, check the driver for more details). The "bridge fdb add" command can also +be used on the bridge device itself, and in this case, the added fdb entries +will be locally terminated (not forwarded). In the latter case, the "self" flag +is mandatory. The flag is set by default if "master" is not specified. .sp .B master -- the address is associated with master devices fdb. Usually software (default). +- if the specified network device is a port that belongs to a master device +such as a bridge, the operation is fulfilled by the master device's driver, +which may in turn notify the port driver too of the address. If the specified +device is a master itself, such as a bridge, this flag is invalid. .sp .B router @@ -663,7 +683,7 @@ the bridge to which this address is associated. .SH bridge mdb - multicast group database management .B mdb -objects contain known IP multicast group addresses on a link. +objects contain known IP or L2 multicast group addresses on a link. .P The corresponding commands display mdb entries, add new entries, @@ -683,11 +703,11 @@ the port whose link is known to have members of this multicast group. .TP .BI grp " GROUP" -the IP multicast group address whose members reside on the link connected to -the port. +the multicast group address (IPv4, IPv6 or L2 multicast) whose members reside +on the link connected to the port. 
.B permanent +- the mdb entry is permanent. Optional for IPv4 and IPv6, mandatory for L2. .sp .B temp @@ -695,6 +715,12 @@ the port. .sp .TP +.BI src " SOURCE" +optional source IP address of a sender for this multicast group. If IGMPv3 for IPv4, or +MLDv2 for IPv6 respectively, are enabled it will be included in the lookup when +forwarding multicast traffic. + +.TP .BI vid " VID" the VLAN ID which is known to have members of this multicast group. diff --git a/man/man8/dcb-app.8 b/man/man8/dcb-app.8 new file mode 100644 index 0000000..23fd337 --- /dev/null +++ b/man/man8/dcb-app.8 @@ -0,0 +1,237 @@ +.TH DCB-APP 8 "6 December 2020" "iproute2" "Linux" +.SH NAME +dcb-app \- show / manipulate application priority table of +the DCB (Data Center Bridging) subsystem +.SH SYNOPSIS +.sp +.ad l +.in +8 + +.ti -8 +.B dcb +.RI "[ " OPTIONS " ] " +.B app +.RI "{ " COMMAND " | " help " }" +.sp + +.ti -8 +.B dcb app " { " show " | " flush " } " dev +.RI DEV +.RB "[ " default-prio " ]" +.RB "[ " ethtype-prio " ]" +.RB "[ " stream-port-prio " ]" +.RB "[ " dgram-port-prio " ]" +.RB "[ " port-prio " ]" +.RB "[ " dscp-prio " ]" + +.ti -8 +.B dcb app " { " add " | " del " | " replace " } " dev +.RI DEV +.RB "[ " default-prio " " \fIPRIO-LIST\fB " ]" +.RB "[ " ethtype-prio " " \fIET-MAP\fB " ]" +.RB "[ " stream-port-prio " " \fIPORT-MAP\fB " ]" +.RB "[ " dgram-port-prio " " \fIPORT-MAP\fB " ]" +.RB "[ " port-prio " " \fIPORT-MAP\fB " ]" +.RB "[ " dscp-prio " " \fIDSCP-MAP\fB " ]" + +.ti -8 +.IR PRIO-LIST " := [ " PRIO-LIST " ] " PRIO + +.ti -8 +.IR ET-MAP " := [ " ET-MAP " ] " ET-MAPPING + +.ti -8 +.IR ET-MAPPING " := " ET\fB:\fIPRIO\fR + +.ti -8 +.IR PORT-MAP " := [ " PORT-MAP " ] " PORT-MAPPING + +.ti -8 +.IR PORT-MAPPING " := " PORT\fB:\fIPRIO\fR + +.ti -8 +.IR DSCP-MAP " := [ " DSCP-MAP " ] " DSCP-MAPPING + +.ti -8 +.IR DSCP-MAPPING " := { " DSCP " | " \fBall " }" \fB:\fIPRIO\fR + +.ti -8 +.IR ET " := { " \fB0x600\fR " ..
" \fB0xffff\fR " }" + +.ti -8 +.IR PORT " := { " \fB1\fR " .. " \fB65535\fR " }" + +.ti -8 +.IR DSCP " := { " \fB0\fR " .. " \fB63\fR " }" + +.ti -8 +.IR PRIO " := { " \fB0\fR " .. " \fB7\fR " }" + +.SH DESCRIPTION + +.B dcb app +is used to configure APP table, or application priority table in the DCB (Data +Center Bridging) subsystem. The APP table is used to assign priority to traffic +based on value in one of several headers: EtherType, L4 destination port, or +DSCP. It also allows configuration of port-default priority that is chosen if no +other prioritization rule applies. + +DCB APP entries are 3-tuples of selector, protocol ID, and priority. Selector is +an enumeration that picks one of the prioritization namespaces. Currently it +mostly corresponds to configurable parameters described below. Protocol ID is a +value in the selector namespace. E.g. for EtherType selector, protocol IDs are +the individual EtherTypes, for DSCP they are individual code points. The +priority is the priority that should be assigned to traffic that matches the +selector and protocol ID. + +The APP table is a set of DCB APP entries. The only requirement is that +duplicate entries are not added. Notably, it is valid to have conflicting +priority assignment for the same selector and protocol ID. For example, the set +of two APP entries (DSCP, 10, 1) and (DSCP, 10, 2), where packets with DSCP of +10 should get priority of both 1 and 2, form a well-defined APP table. The +.B dcb app +tool allows low-level management of the app table by adding and deleting +individual APP 3-tuples through +.B add +and +.B del +commands. On the other hand, the command +.B replace +does what one would typically want in this situation--first adds the new +configuration, and then removes the obsolete one, so that only one +prioritization is in effect for a given selector and protocol ID. + +.SH COMMANDS + +.TP +.B show +Display all entries with a given selector.
When no selector is given, shows all +APP table entries categorized per selector. + +.TP +.B flush +Remove all entries with a given selector. When no selector is given, removes all +APP table entries. + +.TP +.B add +.TQ +.B del +Add and, respectively, remove individual APP 3-tuples to and from the DCB APP +table. + +.TP +.B replace +Take the list of entries mentioned as parameter, and add those that are not +present in the APP table yet. Then remove those entries, whose selector and +protocol ID have been mentioned as parameter, but not with the exact same +priority. This has the effect of, for the given selector and protocol ID, +causing that the table only contains the priority (or priorities) given as +parameter. + +.SH PARAMETERS + +The following table shows parameters in a way that they would be used with +\fBadd\fR, \fBdel\fR and \fBreplace\fR commands. For \fBshow\fR and \fBflush\fR, +the parameter name is to be used as a simple keyword without further arguments. + +.TP +.B default-prio \fIPRIO-LIST +The priority to be used for traffic the priority of which is otherwise +unspecified. The argument is a list of individual priorities. Note that +.B default-prio +rules are configured as triplets (\fBEtherType\fR, \fB0\fR, \fIPRIO\fR). +.B dcb app +translates these rules to the symbolic name +.B default-prio +and back. + +.TP +.B ethtype-prio \fIET-MAP +\fIET-MAP\fR uses the array parameter syntax, see +.BR dcb (8) +for details. Keys are EtherType values. Values are priorities to be assigned to +traffic with the matching EtherType. + +.TP +.B stream-port-prio \fIPORT-MAP +.TQ +.B dgram-port-prio \fIPORT-MAP +.TQ +.B port-prio \fIPORT-MAP +\fIPORT-MAP\fR uses the array parameter syntax, see +.BR dcb (8) +for details. Keys are L4 destination port numbers that match on, respectively, +TCP and SCTP traffic, UDP and DCCP traffic, and either of those. Values are +priorities that should be assigned to matching traffic. 
+ +.TP +.B dscp-prio \fIDSCP-MAP +\fIDSCP-MAP\fR uses the array parameter syntax, see +.BR dcb (8) +for details. Keys are DSCP points, values are priorities assigned to +traffic with matching DSCP. DSCP points can be written either directly as +numeric values, or using symbolic names specified in +.B /etc/iproute2/rt_dsfield +(however note that that file specifies full 8-bit dsfield values, whereas +.B dcb app +will only use the higher six bits). +.B dcb app show +will similarly format DSCP values as symbolic names if possible. The +command line option +.B -N +turns the show translation off. + +.SH EXAMPLE & USAGE + +Prioritize traffic with DSCP 0 to priority 0, 24 to 3 and 48 to 6: + +.P +# dcb app add dev eth0 dscp-prio 0:0 24:3 48:6 + +Add another rule to configure DSCP 24 to priority 2 and show the result: + +.P +# dcb app add dev eth0 dscp-prio 24:2 +.br +# dcb app show dev eth0 dscp-prio +.br +dscp-prio 0:0 CS3:2 CS3:3 CS6:6 +.br +# dcb -N app show dev eth0 dscp-prio +.br +dscp-prio 0:0 24:2 24:3 48:6 + +Reconfigure the table so that the only rule for DSCP 24 is for assignment of +priority 4: + +.P +# dcb app replace dev eth0 dscp-prio 24:4 +.br +# dcb app show dev eth0 dscp-prio +.br +dscp-prio 0:0 24:4 48:6 + +Flush all DSCP rules: + +.P +# dcb app flush dev eth0 dscp-prio +.br +# dcb app show dev eth0 dscp-prio +.br +(nothing) + +.SH EXIT STATUS +Exit status is 0 if command was successful or a positive integer upon failure. + +.SH SEE ALSO +.BR dcb (8) + +.SH REPORTING BUGS +Report any bugs to the Network Developers mailing list +.B +where the development and maintenance is primarily done. +You do not have to be subscribed to the list to send a message there.
+ +.SH AUTHOR +Petr Machata diff --git a/man/man8/dcb-buffer.8 b/man/man8/dcb-buffer.8 new file mode 100644 index 0000000..c7ba6a9 --- /dev/null +++ b/man/man8/dcb-buffer.8 @@ -0,0 +1,126 @@ +.TH DCB-BUFFER 8 "12 November 2020" "iproute2" "Linux" +.SH NAME +dcb-buffer \- show / manipulate port buffer settings of +the DCB (Data Center Bridging) subsystem +.SH SYNOPSIS +.sp +.ad l +.in +8 + +.ti -8 +.B dcb +.RI "[ " OPTIONS " ] " +.B buffer +.RI "{ " COMMAND " | " help " }" +.sp + +.ti -8 +.B dcb buffer show dev +.RI DEV +.RB "[ " prio-buffer " ]" +.RB "[ " buffer-size " ]" +.RB "[ " total-size " ]" + +.ti -8 +.B dcb buffer set dev +.RI DEV +.RB "[ " prio-buffer " " \fIPRIO-MAP " ]" +.RB "[ " buffer-size " " \fISIZE-MAP " ]" + +.ti -8 +.IR PRIO-MAP " := [ " PRIO-MAP " ] " PRIO-MAPPING + +.ti -8 +.IR PRIO-MAPPING " := { " PRIO " | " \fBall " }" \fB:\fIBUFFER\fR + +.ti -8 +.IR SIZE-MAP " := [ " SIZE-MAP " ] " SIZE-MAPPING + +.ti -8 +.IR SIZE-MAPPING " := { " BUFFER " | " \fBall " }" \fB:\fISIZE\fR + +.ti -8 +.IR PRIO " := { " \fB0\fR " .. " \fB7\fR " }" + +.ti -8 +.IR BUFFER " := { " \fB0\fR " .. " \fB7\fR " }" + +.ti -8 +.IR SIZE " := { " INTEGER " | " INTEGER\fBK\fR " | " INTEGER\fBM\fR " | " ... " }" + +.SH DESCRIPTION + +.B dcb buffer +is used to configure assignment of traffic to port buffers based on traffic +priority, and sizes of those buffers. It can be also used to inspect the current +configuration, as well as total device memory that the port buffers take. + +.SH PARAMETERS + +For read-write parameters, the following describes only the write direction, +i.e. as used with the \fBset\fR command. For the \fBshow\fR command, the +parameter name is to be used as a simple keyword without further arguments. This +instructs the tool to show the value of a given parameter. When no parameters +are given, the tool shows the complete buffer configuration. + +.TP +.B total-size +A read-only property that shows the total device memory taken up by port +buffers. 
This might be more than a simple sum of individual buffer sizes if +there are any hidden or internal buffers. + +.TP +.B prio-buffer \fIPRIO-MAP +\fIPRIO-MAP\fR uses the array parameter syntax, see +.BR dcb (8) +for details. Keys are priorities, values are buffer indices. For each priority +sets a buffer where traffic with that priority is directed to. + +.TP +.B buffer-size \fISIZE-MAP +\fISIZE-MAP\fR uses the array parameter syntax, see +.BR dcb (8) +for details. Keys are buffer indices, values are sizes of that buffer in bytes. +The sizes can use the notation documented in section PARAMETERS at +.BR tc (8). +Note that the size requested by the tool can be rounded or capped by the driver +to satisfy the requirements of the device. + +.SH EXAMPLE & USAGE + +Configure the priomap in a one-to-one fashion: + +.P +# dcb buffer set dev eth0 prio-buffer 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 + +Set sizes of all buffers to 10KB, except for buffer 6, which will have the size +1MB: + +.P +# dcb buffer set dev eth0 buffer-size all:10K 6:1M + +Show what was set: + +.P +# dcb buffer show dev eth0 +.br +prio-buffer 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 +.br +buffer-size 0:10Kb 1:10Kb 2:10Kb 3:10Kb 4:10Kb 5:10Kb 6:1Mb 7:10Kb +.br +total-size 1222Kb + +.SH EXIT STATUS +Exit status is 0 if command was successful or a positive integer upon failure. + +.SH SEE ALSO +.BR dcb (8) + +.SH REPORTING BUGS +Report any bugs to the Network Developers mailing list +.B +where the development and maintenance is primarily done. +You do not have to be subscribed to the list to send a message there. 
+ +.SH AUTHOR +Petr Machata diff --git a/man/man8/dcb-dcbx.8 b/man/man8/dcb-dcbx.8 new file mode 100644 index 0000000..52133e3 --- /dev/null +++ b/man/man8/dcb-dcbx.8 @@ -0,0 +1,108 @@ +.TH DCB-DCBX 8 "13 December 2020" "iproute2" "Linux" +.SH NAME +dcb-dcbx \- show / manipulate port DCBX (Data Center Bridging eXchange) +.SH SYNOPSIS +.sp +.ad l +.in +8 + +.ti -8 +.B dcb +.RI "[ " OPTIONS " ] " +.B dcbx +.RI "{ " COMMAND " | " help " }" +.sp + +.ti -8 +.B dcb dcbx show dev +.RI DEV + +.ti -8 +.B dcb dcbx set dev +.RI DEV +.RB "[ " host " ]" +.RB "[ " lld-managed " ]" +.RB "[ " cee " ]" +.RB "[ " ieee " ]" +.RB "[ " static " ]" + +.SH DESCRIPTION + +Data Center Bridging eXchange (DCBX) is a protocol used by DCB devices to +exchange configuration information with directly connected peers. The Linux DCBX +object is a 1-byte bitfield of flags that configure whether DCBX is implemented +in the device or in the host, and which version of the protocol should be used. +.B dcb dcbx +is used to access the per-port Linux DCBX object. + +There are two principal modes of operation: in +.B host +mode, DCBX protocol is implemented by the host LLDP agent, and the DCB +interfaces are used to propagate the negotiated parameters to capable devices. In +.B lld-managed +mode, the configuration is handled by the device, and DCB interfaces are used +for inspection of negotiated parameters, and can also be used to set initial +parameters. + +.SH PARAMETERS + +When used with +.B dcb dcbx set, +the following keywords enable the corresponding configuration. The keywords that +are not mentioned on the command line are considered disabled. When used with +.B show, +each enabled feature is shown by its corresponding keyword. + +.TP +.B host +.TQ +.B lld-managed +The device is in the host mode of operation and, respectively, the lld-managed +mode of operation, as described above.
In principle these two keywords are +mutually exclusive, but +.B dcb dcbx +allows setting both and lets the driver handle it as appropriate. + +.TP +.B cee +.TQ +.B ieee +The device supports CEE (Converged Enhanced Ethernet) and, respectively, IEEE +version of the DCB specification. Typically only one of these will be set, but +.B dcb dcbx +does not mandate this. + +.TP +.B static +indicates the engine supports static configuration. No actual negotiation is +performed, negotiated parameters are always the initial configuration. + +.SH EXAMPLE & USAGE + +Put the DCB engine into the "host" mode of operation, and use IEEE-standardized +DCB interfaces: + +.P +# dcb dcbx set dev eth0 host ieee + +Show what was set: + +.P +# dcb dcbx show dev eth0 +.br +host ieee + +.SH EXIT STATUS +Exit status is 0 if command was successful or a positive integer upon failure. + +.SH SEE ALSO +.BR dcb (8) + +.SH REPORTING BUGS +Report any bugs to the Network Developers mailing list +.B +where the development and maintenance is primarily done. +You do not have to be subscribed to the list to send a message there.
+ +.SH AUTHOR +Petr Machata diff --git a/man/man8/dcb-ets.8 b/man/man8/dcb-ets.8 new file mode 100644 index 0000000..9c64b33 --- /dev/null +++ b/man/man8/dcb-ets.8 @@ -0,0 +1,194 @@ +.TH DCB-ETS 8 "19 October 2020" "iproute2" "Linux" +.SH NAME +dcb-ets \- show / manipulate ETS (Enhanced Transmission Selection) settings of +the DCB (Data Center Bridging) subsystem +.SH SYNOPSIS +.sp +.ad l +.in +8 + +.ti -8 +.B dcb +.RI "[ " OPTIONS " ] " +.B ets +.RI "{ " COMMAND " | " help " }" +.sp + +.ti -8 +.B dcb ets show dev +.RI DEV +.RB "[ " willing " ]" +.RB "[ " ets-cap " ]" +.RB "[ " cbs " ]" +.RB "[ " tc-tsa " ]" +.RB "[ " reco-tc-tsa " ]" +.RB "[ " pg-bw " ]" +.RB "[ " tc-bw " ]" +.RB "[ " reco-tc-bw " ]" +.RB "[ " prio-tc " ]" +.RB "[ " reco-prio-tc " ]" + +.ti -8 +.B dcb ets set dev +.RI DEV +.RB "[ " willing " { " on " | " off " } ]" +.RB "[ { " tc-tsa " | " reco-tc-tsa " } " \fITSA-MAP\fB " ]" +.RB "[ { " pg-bw " | " tc-bw " | " reco-tc-bw " } " \fIBW-MAP\fB " ]" +.RB "[ { " prio-tc " | " reco-prio-tc " } " \fIPRIO-MAP\fB " ]" + +.ti -8 +.IR TSA-MAP " := [ " TSA-MAP " ] " TSA-MAPPING + +.ti -8 +.IR TSA-MAPPING " := { " TC " | " \fBall " }" \fB: "{ " \fBstrict\fR " | " +.IR \fBcbs\fR " | " \fBets\fR " | " \fBvendor\fR " }" + +.ti -8 +.IR BW-MAP " := [ " BW-MAP " ] " BW-MAPPING + +.ti -8 +.IR BW-MAPPING " := { " TC " | " \fBall " }" \fB:\fIINTEGER\fR + +.ti -8 +.IR PRIO-MAP " := [ " PRIO-MAP " ] " PRIO-MAPPING + +.ti -8 +.IR PRIO-MAPPING " := { " PRIO " | " \fBall " }" \fB:\fITC\fR + +.ti -8 +.IR TC " := { " \fB0\fR " .. " \fB7\fR " }" + +.ti -8 +.IR PRIO " := { " \fB0\fR " .. " \fB7\fR " }" + +.SH DESCRIPTION + +.B dcb ets +is used to configure Enhanced Transmission Selection attributes through Linux +DCB (Data Center Bridging) interface. ETS permits configuration of mapping of +priorities to traffic classes, traffic selection algorithm to use per traffic +class, bandwidth allocation, etc. 
+ +Two DCB TLVs are related to the ETS feature: a configuration and recommendation +values. Recommendation values are named with a prefix +.B reco-, +while the configuration ones have plain names. + +.SH PARAMETERS + +For read-write parameters, the following describes only the write direction, +i.e. as used with the \fBset\fR command. For the \fBshow\fR command, the +parameter name is to be used as a simple keyword without further arguments. This +instructs the tool to show the value of a given parameter. When no parameters +are given, the tool shows the complete ETS configuration. + +.TP +.B ets-cap +A read-only property that shows the number of supported ETS traffic classes. + +.TP +.B cbs +A read-only property that is enabled if the driver and the hardware support the +CBS Transmission Selection Algorithm. + +.TP +.B willing \fR{ \fBon\fR | \fBoff\fR } +Whether local host should accept configuration from peer TLVs. + +.TP +.B prio-tc \fIPRIO-MAP +.TQ +.B reco-prio-tc \fIPRIO-MAP +\fIPRIO-MAP\fR uses the array parameter syntax, see +.BR dcb (8) +for details. Keys are priorities, values are traffic classes. For each priority +sets a TC where traffic with that priority is directed to. + +.TP +.B tc-tsa \fITSA-MAP +.TQ +.B reco-tc-tsa \fITSA-MAP +\fITSA-MAP\fR uses the array parameter syntax, see +.BR dcb (8) +for details. Keys are TCs, values are Transmission Selection Algorithm (TSA) +keywords described below. For each TC sets an algorithm used for deciding how +traffic queued up at this TC is scheduled for transmission. Supported TSAs are: + +.B strict +- for strict priority, where traffic in higher-numbered TCs always takes +precedence over traffic in lower-numbered TCs. +.br +.B ets +- for Enhanced Traffic Selection, where available bandwidth is distributed among +the ETS-enabled TCs according to the weights set by +.B tc-bw +and +.B reco-tc-bw\fR, +respectively. 
+.br +.B cbs +- for Credit Based Shaper, where traffic is scheduled in a strict manner up to +the limit set by a shaper. +.br +.B vendor +- for vendor-specific traffic selection algorithm. + +.TP +.B tc-bw \fIBW-MAP +.TQ +.B reco-tc-bw \fIBW-MAP +\fIBW-MAP\fR uses the array parameter syntax, see +.BR dcb (8) +for details. Keys are TCs, values are integers representing percent of available +bandwidth given to the traffic class in question. The value should be 0 for TCs +whose TSA is not \fBets\fR, and the sum of all values shall be 100. As an +exception to the standard wording, a configuration with no \fBets\fR TCs is +permitted to sum up to 0 instead. +.br + +.TP +.B pg-bw \fIBW-MAP +The precise meaning of \fBpg-bw\fR is not standardized, but the assumption seems +to be that the same scheduling process as on the transmit side is applicable on +receive side as well, and configures receive bandwidth allocation for \fBets\fR +ingress traffic classes (priority groups). + +.SH EXAMPLE & USAGE + +Configure ETS priomap in a one-to-one fashion: + +.P +# dcb ets set dev eth0 prio-tc 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 + +Set TSA and transmit bandwidth configuration: + +.P +# dcb ets set dev eth0 tc-tsa all:strict 0:ets 1:ets 2:ets \\ +.br + tc-bw all:0 0:33 1:33 2:34 + +Show what was set: + +.P +# dcb ets show dev eth0 prio-tc tc-tsa tc-bw +.br +prio-tc 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 +.br +tc-tsa 0:ets 1:ets 2:ets 3:strict 4:strict 5:strict 6:strict 7:strict +.br +tc-bw 0:33 1:33 2:34 3:0 4:0 5:0 6:0 7:0 + +.SH EXIT STATUS +Exit status is 0 if command was successful or a positive integer upon failure. + +.SH SEE ALSO +.BR dcb (8) + +.SH REPORTING BUGS +Report any bugs to the Network Developers mailing list +.B +where the development and maintenance is primarily done. +You do not have to be subscribed to the list to send a message there. 
+ +.SH AUTHOR +Petr Machata diff --git a/man/man8/dcb-maxrate.8 b/man/man8/dcb-maxrate.8 new file mode 100644 index 0000000..d03c215 --- /dev/null +++ b/man/man8/dcb-maxrate.8 @@ -0,0 +1,94 @@ +.TH DCB-MAXRATE 8 "22 November 2020" "iproute2" "Linux" +.SH NAME +dcb-maxrate \- show / manipulate port maxrate settings of +the DCB (Data Center Bridging) subsystem +.SH SYNOPSIS +.sp +.ad l +.in +8 + +.ti -8 +.B dcb +.RI "[ " OPTIONS " ] " +.B maxrate +.RI "{ " COMMAND " | " help " }" +.sp + +.ti -8 +.B dcb maxrate show dev +.RI DEV +.RB "[ " tc-maxrate " ]" + +.ti -8 +.B dcb maxrate set dev +.RI DEV +.RB "[ " tc-maxrate " " \fIRATE-MAP " ]" + +.ti -8 +.IR RATE-MAP " := [ " RATE-MAP " ] " RATE-MAPPING + +.ti -8 +.IR RATE-MAPPING " := { " TC " | " \fBall " }" \fB:\fIRATE\fR + +.ti -8 +.IR TC " := { " \fB0\fR " .. " \fB7\fR " }" + +.ti -8 +.IR RATE " := { " INTEGER "[" \fBbit\fR "] | " INTEGER\fBKbit\fR " | " +.IR INTEGER\fBMib\fR " | " ... " }" + +.SH DESCRIPTION + +.B dcb maxrate +is used to configure and inspect maximum rate at which traffic is allowed to +egress from a given traffic class. + +.SH PARAMETERS + +The following describes only the write direction, i.e. as used with the +\fBset\fR command. For the \fBshow\fR command, the parameter name is to be used +as a simple keyword without further arguments. This instructs the tool to show +the value of a given parameter. When no parameters are given, the tool shows the +complete maxrate configuration. + +.TP +.B tc-maxrate \fIRATE-MAP +\fIRATE-MAP\fR uses the array parameter syntax, see +.BR dcb (8) +for details. Keys are TC indices, values are traffic rates in bits per second. +The rates can use the notation documented in section PARAMETERS at +.BR tc (8). +Note that under that notation, "bit" stands for bits per second whereas "b" +stands for bytes per second. When showing, the command line option +.B -i +toggles between using decadic and ISO/IEC prefixes. 
+ +.SH EXAMPLE & USAGE + +Set rates of all traffic classes to 25Gbps, except for TC 6, which will +have the rate of 100Gbps: + +.P +# dcb maxrate set dev eth0 tc-maxrate all:25Gbit 6:100Gbit + +Show what was set: + +.P +# dcb maxrate show dev eth0 +.br +tc-maxrate 0:25Gbit 1:25Gbit 2:25Gbit 3:25Gbit 4:25Gbit 5:25Gbit 6:100Gbit 7:25Gbit + +.SH EXIT STATUS +Exit status is 0 if command was successful or a positive integer upon failure. + +.SH SEE ALSO +.BR dcb (8) + +.SH REPORTING BUGS +Report any bugs to the Network Developers mailing list +.B +where the development and maintenance is primarily done. +You do not have to be subscribed to the list to send a message there. + +.SH AUTHOR +Petr Machata diff --git a/man/man8/dcb-pfc.8 b/man/man8/dcb-pfc.8 new file mode 100644 index 0000000..735c16e --- /dev/null +++ b/man/man8/dcb-pfc.8 @@ -0,0 +1,127 @@ +.TH DCB-PFC 8 "31 October 2020" "iproute2" "Linux" +.SH NAME +dcb-pfc \- show / manipulate PFC (Priority-based Flow Control) settings of +the DCB (Data Center Bridging) subsystem +.SH SYNOPSIS +.sp +.ad l +.in +8 + +.ti -8 +.B dcb +.RI "[ " OPTIONS " ] " +.B pfc +.RI "{ " COMMAND " | " help " }" +.sp + +.ti -8 +.B dcb pfc show dev +.RI DEV +.RB "[ " pfc-cap " ]" +.RB "[ " prio-pfc " ]" +.RB "[ " macsec-bypass " ]" +.RB "[ " delay " ]" +.RB "[ " requests " ]" +.RB "[ " indications " ]" + +.ti -8 +.B dcb pfc set dev +.RI DEV +.RB "[ " prio-pfc " " \fIPFC-MAP " ]" +.RB "[ " macsec-bypass " { " on " | " off " } ]" +.RB "[ " delay " " \fIINTEGER\fR " ]" + +.ti -8 +.IR PFC-MAP " := [ " PFC-MAP " ] " PFC-MAPPING + +.ti -8 +.IR PFC-MAPPING " := { " PRIO " | " \fBall " }" \fB:\fR "{ " +.IR \fBon\fR " | " \fBoff\fR " }" + +.ti -8 +.IR PRIO " := { " \fB0\fR " .. " \fB7\fR " }" + +.SH DESCRIPTION + +.B dcb pfc +is used to configure Priority-based Flow Control attributes through Linux +DCB (Data Center Bridging) interface. 
PFC permits marking flows with a +certain priority as lossless, and holds related configuration, as well as +PFC counters. + +.SH PARAMETERS + +For read-write parameters, the following describes only the write direction, +i.e. as used with the \fBset\fR command. For the \fBshow\fR command, the +parameter name is to be used as a simple keyword without further arguments. This +instructs the tool to show the value of a given parameter. When no parameters +are given, the tool shows the complete PFC configuration. + +.TP +.B pfc-cap +A read-only property that shows the number of traffic classes that may +simultaneously support PFC. + +.TP +.B requests +A read-only count of the sent PFC frames per traffic class. Only shown when +-s is given, or when requested explicitly. + +.TP +.B indications +A read-only count of the received PFC frames per traffic class. Only shown +when -s is given, or when requested explicitly. + +.TP +.B macsec-bypass \fR{ \fBon\fR | \fBoff\fR } +Whether the sending station is capable of bypassing MACsec processing when +MACsec is disabled. + +.TP +.B prio-pfc \fIPFC-MAP +\fIPFC-MAP\fR uses the array parameter syntax, see +.BR dcb (8) +for details. Keys are priorities, values are on / off indicators of whether +PFC is enabled for a given priority. + +.TP +.B delay \fIINTEGER +The allowance made for round-trip propagation delay of the link in bits. +The value shall be 0..65535. + +.SH EXAMPLE & USAGE + +Enable PFC on priorities 6 and 7, leaving the rest intact: + +.P +# dcb pfc set dev eth0 prio-pfc 6:on 7:on + +Disable PFC of all priorities except 6 and 7, and configure delay to 4096 +bits: + +.P +# dcb pfc set dev eth0 prio-pfc all:off 6:on 7:on delay 0x1000 + +Show what was set: + +.P +# dcb pfc show dev eth0 +.br +pfc-cap 8 macsec-bypass off delay 4096 +.br +prio-pfc 0:off 1:off 2:off 3:off 4:off 5:off 6:on 7:on + +.SH EXIT STATUS +Exit status is 0 if command was successful or a positive integer upon failure. 
+ +.SH SEE ALSO +.BR dcb (8) + +.SH REPORTING BUGS +Report any bugs to the Network Developers mailing list +.B +where the development and maintenance is primarily done. +You do not have to be subscribed to the list to send a message there. + +.SH AUTHOR +Petr Machata diff --git a/man/man8/dcb.8 b/man/man8/dcb.8 new file mode 100644 index 0000000..24944b7 --- /dev/null +++ b/man/man8/dcb.8 @@ -0,0 +1,156 @@ +.TH DCB 8 "19 October 2020" "iproute2" "Linux" +.SH NAME +dcb \- show / manipulate DCB (Data Center Bridging) settings +.SH SYNOPSIS +.sp +.ad l +.in +8 + +.ti -8 +.B dcb +.RI "[ " OPTIONS " ] " +.RB "{ " app " | " buffer " | " ets " | " maxrate " | " pfc " }" +.RI "{ " COMMAND " | " help " }" +.sp + +.ti -8 +.B dcb +.RB "[ " -force " ] " +.BI "-batch " filename +.sp + +.ti -8 +.B dcb +.RI "[ " OPTIONS " ] " +.B help +.sp + +.SH OPTIONS + +.TP +.BR "\-n" , " \--netns " +switches +.B dcb +to the specified network namespace +.IR NETNS . + +.TP +.BR "\-V" , " --Version" +Print the version of the +.B dcb +utility and exit. + +.TP +.BR "\-b", " --batch " +Read commands from provided file or standard input and invoke them. First +failure will cause termination of dcb. + +.TP +.BR "\-f", " --force" +Don't terminate dcb on errors in batch mode. If there were any errors during +execution of the commands, the application return code will be non zero. + +.TP +.BR "\-i" , " --iec" +When showing rates, use ISO/IEC 1024-based prefixes (Ki, Mi, Gi) instead of +the 1000-based ones (K, M, G). + +.TP +.BR "\-j" , " --json" +Generate JSON output. + +.TP +.BR "\-N" , " --Numeric" +If the subtool in question translates numbers to symbolic names in some way, +suppress this translation. + +.TP +.BR "\-p" , " --pretty" +When combined with -j generate a pretty JSON output. + +.TP +.BR "\-s" , " --statistics" +If the object in question contains any statistical counters, show them as +part of the "show" output.
+ +.SH OBJECTS + +.TP +.B app +- Configuration of application priority table + +.TP +.B buffer +- Configuration of port buffers + +.TP +.B ets +- Configuration of ETS (Enhanced Transmission Selection) + +.TP +.B maxrate +- Configuration of per-TC maximum transmit rate + +.TP +.B pfc +- Configuration of PFC (Priority-based Flow Control) + +.SH COMMANDS + +A \fICOMMAND\fR specifies the action to perform on the object. The set of +possible actions depends on the object type. As a rule, it is possible to +.B show +objects and to invoke topical +.B help, +which prints a list of available commands and argument syntax conventions. + +.SH ARRAY PARAMETERS + +Like commands, specification of parameters is in the domain of individual +objects (and their commands) as well. However, much of the DCB interface +revolves around arrays of fixed size that specify one value per some key, such +as per traffic class or per priority. There is therefore a single syntax for +adjusting elements of these arrays. It consists of a series of +\fIKEY\fB:\fIVALUE\fR pairs, where the meaning of the individual keys and values +depends on the parameter. + +The elements are evaluated in order from left to right, and the latter ones +override the earlier ones. The elements that are not specified on the command +line are queried from the kernel and their current value is retained. + +As an example, take a made-up parameter tc-juju, which can be set to charm +traffic in a given TC with either good luck or bad luck. \fIKEY\fR can therefore +be 0..7 (as is usual for TC numbers in DCB), and \fIVALUE\fR either of +\fBnone\fR, \fBgood\fR, and \fBbad\fR. An example of changing a juju value of +TCs 0 and 7, while leaving all other intact, would then be: + +.P +# dcb foo set dev eth0 tc-juju 0:good 7:bad + +A special key, \fBall\fR, is recognized which sets the same value to all array +elements. This can be combined with the usual single-element syntax. E.g. 
in the +following, the juju of all keys is set to \fBnone\fR, except 0 and 7, which have +other values: + +.P +# dcb foo set dev eth0 tc-juju all:none 0:good 7:bad + +.SH EXIT STATUS +Exit status is 0 if command was successful or a positive integer upon failure. + +.SH SEE ALSO +.BR dcb-app (8), +.BR dcb-buffer (8), +.BR dcb-ets (8), +.BR dcb-maxrate (8), +.BR dcb-pfc (8) +.br + +.SH REPORTING BUGS +Report any bugs to the Network Developers mailing list +.B <netdev@vger.kernel.org> +where the development and maintenance is primarily done. +You do not have to be subscribed to the list to send a message there. + +.SH AUTHOR +Petr Machata diff --git a/man/man8/devlink-dev.8 b/man/man8/devlink-dev.8 index 279100c..22735dc 100644 --- a/man/man8/devlink-dev.8 +++ b/man/man8/devlink-dev.8 @@ -63,6 +63,10 @@ devlink-dev \- devlink device configuration [ .B netns .RI "{ " PID " | " NAME " | " ID " }" +] [ +.BR action " { " driver_reinit " | " fw_activate " }" +] [ +.B limit no_reset ] .ti -8 @@ -173,6 +177,36 @@ If this argument is omitted all parameters supported by devlink devices are list .RI { " PID " | " NAME " | " ID " } - Specifies the network namespace to reload into, either by pid, name or id. +.BR action " { " driver_reinit " | " fw_activate " }" +- Specifies the reload action required. +If this argument is omitted +.I driver_reinit +action will be used. +Note that even though user asks for a specific action, the driver implementation +might require to perform another action alongside with it. For example, some +drivers do not support driver reinitialization being performed without fw +activation. Therefore, the devlink reload command returns the list of actions +which were actually performed. + +.I driver_reinit +- Driver entities re-initialization, applying devlink-param and +devlink-resource values. + +.I fw_activate +- Activates new firmware if such image is stored and pending activation. If no +limitation specified this action may involve firmware reset.
If no new image +pending this action will reload current firmware image. + +.B limit no_reset +- Specifies limitation on reload action. +If this argument is omitted limit is unspecified and the reload action is not +limited. In such case driver implementation may include reset or downtime as +needed to perform the actions. + +.I no_reset +- No reset allowed, no down time allowed, no link flap and no configuration is +lost. + .SS devlink dev info - display device information. Display device information provided by the driver. This command can be used to query versions of the hardware components or device components which diff --git a/man/man8/devlink-health.8 b/man/man8/devlink-health.8 index 47b9613..975b8c7 100644 --- a/man/man8/devlink-health.8 +++ b/man/man8/devlink-health.8 @@ -42,6 +42,12 @@ devlink-health \- devlink health reporting and recovery .RI "" REPORTER "" .ti -8 +.BR "devlink health test" +.RI "{ " DEV " | " DEV/PORT_INDEX " }" +.B reporter +.RI "" REPORTER "" + +.ti -8 .B devlink health dump clear .RI "{ " DEV " | " DEV/PORT_INDEX " }" .B reporter @@ -105,6 +111,16 @@ This action performs a recovery and increases the recoveries counter on success. .I "REPORTER" - specifies the reporter's name registered on specified devlink device or port. +.SS devlink health test - Trigger a test event on a reporter. + +.PP +.I "DEV" +- specifies the devlink device. + +.PP +.I "REPORTER" +- specifies the reporter's name registered on the devlink device. + .SS devlink health dump show - Display the last saved dump.
.PD 0 diff --git a/man/man8/devlink-port.8 b/man/man8/devlink-port.8 index 966faae..563c583 100644 --- a/man/man8/devlink-port.8 +++ b/man/man8/devlink-port.8 @@ -44,6 +44,50 @@ devlink-port \- devlink port configuration .RI "{ " show " | " recover " | " diagnose " | " dump " | " set " }" .ti -8 +.BI "devlink port add" +.RB "[" +.IR "DEV | DEV/PORT_INDEX" +.RB "] " +.RB "[ " flavour +.IR FLAVOUR " ]" +.RB "[ " pfnum +.IR PFNUMBER " ]" +.RB "{ " sfnum +.IR SFNUMBER " }" +.br + +.ti -8 +.B devlink port del +.IR DEV/PORT_INDEX + +.ti -8 +.BR "devlink port function set " +.IR DEV/PORT_INDEX +.RI "{ " +.BR "hw_addr " +.RI "ADDR }" +.RI "{ " +.BR "state" +.RI "STATE }" + +.ti -8 +.B devlink port param set +.I DEV/PORT_INDEX +.B name +.I PARAMETER +.B value +.I VALUE +.BR cmode " { " runtime " | " driverinit " | " permanent " } " + +.ti -8 +.B devlink port param show +[ +.I DEV/PORT_INDEX +.B name +.I PARAMETER +] + +.ti -8 .B devlink port help .SH "DESCRIPTION" @@ -99,6 +143,103 @@ If this argument is omitted all ports are listed. Is an alias for .BR devlink-health (8). +.ti -8 +.SS devlink port add - add a devlink port +.PP +.B "DEV" +- specifies the devlink device to operate on. or + +.PP +.B "DEV/PORT_INDEX" +- specifies the devlink port index to use for the requested new port. +This is optional. When omitted, driver allocates unique port index. + +.TP +.BR flavour " { " pcipf " | " pcisf " } " +set port flavour + +.I pcipf +- PCI PF port + +.I pcisf +- PCI SF port + +.TP +.BR pfnum " { " pfnumber " } " +Specifies PCI pfnumber to use on which a SF device to create + +.TP +.BR sfnum " { " sfnumber " } " +Specifies sfnumber to assign to the device of the SF. +This field is optional for those devices which support auto assignment of the +SF number. + +.ti -8 +.SS devlink port function set - Set the port function attribute(s). + +.PP +.B "DEV/PORT_INDEX" +- specifies the devlink port to operate on. + +.TP +.BR hw_addr " ADDR" +- hardware address of the function to set.
This is a Ethernet MAC address when +port type is Ethernet. + +.TP +.BR state " { " active " | " inactive " } " +- new state of the function to change to. + +.I active +- Once configuration of the function is done, activate the function. + +.I inactive +- To inactivate the function and its device(s), set to inactive. + +.ti -8 +.SS devlink port del - delete a devlink port +.PP +.B "DEV/PORT_INDEX" +- specifies the devlink port to delete. + +.ti -8 +.SS devlink port param set - set new value to devlink port configuration parameter +.PP +.B "DEV/PORT_INDEX" +- specifies the devlink port to operate on. + +.TP +.BI name " PARAMETER" +Specify parameter name to set. + +.TP +.BI value " VALUE" +New value to set. + +.TP +.BR cmode " { " runtime " | " driverinit " | " permanent " } " +Configuration mode in which the new value is set. + +.I runtime +- Set new value while driver is running. This configuration mode doesn't require any reset to apply the new value. + +.I driverinit +- Set new value which will be applied during driver initialization. This configuration mode requires restart driver by devlink reload command to apply the new value. + +.I permanent +- New value is written to device's non-volatile memory. This configuration mode requires hard reset to apply the new value. + +.SS devlink port param show - display devlink port supported configuration parameters attributes + +.PP +.B "DEV/PORT_INDEX" +- specifies the devlink port to operate on. + +.B name +.I PARAMETER +Specify parameter name to show. +If this argument, as well as port index, are omitted - all parameters supported by devlink device ports are listed. + .SH "EXAMPLES" .PP devlink port show @@ -135,6 +276,57 @@ devlink port health show pci/0000:01:00.0/1 reporter tx .RS 4 Shows status and configuration of tx reporter registered on pci/0000:01:00.0/1 devlink port. 
.RE +.PP +devlink port add pci/0000:06:00.0 flavour pcisf pfnum 0 sfnum 88 +.RS 4 +Add a devlink port of flavour PCI SF on PCI PF having number 0 with SF number 88. +To make use of the function an example sequence is to add a port, configure the +function attribute and activate the function. Once function usage is completed, +inactivate the function and finally delete the port. When there is desire to +reuse the port without deletion, it can be reconfigured and activated again when +function is in inactive state and function's operational state is detached. +.RE +.PP +devlink port del pci/0000:06:00.0/1 +.RS 4 +Delete previously created devlink port. It is recommended to first deactivate +the function if the function supports state management. +.RE +.PP +devlink port function set pci/0000:01:00.0/1 hw_addr 00:00:00:11:22:33 +.RS 4 +Configure hardware address of the PCI function represented by devlink port. +If the port supports change in function state, hardware address must be configured +before activating the function. +.RE +.PP +devlink port function set pci/0000:01:00.0/1 state active +.RS 4 +Activate the function. This will initiate the function enumeration and driver loading. +.RE +.PP +devlink port function set pci/0000:01:00.0/1 state inactive +.RS 4 +Deactivate the function. This will initiate the function teardown which results +in driver unload and device removal. +.RE +.PP +devlink port function set pci/0000:01:00.0/1 hw_addr 00:00:00:11:22:33 state active +.RS 4 +Configure hardware address and also activate the function. When a function is +activated together with other configuration in a single command, all the +configuration is applied first before changing the state to active. +.RE +.PP +devlink port param show +.RS 4 +Shows (dumps) all the port parameters across all the devices registered in the devlink.
+.RE +.PP +devlink port param set pci/0000:01:00.0/1 name internal_error_reset value true cmode runtime +.RS 4 +Sets the parameter internal_error_reset of specified devlink port (#1) to true. +.RE .SH SEE ALSO .BR devlink (8), diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index f451ecf..fd67e61 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -75,6 +75,9 @@ ip-link \- network device configuration .br .RB "[ " protodown " { " on " | " off " } ]" .br +.RB "[ " protodown_reason +.IR PREASON " { " on " | " off " } ]" +.br .RB "[ " trailers " { " on " | " off " } ]" .br .RB "[ " txqueuelen @@ -441,7 +444,7 @@ the following additional arguments are supported: - either 802.1Q or 802.1ad. .BI id " VLANID " -- specifies the VLAN Identifer to use. Note that numbers with a leading " 0 " or " 0x " are interpreted as octal or hexadeimal, respectively. +- specifies the VLAN Identifier to use. Note that numbers with a leading " 0 " or " 0x " are interpreted as octal or hexadecimal, respectively. .BR reorder_hdr " { " on " | " off " } " - specifies whether ethernet headers are reordered or not (default is @@ -572,7 +575,7 @@ the following additional arguments are supported: .in +8 .sp .BI id " VNI " -- specifies the VXLAN Network Identifer (or VXLAN Segment +- specifies the VXLAN Network Identifier (or VXLAN Segment Identifier) to use. .BI dev " PHYS_DEV" @@ -1237,7 +1240,7 @@ the following additional arguments are supported: .in +8 .sp .BI id " VNI " -- specifies the Virtual Network Identifer to use. +- specifies the Virtual Network Identifier to use.
.sp .BI remote " IPADDR" @@ -1304,9 +1307,9 @@ For a link of type the following additional arguments are supported: .BI "ip link add " DEVICE -.BI type " bareudp " dstport " PORT " ethertype " ETHERTYPE" +.BI type " bareudp " dstport " PORT " ethertype " PROTO" [ -.BI srcportmin " SRCPORTMIN " +.BI srcportmin " PORT " ] [ .RB [ no ] multiproto ] @@ -1317,11 +1320,14 @@ the following additional arguments are supported: - specifies the destination port for the UDP tunnel. .sp -.BI ethertype " ETHERTYPE" +.BI ethertype " PROTO" - specifies the ethertype of the L3 protocol being tunnelled. +.B ethertype +can be given as plain Ethernet protocol number or using the protocol name +("ipv4", "ipv6", "mpls_uc", etc.). .sp -.BI srcportmin " SRCPORTMIN" +.BI srcportmin " PORT" - selects the lowest value of the UDP tunnel source port range. .sp @@ -1329,11 +1335,11 @@ the following additional arguments are supported: - activates support for protocols similar to the one .RB "specified by " ethertype . When -.I ETHERTYPE +.B ethertype is "mpls_uc" (that is, unicast MPLS), this allows the tunnel to also handle multicast MPLS. When -.I ETHERTYPE +.B ethertype is "ipv4", this allows the tunnel to also handle IPv6. This option is disabled by default. @@ -1349,6 +1355,7 @@ the following additional arguments are supported: .BR type " { " macvlan " | " macvtap " } " .BR mode " { " private " | " vepa " | " bridge " | " passthru .RB " [ " nopromisc " ] | " source " } " +.RB " [ " bcqueuelen " { " LENGTH " } ] " .in +8 .sp @@ -1392,6 +1399,18 @@ against source mac address from received frames on underlying interface. This allows creating mac based VLAN associations, instead of standard port or tag based. The feature is useful to deploy 802.1x mac based behavior, where drivers of underlying interfaces doesn't allows that. + +.BR bcqueuelen " { " LENGTH " } " +- Set the length of the RX queue used to process broadcast and multicast packets. 
+.BR LENGTH " must be a non-negative integer in the range [0-4294967295]." +Setting a length of 0 will effectively drop all broadcast/multicast traffic. +If not specified the macvlan driver default (1000) is used. +Note that all macvlans that share the same underlying device are using the same +.RB "queue. The parameter here is a " request ", the actual queue length used" +will be the maximum length that any macvlan interface has requested. +When listing device parameters both the bcqueuelen parameter +as well as the actual used bcqueuelen are listed to better help +the user understand the setting. .in -8 .TP @@ -1947,6 +1966,13 @@ on the port. Switch drivers can react to this error by doing a phys down on the switch port. .TP +.BR "protodown_reason PREASON on " or " off" +set +.B PROTODOWN +reasons on the device. Protodown reason bit names can be enumerated under +/etc/iproute2/protodown_reasons.d/. Possible reason bits are 0-31. + +.TP .BR "dynamic on " or " dynamic off" change the .B DYNAMIC @@ -2441,6 +2467,26 @@ Commands: .sp .in -8 +Update the broadcast/multicast queue length. + +.B "ip link set type { macvlan | macvtap } " +[ +.BI bcqueuelen " LENGTH " +] + +.in +8 +.BI bcqueuelen " LENGTH " +- Set the length of the RX queue used to process broadcast and multicast packets. +.IR LENGTH " must be a non-negative integer in the range [0-4294967295]." +Setting a length of 0 will effectively drop all broadcast/multicast traffic. +If not specified the macvlan driver default (1000) is used. +Note that all macvlans that share the same underlying device are using the same +.RB "queue. The parameter here is a " request ", the actual queue length used" +will be the maximum length that any macvlan interface has requested. +When listing device parameters both the bcqueuelen parameter +as well as the actual used bcqueuelen are listed to better help +the user understand the setting.
+.in -8 .SS ip link show - display device attributes @@ -2497,7 +2543,7 @@ specifies the device to display address-family statistics for. .PP .I "TYPE" -specifies which help of link type to dislpay. +specifies which help of link type to display. .SS .I GROUP diff --git a/man/man8/ip-macsec.8 b/man/man8/ip-macsec.8 index 8e9175c..6739e51 100644 --- a/man/man8/ip-macsec.8 +++ b/man/man8/ip-macsec.8 @@ -119,7 +119,7 @@ type. .SH NOTES This tool can be used to configure the 802.1AE keys of the interface. Note that 802.1AE uses GCM-AES with a initialization vector (IV) derived from the packet number. The same key must not be used -with the same IV more than once. Instead, keys must be frequently regenerated and distibuted. +with the same IV more than once. Instead, keys must be frequently regenerated and distributed. This tool is thus mostly for debugging and testing, or in combination with a user-space application that reconfigures the keys. It is wrong to just configure the keys statically and assume them to work indefinitely. The suggested and standardized way for key management is 802.1X-2010, which is implemented diff --git a/man/man8/ip-monitor.8 b/man/man8/ip-monitor.8 index 86f8f98..f886d31 100644 --- a/man/man8/ip-monitor.8 +++ b/man/man8/ip-monitor.8 @@ -55,7 +55,7 @@ command is the first in the command line and then the object list follows: is the list of object types that we want to monitor. It may contain .BR link ", " address ", " route ", " mroute ", " prefix ", " -.BR neigh ", " netconf ", " rule " and " nsid "." +.BR neigh ", " netconf ", " rule ", " nsid " and " nexthop "." If no .B file argument is given, diff --git a/man/man8/ip-neighbour.8 b/man/man8/ip-neighbour.8 index f71f18b..a27f9ef 100644 --- a/man/man8/ip-neighbour.8 +++ b/man/man8/ip-neighbour.8 @@ -85,11 +85,11 @@ the interface to which this neighbour is attached. 
.TP .BI proxy -indicates whether we are proxying for this neigbour entry +indicates whether we are proxying for this neighbour entry .TP .BI router -indicates whether neigbour is a router +indicates whether neighbour is a router .TP .BI extern_learn @@ -244,7 +244,7 @@ lookup a neighbour entry to a destination given a device .TP .BI proxy -indicates whether we should lookup a proxy neigbour entry +indicates whether we should lookup a proxy neighbour entry .TP .BI to " ADDRESS " (default) diff --git a/man/man8/ip-xfrm.8 b/man/man8/ip-xfrm.8 index 4fa3165..2669b38 100644 --- a/man/man8/ip-xfrm.8 +++ b/man/man8/ip-xfrm.8 @@ -60,7 +60,9 @@ ip-xfrm \- transform configuration .RB "[ " extra-flag .IR EXTRA-FLAG-LIST " ]" .RB "[ " output-mark -.IR OUTPUT-MARK " ]" +.IR OUTPUT-MARK +.RB "[ " mask +.IR MASK " ] ]" .RB "[ " if_id .IR IF-ID " ]" diff --git a/man/man8/ss.8 b/man/man8/ss.8 index 3b2559f..42aac6d 100644 --- a/man/man8/ss.8 +++ b/man/man8/ss.8 @@ -69,7 +69,7 @@ how long time the timer will expire .P .TP .B -how many times the retransmission occured +how many times the retransmission occurred .RE .TP .B \-e, \-\-extended @@ -379,6 +379,9 @@ Display vsock sockets (alias for -f vsock). .B \-\-xdp Display XDP sockets (alias for -f xdp). .TP +.B \-\-inet-sockopt +Display inet socket options. +.TP .B \-f FAMILY, \-\-family=FAMILY Display sockets of type FAMILY. Currently the following families are supported: unix, inet, inet6, link, netlink, vsock, xdp. @@ -437,6 +440,113 @@ states except for - opposite to .B bucket +.SH EXPRESSION + +.B EXPRESSION +allows filtering based on specific criteria. +.B EXPRESSION +consists of a series of predicates combined by boolean operators. The possible operators in increasing +order of precedence are +.B or +(or | or ||), +.B and +(or & or &&), and +.B not +(or !). If no operator is between consecutive predicates, an implicit +.B and +operator is assumed. Subexpressions can be grouped with "(" and ")". 
+.P +The following predicates are supported: + +.TP +.B {dst|src} [=] HOST +Test if the destination or source matches HOST. See HOST SYNTAX for details. +.TP +.B {dport|sport} [OP] [FAMILY:]:PORT +Compare the destination or source port to PORT. OP can be any of "<", "<=", "=", "!=", +">=" and ">". Following normal arithmetic rules. FAMILY and PORT are as described in +HOST SYNTAX below. +.TP +.B dev [=|!=] DEVICE +Match based on the device the connection uses. DEVICE can either be a device name or the +index of the interface. +.TP +.B fwmark [=|!=] MASK +Matches based on the fwmark value for the connection. This can either be a specific mark value +or a mark value followed by a "/" and a bitmask of which bits to use in the comparison. For example +"fwmark = 0x01/0x03" would match if the two least significant bits of the fwmark were 0x01. +.TP +.B cgroup [=|!=] PATH +Match if the connection is part of a cgroup at the given path. +.TP +.B autobound +Match if the port or path of the source address was automatically allocated +(rather than explicitly specified). +.P +Most operators have aliases. If no operator is supplied "=" is assumed. +Each of the following groups of operators are all equivalent: +.RS +.IP \(bu 2 += == eq +.IP \(bu +!= ne neq +.IP \(bu +> gt +.IP \(bu +< lt +.IP \(bu +>= ge geq +.IP \(bu +<= le leq +.IP \(bu +! not +.IP \(bu +| || or +.IP \(bu +& && and +.RE +.SH HOST SYNTAX +.P +The general host syntax is [FAMILY:]ADDRESS[:PORT]. +.P +FAMILY must be one of the families supported by the -f option. If not given +it defaults to the family given with the -f option, and if that is also +missing, will assume either inet or inet6. Note that all host conditions in the +expression should either all be the same family or be only inet and inet6. If there +is some other mixture of families, the results will probably be unexpected. +.P +The form of ADDRESS and PORT depends on the family used. "*" can be used as +a wildcard for either the address or port. 
The details for each family are as +follows: +.TP +.B unix +ADDRESS is a glob pattern (see +.BR fnmatch (3)) +that will be matched case-insensitively against the unix socket's address. Both path and abstract +names are supported. Unix addresses do not support a port, and "*" cannot be used as a wildcard. +.TP +.B link +ADDRESS is the case-insensitive name of an Ethernet protocol to match. PORT +is either a device name or a device index for the desired link device, as seen +in the output of ip link. +.TP +.B netlink +ADDRESS is a descriptor of the netlink family. Possible values come from +/etc/iproute2/nl_protos. PORT is the port id of the socket, which is usually +the same as the owning process id. The value "kernel" can be used to represent +the kernel (port id of 0). +.TP +.B vsock +ADDRESS is an integer representing the CID address, and PORT is the port. +.TP +.BR inet \ and\ inet6 +ADDRESS is an ip address (either v4 or v6 depending on the family) or a DNS +hostname that resolves to an ip address of the required version. An ipv6 +address must be enclosed in "[" and "]" to disambiguate the port separator. The +address may additionally have a prefix length given in CIDR notation (a slash +followed by the prefix length in bits). PORT is either the numerical +socket port, or the service name for the port to match. + .SH USAGE EXAMPLES .TP .B ss -t -a diff --git a/man/man8/tc-cake.8 b/man/man8/tc-cake.8 index 4112b75..cb67d15 100644 --- a/man/man8/tc-cake.8 +++ b/man/man8/tc-cake.8 @@ -413,9 +413,9 @@ suffered by Australasian residents. Equivalent to .SH FLOW ISOLATION PARAMETERS With flow isolation enabled, CAKE places packets from different flows into different queues, each of which carries its own AQM state. Packets from each -queue are then delivered fairly, according to a DRR++ algorithm which minimises +queue are then delivered fairly, according to a DRR++ algorithm which minimizes latency for "sparse" flows. 
CAKE uses a set-associative hashing algorithm to -minimise flow collisions. +minimize flow collisions. These keywords specify whether fairness based on source address, destination address, individual flows, or any combination of those is desired. diff --git a/man/man8/tc-ct.8 b/man/man8/tc-ct.8 index 45d2932..709e62a 100644 --- a/man/man8/tc-ct.8 +++ b/man/man8/tc-ct.8 @@ -66,7 +66,7 @@ Restore any previous configured nat. Remove any conntrack state and metadata (mark/label) from the packet (must only option specified). .TP .BI force -Forces conntrack direction for a previously commited connections, so that current direction will become the original direction (only valid with commit). +Forces conntrack direction for a previously committed connections, so that current direction will become the original direction (only valid with commit). .SH EXAMPLES Example showing natted firewall in conntrack zone 2, and conntrack mark usage: diff --git a/man/man8/tc-flower.8 b/man/man8/tc-flower.8 index b5bcfd1..f7336b6 100644 --- a/man/man8/tc-flower.8 +++ b/man/man8/tc-flower.8 @@ -1,11 +1,5 @@ .TH "Flower filter in tc" 8 "22 Oct 2015" "iproute2" "Linux" - "Usage: ct clear\n" - " ct commit [force] [zone ZONE] [mark MASKED_MARK] [label MASKED_LABEL] [nat NAT_SPEC] [OFFLOAD_POLICY]\n" - " ct [nat] [zone ZONE] [OFFLOAD_POLICY]\n" - "Where: ZONE is the conntrack zone table number\n" - " NAT_SPEC is {src|dst} addr addr1[-addr2] [port port1[-port2]]\n" - " OFFLOAD_POLICY is [policy_pkts PACKETS] [policy_timeout TIMEOUT]\n" .SH NAME flower \- flow based traffic control filter .SH SYNOPSIS @@ -306,7 +300,7 @@ If the prefix is missing, \fBtc\fR assumes a full-length host match. .TQ .IR \fBsrc_port " { " MASKED_NUMBER " | " " MIN_VALUE-MAX_VALUE " } Match on layer 4 protocol source or destination port number, with an -optional mask. Alternatively, the mininum and maximum values can be +optional mask. 
Alternatively, the minimum and maximum values can be specified to match on a range of layer 4 protocol source or destination port numbers. Only available for .BR ip_proto " values " udp ", " tcp " and " sctp @@ -384,7 +378,7 @@ Matches on connection tracking info .RS .TP .I CT_STATE -Match the connection state, and can ne combination of [{+|-}flag] flags, where flag can be one of +Match the connection state, and can be combination of [{+|-}flag] flags, where flag can be one of .RS .TP trk - Tracked connection. @@ -393,6 +387,10 @@ new - New connection. .TP est - Established connection. .TP +rpl - The packet is in the reply direction, meaning that it is in the opposite direction from the packet that initiated the connection. +.TP +inv - The state is invalid. The packet couldn't be associated to a connection. +.TP Example: +trk+est .RE .TP diff --git a/man/man8/tc-htb.8 b/man/man8/tc-htb.8 index a416234..031b73a 100644 --- a/man/man8/tc-htb.8 +++ b/man/man8/tc-htb.8 @@ -12,7 +12,7 @@ major: minor-id .B ] [ r2q divisor -.B ] +.B ] [ offload ] .B tc class ... dev dev @@ -104,6 +104,9 @@ Divisor used to calculate values for classes. Classes divide .B rate by this number. Default value is 10. +.TP +offload +Offload the HTB algorithm to hardware (requires driver and device support). .SH CLASSES Classes have a host of parameters to configure their operation. 
diff --git a/man/man8/tc-matchall.8 b/man/man8/tc-matchall.8 index e3cddb1..75c28c9 100644 --- a/man/man8/tc-matchall.8 +++ b/man/man8/tc-matchall.8 @@ -45,7 +45,7 @@ tc filter add dev eth1 parent ffff: \\ .EE .RE -The first command creats an ingress qdisc with handle +The first command creates an ingress qdisc with handle .BR ffff: on device .BR eth1 @@ -64,7 +64,7 @@ tc filter add dev eth1 parent 1: \\ .EE .RE -The first command creats an egress qdisc with handle +The first command creates an egress qdisc with handle .BR 1: that replaces the root qdisc on device .BR eth1 diff --git a/man/man8/tc-mpls.8 b/man/man8/tc-mpls.8 index 84ef2ef..7f8be22 100644 --- a/man/man8/tc-mpls.8 +++ b/man/man8/tc-mpls.8 @@ -17,7 +17,7 @@ mpls - mpls manipulation module .ti -8 .IR PUSH " := " -.BR push " [ " protocol +.RB "{ " push " | " mac_push " } [ " protocol .IR MPLS_PROTO " ]" .RB " [ " tc .IR MPLS_TC " ] " @@ -64,7 +64,14 @@ requires no arguments and simply subtracts 1 from the MPLS header TTL field. Decapsulation mode. Requires the protocol of the next header. .TP .B push -Encapsulation mode. Requires at least the +Encapsulation mode. Adds the MPLS header between the MAC and the network +headers. Requires at least the +.B label +option. +.TP +.B mac_push +Encapsulation mode. Adds the MPLS header before the MAC header. Requires at +least the .B label option. 
.TP @@ -140,17 +147,48 @@ a label 123 and sends them out eth1: .EE .RE -In this example, incoming MPLS unicast packets on eth0 are decapsulated and to -ip packets and output to eth1: +In this example, incoming MPLS unicast packets on eth0 are decapsulated +and redirected to eth1: .RS .EX #tc qdisc add dev eth0 handle ffff: ingress #tc filter add dev eth0 protocol mpls_uc parent ffff: flower \\ action mpls pop protocol ipv4 \\ - action mirred egress redirect dev eth0 + action mirred egress redirect dev eth1 +.EE +.RE + +Here is another example, where incoming Ethernet frames are encapsulated into +MPLS with label 123 and TTL 64. Then, an outer Ethernet header is added and the +resulting frame is finally sent on eth1: + +.RS +.EX +#tc qdisc add dev eth0 ingress +#tc filter add dev eth0 ingress matchall \\ + action mpls mac_push label 123 ttl 64 \\ + action vlan push_eth \\ + dst_mac 02:00:00:00:00:02 \\ + src_mac 02:00:00:00:00:01 \\ + action mirred egress redirect dev eth1 +.EE +.RE + +The following example assumes that incoming MPLS packets with label 123 +transport Ethernet frames. The outer Ethernet and the MPLS headers are +stripped, then the inner Ethernet frame is sent on eth1: + +.RS +.EX +#tc qdisc add dev eth0 ingress +#tc filter add dev eth0 ingress protocol mpls_uc \\ + flower mpls_label 123 mpls_bos 1 \\ + action vlan pop_eth \\ + action mpls pop protocol teb \\ + action mirred egress redirect dev eth1 .EE .RE .SH SEE ALSO -.BR tc (8) +.BR tc "(8), " tc-mirred "(8), " tc-vlan (8) diff --git a/man/man8/tc-pie.8 b/man/man8/tc-pie.8 index 0db97d1..5a8c782 100644 --- a/man/man8/tc-pie.8 +++ b/man/man8/tc-pie.8 @@ -40,7 +40,7 @@ aims to control delay. The main design goals are PIE is designed to control delay effectively. First, an average dequeue rate is estimated based on the standing queue. The rate is used to calculate the current delay. Then, on a periodic basis, the delay is used to calculate the dropping -probabilty. 
Finally, on arrival, a packet is dropped (or marked) based on this +probability. Finally, on arrival, a packet is dropped (or marked) based on this probability. PIE makes adjustments to the probability based on the trend of the delay i.e. @@ -52,7 +52,7 @@ growth and are determined through control theoretic approaches. alpha determines the deviation between the current and target latency changes probability. beta exerts additional adjustments depending on the latency trend. -The drop probabilty is used to mark packets in ecn mode. However, as in RED, +The drop probability is used to mark packets in ecn mode. However, as in RED, beyond 10% packets are dropped based on this probability. The bytemode is used to drop packets proportional to the packet size. diff --git a/man/man8/tc-sfb.8 b/man/man8/tc-sfb.8 index aad19e1..e4584de 100644 --- a/man/man8/tc-sfb.8 +++ b/man/man8/tc-sfb.8 @@ -105,7 +105,7 @@ device the qdisc is attached to. .TP max Maximum length of a buckets queue, in packets, before packets start being -dropped. Should be sightly larger than +dropped. Should be slightly larger than .B target , but should not be set to values exceeding 1.5 times that of .B target . diff --git a/man/man8/tc-taprio.8 b/man/man8/tc-taprio.8 index e1d19ba..d13c86f 100644 --- a/man/man8/tc-taprio.8 +++ b/man/man8/tc-taprio.8 @@ -92,7 +92,11 @@ in the schedule; clockid .br Specifies the clock to be used by qdisc's internal timer for measuring -time and scheduling events. +time and scheduling events. This argument must be omitted when using the +full-offload feature (flags 0x2), since in that case, the clockid is +implicitly /dev/ptpN (where N is given by +.B ethtool -T eth0 | grep 'PTP Hardware Clock' +), and therefore not necessarily synchronized with the system's CLOCK_TAI. .TP sched-entry @@ -115,13 +119,27 @@ before moving to the next entry. .TP flags .br -Specifies different modes for taprio. 
Currently, only txtime-assist is -supported which can be enabled by setting it to 0x1. In this mode, taprio will -set the transmit timestamp depending on the interval in which the packet needs -to be transmitted. It will then utililize the +This is a bit mask which specifies different modes for taprio. +.RS +.TP +.I 0x1 +Enables the txtime-assist feature. In this mode, taprio will set the transmit +timestamp depending on the interval in which the packet needs to be +transmitted. It will then utilize the .BR etf(8) qdisc to sort and transmit the packets at the right time. The second example can be used as a reference to configure this mode. +.TP +.I 0x2 +Enables the full-offload feature. In this mode, taprio will pass the gate +control list to the NIC which will execute it cyclically in hardware. +When using full-offload, there is no need to specify the +.B clockid +argument. + +The txtime-assist and full-offload features are mutually exclusive, i.e. +setting flags to 0x3 is invalid. +.RE .TP txtime-delay @@ -178,5 +196,28 @@ for more information about configuring the ETF qdisc. offload delta 200000 clockid CLOCK_TAI .EE +The following is a schedule in full offload mode. The +.B base-time +is 200 ns and the +.B cycle-time +is implicitly calculated as the sum of all +.B sched-entry +durations (i.e. 20 us + 20 us + 60 us = 100 us). Although the base-time is in +the past, the hardware will start executing the schedule at a PTP time equal to +the smallest integer multiple of 100 us, plus 200 ns, that is larger than the +NIC's current PTP time. 
+ +.EX +# tc qdisc add dev eth0 parent root taprio \\ + num_tc 8 \\ + map 0 1 2 3 4 5 6 7 \\ + queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \\ + base-time 200 \\ + sched-entry S 80 20000 \\ + sched-entry S a0 20000 \\ + sched-entry S df 60000 \\ + flags 0x2 +.EE + .SH AUTHORS Vinicius Costa Gomes diff --git a/man/man8/tc-tunnel_key.8 b/man/man8/tc-tunnel_key.8 index ad99724..f9863f9 100644 --- a/man/man8/tc-tunnel_key.8 +++ b/man/man8/tc-tunnel_key.8 @@ -96,13 +96,13 @@ variable length hexadecimal value. Additionally multiple options may be listed using a comma delimiter. .TP .B vxlan_opts -Vxlan metatdata options. +Vxlan metadata options. .B vxlan_opts is specified in the form GBP, as a 32bit number. Multiple options is not supported. .TP .B erspan_opts -Erspan metatdata options. +Erspan metadata options. .B erspan_opts is specified in the form VERSION:INDEX:DIR:HWID, where VERSION is represented as a 8bit number, INDEX as an 32bit number, DIR and HWID as a 8bit number. diff --git a/man/man8/tc-vlan.8 b/man/man8/tc-vlan.8 index f5ffc25..264053d 100644 --- a/man/man8/tc-vlan.8 +++ b/man/man8/tc-vlan.8 @@ -5,8 +5,8 @@ vlan - vlan manipulation module .SH SYNOPSIS .in +8 .ti -8 -.BR tc " ... " "action vlan" " { " pop " |" -.IR PUSH " | " MODIFY " } [ " CONTROL " ]" +.BR tc " ... " "action vlan" " { " pop " | " pop_eth " |" +.IR PUSH " | " MODIFY " | " PUSH_ETH " } [ " CONTROL " ]" .ti -8 .IR PUSH " := " @@ -25,6 +25,11 @@ vlan - vlan manipulation module .BI id " VLANID" .ti -8 +.IR PUSH_ETH " := " +.B push_eth +.BI dst_mac " LLADDR " src_mac " LLADDR " + +.ti -8 .IR CONTROL " := { " .BR reclassify " | " pipe " | " drop " | " continue " | " pass " | " goto " " chain " " CHAIN_INDEX " }" .SH DESCRIPTION @@ -43,6 +48,20 @@ modes require at least a and allow to optionally choose the .I VLANPROTO to use. + +The +.B vlan +action can also be used to add or remove the base Ethernet header. The +.B pop_eth +mode, which takes no argument, is used to remove the base Ethernet header. 
All +existing VLANs must have been previously dropped. The opposite operation, +adding a base Ethernet header, is done with the +.B push_eth +mode. In that case, the packet must have no MAC header (stacking MAC headers is +not permitted). This mode is mostly useful when a previous action has +encapsulated the whole original frame behind a network header and one needs +to prepend an Ethernet header before forwarding the resulting packet. + .SH OPTIONS .TP .B pop @@ -58,6 +77,16 @@ Replace mode. Existing 802.1Q tag is replaced. Requires at least .B id option. .TP +.B pop_eth +Ethernet header decapsulation mode. Only works on a plain Ethernet header: +VLANs, if any, must be removed first. +.TP +.B push_eth +Ethernet header encapsulation mode. The Ethertype is automatically set +using the network header type. Chaining Ethernet headers is not allowed: the +packet must have no MAC header when using this mode. Requires the +.BR "dst_mac " and " src_mac " options. +.TP .BI id " VLANID" Specify the VLAN ID to encapsulate into. .I VLANID @@ -73,6 +102,12 @@ Choose the VLAN protocol to use. At the time of writing, the kernel accepts only .BI priority " VLANPRIO" Choose the VLAN priority to use. Decimal number in range of 0-7. .TP +.BI dst_mac " LLADDR" +Choose the destination MAC address to use. +.TP +.BI src_mac " LLADDR" +Choose the source MAC address to use. +.TP .I CONTROL How to continue after executing this action. .RS @@ -122,5 +157,8 @@ process then restarted for the plain packet: .EE .RE +For an example of the +.BR pop_eth " and " push_eth " modes, see " tc-mpls (8). + .SH SEE ALSO -.BR tc (8) +.BR tc "(8), " tc-mpls (8) diff --git a/man/man8/tc.8 b/man/man8/tc.8 index 7e9019f..4338572 100644 --- a/man/man8/tc.8 +++ b/man/man8/tc.8 @@ -854,6 +854,12 @@ option for creating alias. .RE +.TP +.BR "\-br" , " \-brief" +Print only essential data needed to identify the filter and action (handle, +cookie, etc.) and stats. 
This option is currently only supported by +.BR "tc filter show " and " tc actions ls " commands. + .SH "EXAMPLES" .PP tc -g class show dev eth0 diff --git a/man/man8/vdpa-dev.8 b/man/man8/vdpa-dev.8 new file mode 100644 index 0000000..3643351 --- /dev/null +++ b/man/man8/vdpa-dev.8 @@ -0,0 +1,96 @@ +.TH VDPA\-DEV 8 "5 Jan 2021" "iproute2" "Linux" +.SH NAME +vdpa-dev \- vdpa device configuration +.SH SYNOPSIS +.sp +.ad l +.in +8 +.ti -8 +.B vdpa +.B dev +.RI "[ " OPTIONS " ] " +.RI " { " COMMAND | " " +.BR help " }" +.sp + +.ti -8 +.IR OPTIONS " := { " +\fB\-V\fR[\fIersion\fR] +} + +.ti -8 +.B vdpa dev show +.RI "[ " DEV " ]" + +.ti -8 +.B vdpa dev help + +.ti -8 +.B vdpa dev add +.B name +.I NAME +.B mgmtdev +.I MGMTDEV + +.ti -8 +.B vdpa dev del +.I DEV + +.SH "DESCRIPTION" +.SS vdpa dev show - display vdpa device attributes + +.PP +.I "DEV" +- specifies the vdpa device to show. +If this argument is omitted all devices are listed. + +.in +4 +Format is: +.in +2 +VDPA_DEVICE_NAME + +.SS vdpa dev add - add a new vdpa device. + +.TP +.BI name " NAME" +Name of the new vdpa device to add. + +.TP +.BI mgmtdev " MGMTDEV" +Name of the management device to use for device addition. + +.SS vdpa dev del - Delete the vdpa device. + +.PP +.I "DEV" +- specifies the vdpa device to delete. + +.SH "EXAMPLES" +.PP +vdpa dev show +.RS 4 +Shows all the vdpa devices on the system. +.RE +.PP +vdpa dev show foo +.RS 4 +Shows the specified vdpa device. +.RE +.PP +vdpa dev add name foo mgmtdev vdpa_sim_net +.RS 4 +Add the vdpa device named foo on the management device vdpa_sim_net. +.RE +.PP +vdpa dev del foo +.RS 4 +Delete the vdpa device named foo which was previously created. 
+.RE + +.SH SEE ALSO +.BR vdpa (8), +.BR vdpa-mgmtdev (8), +.br + +.SH AUTHOR +Parav Pandit diff --git a/man/man8/vdpa-mgmtdev.8 b/man/man8/vdpa-mgmtdev.8 new file mode 100644 index 0000000..cae2cbd --- /dev/null +++ b/man/man8/vdpa-mgmtdev.8 @@ -0,0 +1,53 @@ +.TH VDPA\-MGMTDEV 8 "5 Jan 2021" "iproute2" "Linux" +.SH NAME +vdpa-mgmtdev \- vdpa management device view +.SH SYNOPSIS +.sp +.ad l +.in +8 +.ti -8 +.B vdpa +.B mgmtdev +.RI " { " COMMAND | " " +.BR help " }" +.sp + +.ti -8 +.IR OPTIONS " := { " +\fB\-V\fR[\fIersion\fR] +} + +.ti -8 +.B vdpa mgmtdev show +.RI "[ " MGMTDEV " ]" + +.ti -8 +.B vdpa mgmtdev help + +.SH "DESCRIPTION" +.SS vdpa mgmtdev show - display vdpa management device attributes + +.PP +.I "MGMTDEV" +- specifies the vdpa management device to show. +If this argument is omitted all management devices are listed. + +.SH "EXAMPLES" +.PP +vdpa mgmtdev show +.RS 4 +Shows all the vdpa management devices on the system. +.RE +.PP +vdpa mgmtdev show bar +.RS 4 +Shows the specified vdpa management device. +.RE + +.SH SEE ALSO +.BR vdpa (8), +.BR vdpa-dev (8), +.br + +.SH AUTHOR +Parav Pandit diff --git a/man/man8/vdpa.8 b/man/man8/vdpa.8 new file mode 100644 index 0000000..d1aaece --- /dev/null +++ b/man/man8/vdpa.8 @@ -0,0 +1,76 @@ +.TH VDPA 8 "5 Jan 2021" "iproute2" "Linux" +.SH NAME +vdpa \- vdpa management tool +.SH SYNOPSIS +.sp +.ad l +.in +8 +.ti -8 +.B vdpa +.RI "[ " OPTIONS " ] { " dev | mgmtdev " } { " COMMAND " | " +.BR help " }" +.sp + +.SH OPTIONS + +.TP +.BR "\-V" , " --Version" +Print the version of the +.B vdpa +utility and exit. + +.TP +.BR "\-j" , " --json" +Generate JSON output. + +.TP +.BR "\-p" , " --pretty" +When combined with -j generate a pretty JSON output. + +.SS +.I OBJECT + +.TP +.B dev +- vdpa device. + +.TP +.B mgmtdev +- vdpa management device. + +.SS +.I COMMAND + +Specifies the action to perform on the object. +The set of possible actions depends on the object type. +It is possible to +.B show +(or +.B list +) objects. 
The +.B help +command is available for all objects. It prints +out a list of available commands and argument syntax conventions. +.sp +If no command is given, some default command is assumed. +Usually it is +.B show +or, if the objects of this class cannot be listed, +.BR "help" . + +.SH EXIT STATUS +Exit status is 0 if command was successful or a positive integer upon failure. + +.SH SEE ALSO +.BR vdpa-dev (8), +.BR vdpa-mgmtdev (8), +.br + +.SH REPORTING BUGS +Report any bugs to the Network Developers mailing list +.B +where the development and maintenance is primarily done. +You do not have to be subscribed to the list to send a message there. + +.SH AUTHOR +Parav Pandit diff --git a/misc/ifstat.c b/misc/ifstat.c index c05183d..d4a3342 100644 --- a/misc/ifstat.c +++ b/misc/ifstat.c @@ -251,7 +251,7 @@ static void load_raw_table(FILE *fp) buf[strlen(buf)-1] = 0; if (info_source[0] && strcmp(info_source, buf+1)) source_mismatch = 1; - strncpy(info_source, buf+1, sizeof(info_source)-1); + strlcpy(info_source, buf+1, sizeof(info_source)); continue; } if ((n = malloc(sizeof(*n))) == NULL) diff --git a/misc/nstat.c b/misc/nstat.c index 6fdd316..ecdd4ce 100644 --- a/misc/nstat.c +++ b/misc/nstat.c @@ -136,8 +136,7 @@ static void load_good_table(FILE *fp) buf[strlen(buf)-1] = 0; if (info_source[0] && strcmp(info_source, buf+1)) source_mismatch = 1; - info_source[0] = 0; - strncat(info_source, buf+1, sizeof(info_source)-1); + strlcpy(info_source, buf + 1, sizeof(info_source)); continue; } /* idbuf is as big as buf, so this is safe */ diff --git a/misc/ss.c b/misc/ss.c index e556572..894ad40 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -114,6 +114,7 @@ static int sctp_ino; static int show_tipcinfo; static int show_tos; static int show_cgroup; +static int show_inet_sockopt; int oneline; enum col_id { @@ -2110,6 +2111,18 @@ static void vsock_set_inet_prefix(inet_prefix *a, __u32 cid) memcpy(a->data, &cid, sizeof(cid)); } +static char* find_port(char *addr, bool is_port) +{ + 
char *port = NULL; + if (is_port) + port = addr; + else + port = strchr(addr, ':'); + if (port && *port == ':') + *port++ = '\0'; + return port; +} + void *parse_hostcond(char *addr, bool is_port) { char *port = NULL; @@ -2118,35 +2131,49 @@ void *parse_hostcond(char *addr, bool is_port) int fam = preferred_family; struct filter *f = &current_filter; - if (fam == AF_UNIX || strncmp(addr, "unix:", 5) == 0) { + if (strncmp(addr, "unix:", 5) == 0) { + fam = AF_UNIX; + addr += 5; + } else if (strncmp(addr, "link:", 5) == 0) { + fam = AF_PACKET; + addr += 5; + } else if (strncmp(addr, "netlink:", 8) == 0) { + fam = AF_NETLINK; + addr += 8; + } else if (strncmp(addr, "vsock:", 6) == 0) { + fam = AF_VSOCK; + addr += 6; + } else if (strncmp(addr, "inet:", 5) == 0) { + fam = AF_INET; + addr += 5; + } else if (strncmp(addr, "inet6:", 6) == 0) { + fam = AF_INET6; + addr += 6; + } + + if (fam == AF_UNIX) { char *p; a.addr.family = AF_UNIX; - if (strncmp(addr, "unix:", 5) == 0) - addr += 5; p = strdup(addr); a.addr.bitlen = 8*strlen(p); memcpy(a.addr.data, &p, sizeof(p)); - fam = AF_UNIX; goto out; } - if (fam == AF_PACKET || strncmp(addr, "link:", 5) == 0) { + if (fam == AF_PACKET) { a.addr.family = AF_PACKET; a.addr.bitlen = 0; - if (strncmp(addr, "link:", 5) == 0) - addr += 5; - port = strchr(addr, ':'); + port = find_port(addr, is_port); if (port) { - *port = 0; - if (port[1] && strcmp(port+1, "*")) { - if (get_integer(&a.port, port+1, 0)) { - if ((a.port = xll_name_to_index(port+1)) <= 0) + if (*port && strcmp(port, "*")) { + if (get_integer(&a.port, port, 0)) { + if ((a.port = xll_name_to_index(port)) <= 0) return NULL; } } } - if (addr[0] && strcmp(addr, "*")) { + if (!is_port && addr[0] && strcmp(addr, "*")) { unsigned short tmp; a.addr.bitlen = 32; @@ -2154,77 +2181,51 @@ void *parse_hostcond(char *addr, bool is_port) return NULL; a.addr.data[0] = ntohs(tmp); } - fam = AF_PACKET; goto out; } - if (fam == AF_NETLINK || strncmp(addr, "netlink:", 8) == 0) { + if (fam == 
AF_NETLINK) { a.addr.family = AF_NETLINK; a.addr.bitlen = 0; - if (strncmp(addr, "netlink:", 8) == 0) - addr += 8; - port = strchr(addr, ':'); + port = find_port(addr, is_port); if (port) { - *port = 0; - if (port[1] && strcmp(port+1, "*")) { - if (get_integer(&a.port, port+1, 0)) { - if (strcmp(port+1, "kernel") == 0) + if (*port && strcmp(port, "*")) { + if (get_integer(&a.port, port, 0)) { + if (strcmp(port, "kernel") == 0) a.port = 0; else return NULL; } } } - if (addr[0] && strcmp(addr, "*")) { + if (!is_port && addr[0] && strcmp(addr, "*")) { a.addr.bitlen = 32; if (nl_proto_a2n(&a.addr.data[0], addr) == -1) return NULL; } - fam = AF_NETLINK; goto out; } - if (fam == AF_VSOCK || strncmp(addr, "vsock:", 6) == 0) { + if (fam == AF_VSOCK) { __u32 cid = ~(__u32)0; a.addr.family = AF_VSOCK; - if (strncmp(addr, "vsock:", 6) == 0) - addr += 6; - - if (is_port) - port = addr; - else { - port = strchr(addr, ':'); - if (port) { - *port = '\0'; - port++; - } - } + + port = find_port(addr, is_port); if (port && strcmp(port, "*") && get_u32((__u32 *)&a.port, port, 0)) return NULL; - if (addr[0] && strcmp(addr, "*")) { + if (!is_port && addr[0] && strcmp(addr, "*")) { a.addr.bitlen = 32; if (get_u32(&cid, addr, 0)) return NULL; } vsock_set_inet_prefix(&a.addr, cid); - fam = AF_VSOCK; goto out; } - if (fam == AF_INET || !strncmp(addr, "inet:", 5)) { - fam = AF_INET; - if (!strncmp(addr, "inet:", 5)) - addr += 5; - } else if (fam == AF_INET6 || !strncmp(addr, "inet6:", 6)) { - fam = AF_INET6; - if (!strncmp(addr, "inet6:", 6)) - addr += 6; - } - /* URL-like literal [] */ if (addr[0] == '[') { addr++; @@ -3135,7 +3136,7 @@ static void mptcp_stats_print(struct mptcp_info *s) out(" subflows:%d", s->mptcpi_subflows); if (s->mptcpi_add_addr_signal) out(" add_addr_signal:%d", s->mptcpi_add_addr_signal); - if (s->mptcpi_add_addr_signal) + if (s->mptcpi_add_addr_accepted) out(" add_addr_accepted:%d", s->mptcpi_add_addr_accepted); if (s->mptcpi_subflows_max) out(" subflows_max:%d", 
s->mptcpi_subflows_max); @@ -3333,6 +3334,41 @@ static int inet_show_sock(struct nlmsghdr *nlh, out(" cgroup:%s", cg_id_to_path(rta_getattr_u64(tb[INET_DIAG_CGROUP_ID]))); } + if (show_inet_sockopt) { + if (tb[INET_DIAG_SOCKOPT] && RTA_PAYLOAD(tb[INET_DIAG_SOCKOPT]) >= + sizeof(struct inet_diag_sockopt)) { + const struct inet_diag_sockopt *sockopt = + RTA_DATA(tb[INET_DIAG_SOCKOPT]); + if (!oneline) + out("\n\tinet-sockopt: ("); + else + out(" inet-sockopt: ("); + if (sockopt->recverr) + out(" recverr"); + if (sockopt->is_icsk) + out(" is_icsk"); + if (sockopt->freebind) + out(" freebind"); + if (sockopt->hdrincl) + out(" hdrincl"); + if (sockopt->mc_loop) + out(" mc_loop"); + if (sockopt->transparent) + out(" transparent"); + if (sockopt->mc_all) + out(" mc_all"); + if (sockopt->nodefrag) + out(" nodefrag"); + if (sockopt->bind_address_no_port) + out(" bind_addr_no_port"); + if (sockopt->recverr_rfc4884) + out(" recverr_rfc4884"); + if (sockopt->defer_connect) + out(" defer_connect"); + out(")"); + } + } + if (show_mem || (show_tcpinfo && s->type != IPPROTO_UDP)) { if (!oneline) out("\n\t"); @@ -3368,7 +3404,7 @@ static int tcpdiag_send(int fd, int protocol, struct filter *f) struct iovec iov[3]; int iovlen = 1; - if (protocol == IPPROTO_UDP) + if (protocol == IPPROTO_UDP || protocol == IPPROTO_MPTCP) return -1; if (protocol == IPPROTO_TCP) @@ -3587,6 +3623,14 @@ static int inet_show_netlink(struct filter *f, FILE *dump_fp, int protocol) if (preferred_family == PF_INET6) family = PF_INET6; + /* extended protocol will use INET_DIAG_REQ_PROTOCOL, + * not supported by older kernels. On such kernel + * rtnl_dump will bail with rtnl_dump_error(). 
+ * Suppress the error to avoid confusing the user + */ + if (protocol > 255) + rth.flags |= RTNL_HANDLE_F_SUPPRESS_NLERR; + again: if ((err = sockdiag_send(family, rth.fd, protocol, f))) goto Exit; @@ -4500,6 +4544,21 @@ static void xdp_show_umem(struct xdp_diag_umem *umem, struct xdp_diag_ring *fr, xdp_show_ring("cr", cr); } +static void xdp_show_stats(struct xdp_diag_stats *stats) +{ + if (oneline) + out(" stats("); + else + out("\n\tstats("); + out("rx dropped:%llu", stats->n_rx_dropped); + out(",rx invalid:%llu", stats->n_rx_invalid); + out(",rx queue full:%llu", stats->n_rx_full); + out(",rx fill ring empty:%llu", stats->n_fill_ring_empty); + out(",tx invalid:%llu", stats->n_tx_invalid); + out(",tx ring empty:%llu", stats->n_tx_ring_empty); + out(")"); +} + static int xdp_show_sock(struct nlmsghdr *nlh, void *arg) { struct xdp_diag_ring *rx = NULL, *tx = NULL, *fr = NULL, *cr = NULL; @@ -4507,6 +4566,7 @@ static int xdp_show_sock(struct nlmsghdr *nlh, void *arg) struct rtattr *tb[XDP_DIAG_MAX + 1]; struct xdp_diag_info *info = NULL; struct xdp_diag_umem *umem = NULL; + struct xdp_diag_stats *stats = NULL; const struct filter *f = arg; struct sockstat stat = {}; @@ -4541,6 +4601,8 @@ static int xdp_show_sock(struct nlmsghdr *nlh, void *arg) stat.rq = skmeminfo[SK_MEMINFO_RMEM_ALLOC]; } + if (tb[XDP_DIAG_STATS]) + stats = RTA_DATA(tb[XDP_DIAG_STATS]); if (xdp_stats_print(&stat, f)) return 0; @@ -4552,6 +4614,8 @@ static int xdp_show_sock(struct nlmsghdr *nlh, void *arg) xdp_show_ring("tx", tx); if (umem) xdp_show_umem(umem, fr, cr); + if (stats) + xdp_show_stats(stats); } if (show_mem) @@ -4570,7 +4634,7 @@ static int xdp_show(struct filter *f) req.r.sdiag_family = AF_XDP; req.r.xdiag_show = XDP_SHOW_INFO | XDP_SHOW_RING_CFG | XDP_SHOW_UMEM | - XDP_SHOW_MEMINFO; + XDP_SHOW_MEMINFO | XDP_SHOW_STATS; return handle_netlink_request(f, &req.nlh, sizeof(req), xdp_show_sock); } @@ -5210,6 +5274,7 @@ static void _usage(FILE *dest) " -K, --kill forcibly close sockets, 
display what was closed\n" " -H, --no-header Suppress header line\n" " -O, --oneline socket's data printed on a single line\n" +" --inet-sockopt show various inet socket options\n" "\n" " -A, --query=QUERY, --socket=QUERY\n" " QUERY := {all|inet|tcp|mptcp|udp|raw|unix|unix_dgram|unix_stream|unix_seqpacket|packet|netlink|vsock_stream|vsock_dgram|tipc}[,QUERY]\n" @@ -5299,6 +5364,8 @@ static int scan_state(const char *state) #define OPT_CGROUP 261 +#define OPT_INET_SOCKOPT 262 + static const struct option long_opts[] = { { "numeric", 0, 0, 'n' }, { "resolve", 0, 0, 'r' }, @@ -5341,6 +5408,7 @@ static const struct option long_opts[] = { { "xdp", 0, 0, OPT_XDPSOCK}, { "mptcp", 0, 0, 'M' }, { "oneline", 0, 0, 'O' }, + { "inet-sockopt", 0, 0, OPT_INET_SOCKOPT }, { 0 } }; @@ -5539,6 +5607,9 @@ int main(int argc, char *argv[]) case 'O': oneline = 1; break; + case OPT_INET_SOCKOPT: + show_inet_sockopt = 1; + break; case 'h': help(); case '?': diff --git a/rdma/dev.c b/rdma/dev.c index a11081b..c684dde 100644 --- a/rdma/dev.c +++ b/rdma/dev.c @@ -159,7 +159,7 @@ static void dev_print_dim_setting(struct rd *rd, struct nlattr **tb) if (dim_setting > 1) return; - print_on_off(rd, "adaptive-moderation", dim_setting); + print_on_off(PRINT_ANY, "adaptive-moderation", "adaptive-moderation %s ", dim_setting); } diff --git a/rdma/rdma.c b/rdma/rdma.c index 9ea2d17..8dc2d3e 100644 --- a/rdma/rdma.c +++ b/rdma/rdma.c @@ -41,40 +41,16 @@ static int rd_cmd(struct rd *rd, int argc, char **argv) return rd_exec_cmd(rd, cmds, "object"); } -static int rd_batch(struct rd *rd, const char *name, bool force) +static int rd_batch_cmd(int argc, char *argv[], void *data) { - char *line = NULL; - size_t len = 0; - int ret = 0; - - if (name && strcmp(name, "-") != 0) { - if (!freopen(name, "r", stdin)) { - pr_err("Cannot open file \"%s\" for reading: %s\n", - name, strerror(errno)); - return errno; - } - } + struct rd *rd = data; - cmdlineno = 0; - while (getcmdline(&line, &len, stdin) != -1) { - char 
*largv[512]; - int largc; - - largc = makeargs(line, largv, ARRAY_SIZE(largv)); - if (!largc) - continue; /* blank line */ - - ret = rd_cmd(rd, largc, largv); - if (ret) { - pr_err("Command failed %s:%d\n", name, cmdlineno); - if (!force) - break; - } - } - - free(line); + return rd_cmd(rd, argc, argv); +} - return ret; +static int rd_batch(struct rd *rd, const char *name, bool force) +{ + return do_batch(name, force, rd_batch_cmd, rd); } static int rd_init(struct rd *rd, char *filename) diff --git a/rdma/rdma.h b/rdma/rdma.h index a6c6bde..8b421db 100644 --- a/rdma/rdma.h +++ b/rdma/rdma.h @@ -19,6 +19,7 @@ #include "list.h" #include "utils.h" +#include "mnl_utils.h" #include "json_print.h" #define pr_err(args...) fprintf(stderr, ##args) @@ -39,7 +40,7 @@ struct filter_entry { char *key; char *value; /* - * This field menas that we can try to issue .doit calback + * This field means that we can try to issue .doit callback * on value above. This value can be converted to integer * with simple atoi(). Otherwise "is_doit" will be false. 
*/ @@ -84,6 +85,7 @@ struct rd_cmd { * Parser interface */ bool rd_no_arg(struct rd *rd); +bool rd_is_multiarg(struct rd *rd); void rd_arg_inc(struct rd *rd); char *rd_argv(struct rd *rd); @@ -138,7 +140,6 @@ void print_driver_table(struct rd *rd, struct nlattr *tb); void print_raw_data(struct rd *rd, struct nlattr **nla_line); void newline(struct rd *rd); void newline_indent(struct rd *rd); -void print_on_off(struct rd *rd, const char *key_str, bool on); void print_raw_data(struct rd *rd, struct nlattr **nla_line); #define MAX_LINE_LENGTH 80 diff --git a/rdma/res-cq.c b/rdma/res-cq.c index 313f929..9e7c4f5 100644 --- a/rdma/res-cq.c +++ b/rdma/res-cq.c @@ -36,7 +36,7 @@ static void print_cq_dim_setting(struct rd *rd, struct nlattr *attr) if (dim_setting > 1) return; - print_on_off(rd, "adaptive-moderation", dim_setting); + print_on_off(PRINT_ANY, "adaptive-moderation", "adaptive-moderation %s ", dim_setting); } static int res_cq_line_raw(struct rd *rd, const char *name, int idx, diff --git a/rdma/stat.c b/rdma/stat.c index a2b5da1..8edf7bf 100644 --- a/rdma/stat.c +++ b/rdma/stat.c @@ -307,7 +307,7 @@ static int stat_qp_show_parse_cb(const struct nlmsghdr *nlh, void *data) struct rd *rd = data; const char *name; uint32_t idx; - int ret; + int ret = MNL_CB_OK; mnl_attr_parse(nlh, 0, rd_attr_cb, tb); if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_DEV_NAME] || @@ -502,6 +502,12 @@ static int stat_get_arg(struct rd *rd, const char *arg) return -EINVAL; rd_arg_inc(rd); + + if (rd_is_multiarg(rd)) { + pr_err("The parameter %s shouldn't include range\n", arg); + return -EINVAL; + } + value = strtol(rd_argv(rd), &endp, 10); rd_arg_inc(rd); @@ -523,6 +529,8 @@ static int stat_one_qp_bind(struct rd *rd) return ret; lqpn = stat_get_arg(rd, "lqpn"); + if (lqpn < 0) + return lqpn; rd_prepare_msg(rd, RDMA_NLDEV_CMD_STAT_SET, &seq, (NLM_F_REQUEST | NLM_F_ACK)); @@ -537,6 +545,9 @@ static int stat_one_qp_bind(struct rd *rd) if (rd_argc(rd)) { cntn = stat_get_arg(rd, 
"cntn"); + if (cntn < 0) + return cntn; + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn); } @@ -607,13 +618,23 @@ static int stat_one_qp_unbind(struct rd *rd) unsigned int portid; uint32_t seq; + if (rd_no_arg(rd)) { + stat_help(rd); + return -EINVAL; + } + ret = rd_build_filter(rd, stat_valid_filters); if (ret) return ret; cntn = stat_get_arg(rd, "cntn"); + if (cntn < 0) + return cntn; + if (rd_argc(rd)) { lqpn = stat_get_arg(rd, "lqpn"); + if (lqpn < 0) + return lqpn; return do_stat_qp_unbind_lqpn(rd, cntn, lqpn); } diff --git a/rdma/utils.c b/rdma/utils.c index 4d3de4f..292e180 100644 --- a/rdma/utils.c +++ b/rdma/utils.c @@ -47,6 +47,13 @@ bool rd_no_arg(struct rd *rd) return rd_argc(rd) == 0; } +bool rd_is_multiarg(struct rd *rd) +{ + if (!rd_argc(rd)) + return false; + return strpbrk(rd_argv(rd), ",-") != NULL; +} + /* * Possible input:output * dev/port | first port | is_dump_all @@ -666,18 +673,12 @@ int rd_send_msg(struct rd *rd) { int ret; - rd->nl = mnl_socket_open(NETLINK_RDMA); + rd->nl = mnlu_socket_open(NETLINK_RDMA); if (!rd->nl) { pr_err("Failed to open NETLINK_RDMA socket\n"); return -ENODEV; } - ret = mnl_socket_bind(rd->nl, 0, MNL_SOCKET_AUTOPID); - if (ret < 0) { - pr_err("Failed to bind socket with err %d\n", ret); - goto err; - } - ret = mnl_socket_sendto(rd->nl, rd->nlh, rd->nlh->nlmsg_len); if (ret < 0) { pr_err("Failed to send to socket with err %d\n", ret); @@ -692,23 +693,13 @@ err: int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data, unsigned int seq) { - int ret; - unsigned int portid; char buf[MNL_SOCKET_BUFFER_SIZE]; + int ret; - portid = mnl_socket_get_portid(rd->nl); - do { - ret = mnl_socket_recvfrom(rd->nl, buf, sizeof(buf)); - if (ret <= 0) - break; - - ret = mnl_cb_run(buf, ret, seq, portid, callback, data); - } while (ret > 0); - + ret = mnlu_socket_recv_run(rd->nl, seq, buf, MNL_SOCKET_BUFFER_SIZE, + callback, data); if (ret < 0 && !rd->suppress_errors) perror("error"); - - mnl_socket_close(rd->nl); 
return ret; } @@ -781,11 +772,6 @@ static int print_driver_string(struct rd *rd, const char *key_str, return 0; } -void print_on_off(struct rd *rd, const char *key_str, bool on) -{ - print_driver_string(rd, key_str, (on) ? "on":"off"); -} - static int print_driver_s32(struct rd *rd, const char *key_str, int32_t val, enum rdma_nldev_print_type print_type) { diff --git a/tc/e_bpf.c b/tc/e_bpf.c index a48393b..517ee5b 100644 --- a/tc/e_bpf.c +++ b/tc/e_bpf.c @@ -159,7 +159,9 @@ static int parse_bpf(struct exec_util *eu, int argc, char **argv) envp_run[env_num - 1] = NULL; out: - return execvpe(argv_run[0], argv_run, envp_run); + ret = execvpe(argv_run[0], argv_run, envp_run); + free(envp_run); + return ret; err_free_env: for (--i; i >= env_old; i--) diff --git a/tc/f_flower.c b/tc/f_flower.c index 00c919f..53822a9 100644 --- a/tc/f_flower.c +++ b/tc/f_flower.c @@ -345,6 +345,8 @@ static struct flower_ct_states { { "trk", TCA_FLOWER_KEY_CT_FLAGS_TRACKED }, { "new", TCA_FLOWER_KEY_CT_FLAGS_NEW }, { "est", TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED }, + { "inv", TCA_FLOWER_KEY_CT_FLAGS_INVALID }, + { "rpl", TCA_FLOWER_KEY_CT_FLAGS_REPLY }, }; static int flower_parse_ct_state(char *str, struct nlmsghdr *n) @@ -1324,9 +1326,9 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, bool mpls_format_old = false; bool mpls_format_new = false; struct rtattr *tail; - __be16 eth_type = TC_H_MIN(t->tcm_info); + __be16 tc_proto = TC_H_MIN(t->tcm_info); + __be16 eth_type = tc_proto; __be16 vlan_ethtype = 0; - __be16 cvlan_ethtype = 0; __u8 ip_proto = 0xff; __u32 flags = 0; __u32 mtf = 0; @@ -1432,7 +1434,7 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, __u16 vid; NEXT_ARG(); - if (!eth_type_vlan(eth_type)) { + if (!eth_type_vlan(tc_proto)) { fprintf(stderr, "Can't set \"vlan_id\" if ethertype isn't 802.1Q or 802.1AD\n"); return -1; } @@ -1446,7 +1448,7 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, __u8 vlan_prio; NEXT_ARG(); - if 
(!eth_type_vlan(eth_type)) { + if (!eth_type_vlan(tc_proto)) { fprintf(stderr, "Can't set \"vlan_prio\" if ethertype isn't 802.1Q or 802.1AD\n"); return -1; } @@ -1464,6 +1466,8 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, &vlan_ethtype, n); if (ret < 0) return -1; + /* get new ethtype for later parsing */ + eth_type = vlan_ethtype; } else if (matches(*argv, "cvlan_id") == 0) { __u16 vid; @@ -1495,9 +1499,10 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, TCA_FLOWER_KEY_CVLAN_PRIO, cvlan_prio); } else if (matches(*argv, "cvlan_ethtype") == 0) { NEXT_ARG(); + /* get new ethtype for later parsing */ ret = flower_parse_vlan_eth_type(*argv, vlan_ethtype, TCA_FLOWER_KEY_CVLAN_ETH_TYPE, - &cvlan_ethtype, n); + &eth_type, n); if (ret < 0) return -1; } else if (matches(*argv, "mpls") == 0) { @@ -1627,9 +1632,7 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, } } else if (matches(*argv, "ip_proto") == 0) { NEXT_ARG(); - ret = flower_parse_ip_proto(*argv, cvlan_ethtype ? - cvlan_ethtype : vlan_ethtype ? - vlan_ethtype : eth_type, + ret = flower_parse_ip_proto(*argv, eth_type, TCA_FLOWER_KEY_IP_PROTO, &ip_proto, n); if (ret < 0) { @@ -1658,9 +1661,7 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, } } else if (matches(*argv, "dst_ip") == 0) { NEXT_ARG(); - ret = flower_parse_ip_addr(*argv, cvlan_ethtype ? - cvlan_ethtype : vlan_ethtype ? - vlan_ethtype : eth_type, + ret = flower_parse_ip_addr(*argv, eth_type, TCA_FLOWER_KEY_IPV4_DST, TCA_FLOWER_KEY_IPV4_DST_MASK, TCA_FLOWER_KEY_IPV6_DST, @@ -1672,9 +1673,7 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, } } else if (matches(*argv, "src_ip") == 0) { NEXT_ARG(); - ret = flower_parse_ip_addr(*argv, cvlan_ethtype ? - cvlan_ethtype : vlan_ethtype ? 
- vlan_ethtype : eth_type, + ret = flower_parse_ip_addr(*argv, eth_type, TCA_FLOWER_KEY_IPV4_SRC, TCA_FLOWER_KEY_IPV4_SRC_MASK, TCA_FLOWER_KEY_IPV6_SRC, @@ -1728,33 +1727,30 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, } } else if (matches(*argv, "arp_tip") == 0) { NEXT_ARG(); - ret = flower_parse_arp_ip_addr(*argv, vlan_ethtype ? - vlan_ethtype : eth_type, - TCA_FLOWER_KEY_ARP_TIP, - TCA_FLOWER_KEY_ARP_TIP_MASK, - n); + ret = flower_parse_arp_ip_addr(*argv, eth_type, + TCA_FLOWER_KEY_ARP_TIP, + TCA_FLOWER_KEY_ARP_TIP_MASK, + n); if (ret < 0) { fprintf(stderr, "Illegal \"arp_tip\"\n"); return -1; } } else if (matches(*argv, "arp_sip") == 0) { NEXT_ARG(); - ret = flower_parse_arp_ip_addr(*argv, vlan_ethtype ? - vlan_ethtype : eth_type, - TCA_FLOWER_KEY_ARP_SIP, - TCA_FLOWER_KEY_ARP_SIP_MASK, - n); + ret = flower_parse_arp_ip_addr(*argv, eth_type, + TCA_FLOWER_KEY_ARP_SIP, + TCA_FLOWER_KEY_ARP_SIP_MASK, + n); if (ret < 0) { fprintf(stderr, "Illegal \"arp_sip\"\n"); return -1; } } else if (matches(*argv, "arp_op") == 0) { NEXT_ARG(); - ret = flower_parse_arp_op(*argv, vlan_ethtype ? 
- vlan_ethtype : eth_type, - TCA_FLOWER_KEY_ARP_OP, - TCA_FLOWER_KEY_ARP_OP_MASK, - n); + ret = flower_parse_arp_op(*argv, eth_type, + TCA_FLOWER_KEY_ARP_OP, + TCA_FLOWER_KEY_ARP_OP_MASK, + n); if (ret < 0) { fprintf(stderr, "Illegal \"arp_op\"\n"); return -1; @@ -1894,8 +1890,8 @@ parse_done: return ret; } - if (eth_type != htons(ETH_P_ALL)) { - ret = addattr16(n, MAX_MSG, TCA_FLOWER_KEY_ETH_TYPE, eth_type); + if (tc_proto != htons(ETH_P_ALL)) { + ret = addattr16(n, MAX_MSG, TCA_FLOWER_KEY_ETH_TYPE, tc_proto); if (ret) return ret; } @@ -2476,7 +2472,7 @@ static void flower_print_u32(const char *name, struct rtattr *attr) print_uint(PRINT_ANY, name, namefrm, rta_getattr_u32(attr)); } -static void flower_print_mpls_opt_lse(const char *name, struct rtattr *lse) +static void flower_print_mpls_opt_lse(struct rtattr *lse) { struct rtattr *tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1]; struct rtattr *attr; @@ -2493,7 +2489,8 @@ static void flower_print_mpls_opt_lse(const char *name, struct rtattr *lse) RTA_PAYLOAD(lse)); print_nl(); - open_json_array(PRINT_ANY, name); + print_string(PRINT_FP, NULL, " lse", NULL); + open_json_object(NULL); attr = tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH]; if (attr) print_hhu(PRINT_ANY, "depth", " depth %u", @@ -2511,10 +2508,10 @@ static void flower_print_mpls_opt_lse(const char *name, struct rtattr *lse) attr = tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL]; if (attr) print_hhu(PRINT_ANY, "ttl", " ttl %u", rta_getattr_u8(attr)); - close_json_array(PRINT_JSON, NULL); + close_json_object(); } -static void flower_print_mpls_opts(const char *name, struct rtattr *attr) +static void flower_print_mpls_opts(struct rtattr *attr) { struct rtattr *lse; int rem; @@ -2523,11 +2520,12 @@ static void flower_print_mpls_opts(const char *name, struct rtattr *attr) return; print_nl(); - open_json_array(PRINT_ANY, name); + print_string(PRINT_FP, NULL, " mpls", NULL); + open_json_array(PRINT_JSON, "mpls"); rem = RTA_PAYLOAD(attr); lse = RTA_DATA(attr); while (RTA_OK(lse, rem)) { 
- flower_print_mpls_opt_lse(" lse", lse); + flower_print_mpls_opt_lse(lse); lse = RTA_NEXT(lse, rem); }; if (rem) @@ -2650,7 +2648,7 @@ static int flower_print_opt(struct filter_util *qu, FILE *f, flower_print_ip_attr("ip_ttl", tb[TCA_FLOWER_KEY_IP_TTL], tb[TCA_FLOWER_KEY_IP_TTL_MASK]); - flower_print_mpls_opts(" mpls", tb[TCA_FLOWER_KEY_MPLS_OPTS]); + flower_print_mpls_opts(tb[TCA_FLOWER_KEY_MPLS_OPTS]); flower_print_u32("mpls_label", tb[TCA_FLOWER_KEY_MPLS_LABEL]); flower_print_u8("mpls_tc", tb[TCA_FLOWER_KEY_MPLS_TC]); flower_print_u8("mpls_bos", tb[TCA_FLOWER_KEY_MPLS_BOS]); diff --git a/tc/f_u32.c b/tc/f_u32.c index e0a322d..2ed5254 100644 --- a/tc/f_u32.c +++ b/tc/f_u32.c @@ -1110,7 +1110,7 @@ static int u32_parse_opt(struct filter_util *qu, char *handle, } NEXT_ARG(); } - hash = sel2.sel.keys[0].val & sel2.sel.keys[0].mask; + hash = sel2.keys[0].val & sel2.keys[0].mask; hash ^= hash >> 16; hash ^= hash >> 8; htid = ((hash % divisor) << 12) | (htid & 0xFFF00000); diff --git a/tc/m_action.c b/tc/m_action.c index 66e6724..b16882a 100644 --- a/tc/m_action.c +++ b/tc/m_action.c @@ -374,6 +374,11 @@ static int tc_print_one_action(FILE *f, struct rtattr *arg) if (err < 0) return err; + if (brief && tb[TCA_ACT_INDEX]) { + print_uint(PRINT_ANY, "index", "\t index %u", + rta_getattr_u32(tb[TCA_ACT_INDEX])); + print_nl(); + } if (show_stats && tb[TCA_ACT_STATS]) { print_string(PRINT_FP, NULL, "\tAction statistics:", NULL); print_nl(); @@ -735,8 +740,12 @@ static int tc_act_list_or_flush(int *argc_p, char ***argv_p, int event) addattr_nest_end(&req.n, tail); tail3 = NLMSG_TAIL(&req.n); - flag_select.value |= TCA_FLAG_LARGE_DUMP_ON; - flag_select.selector |= TCA_FLAG_LARGE_DUMP_ON; + flag_select.value |= TCA_ACT_FLAG_LARGE_DUMP_ON; + flag_select.selector |= TCA_ACT_FLAG_LARGE_DUMP_ON; + if (brief) { + flag_select.value |= TCA_ACT_FLAG_TERSE_DUMP; + flag_select.selector |= TCA_ACT_FLAG_TERSE_DUMP; + } addattr_l(&req.n, MAX_MSG, TCA_ROOT_FLAGS, &flag_select, sizeof(struct 
nla_bitfield32)); tail3->rta_len = (void *) NLMSG_TAIL(&req.n) - (void *) tail3; diff --git a/tc/m_bpf.c b/tc/m_bpf.c index e8d704b..af5ba5c 100644 --- a/tc/m_bpf.c +++ b/tc/m_bpf.c @@ -161,8 +161,9 @@ static int bpf_print_opt(struct action_util *au, FILE *f, struct rtattr *arg) struct tc_act_bpf *parm; int d_ok = 0; + print_string(PRINT_ANY, "kind", "%s ", "bpf"); if (arg == NULL) - return -1; + return 0; parse_rtattr_nested(tb, TCA_ACT_BPF_MAX, arg); @@ -172,7 +173,6 @@ static int bpf_print_opt(struct action_util *au, FILE *f, struct rtattr *arg) } parm = RTA_DATA(tb[TCA_ACT_BPF_PARMS]); - print_string(PRINT_ANY, "kind", "%s ", "bpf"); if (tb[TCA_ACT_BPF_NAME]) print_string(PRINT_ANY, "bpf_name", "%s ", diff --git a/tc/m_connmark.c b/tc/m_connmark.c index 4b2dc4e..640bba9 100644 --- a/tc/m_connmark.c +++ b/tc/m_connmark.c @@ -110,8 +110,9 @@ static int print_connmark(struct action_util *au, FILE *f, struct rtattr *arg) struct rtattr *tb[TCA_CONNMARK_MAX + 1]; struct tc_connmark *ci; + print_string(PRINT_ANY, "kind", "%s ", "connmark"); if (arg == NULL) - return -1; + return 0; parse_rtattr_nested(tb, TCA_CONNMARK_MAX, arg); if (tb[TCA_CONNMARK_PARMS] == NULL) { @@ -121,7 +122,6 @@ static int print_connmark(struct action_util *au, FILE *f, struct rtattr *arg) ci = RTA_DATA(tb[TCA_CONNMARK_PARMS]); - print_string(PRINT_ANY, "kind", "%s ", "connmark"); print_uint(PRINT_ANY, "zone", "zone %u", ci->zone); print_action_control(f, " ", ci->action, ""); diff --git a/tc/m_csum.c b/tc/m_csum.c index afbee9c..23c5972 100644 --- a/tc/m_csum.c +++ b/tc/m_csum.c @@ -166,8 +166,9 @@ print_csum(struct action_util *au, FILE *f, struct rtattr *arg) int uflag_count = 0; + print_string(PRINT_ANY, "kind", "%s ", "csum"); if (arg == NULL) - return -1; + return 0; parse_rtattr_nested(tb, TCA_CSUM_MAX, arg); @@ -199,7 +200,6 @@ print_csum(struct action_util *au, FILE *f, struct rtattr *arg) uflag_1 = "?empty"; } - print_string(PRINT_ANY, "kind", "%s ", "csum"); snprintf(buf, 
sizeof(buf), "%s%s%s%s%s%s%s", uflag_1, uflag_2, uflag_3, uflag_4, uflag_5, uflag_6, uflag_7); diff --git a/tc/m_ct.c b/tc/m_ct.c index 70d186e..a02bf0c 100644 --- a/tc/m_ct.c +++ b/tc/m_ct.c @@ -443,8 +443,9 @@ static int print_ct(struct action_util *au, FILE *f, struct rtattr *arg) struct tc_ct *p; int ct_action = 0; + print_string(PRINT_ANY, "kind", "%s", "ct"); if (arg == NULL) - return -1; + return 0; parse_rtattr_nested(tb, TCA_CT_MAX, arg); if (tb[TCA_CT_PARMS] == NULL) { @@ -454,8 +455,6 @@ static int print_ct(struct action_util *au, FILE *f, struct rtattr *arg) p = RTA_DATA(tb[TCA_CT_PARMS]); - print_string(PRINT_ANY, "kind", "%s", "ct"); - if (tb[TCA_CT_ACTION]) ct_action = rta_getattr_u16(tb[TCA_CT_ACTION]); if (ct_action & TCA_CT_ACT_COMMIT) { diff --git a/tc/m_ctinfo.c b/tc/m_ctinfo.c index e5c1b43..996a362 100644 --- a/tc/m_ctinfo.c +++ b/tc/m_ctinfo.c @@ -188,8 +188,9 @@ static int print_ctinfo(struct action_util *au, FILE *f, struct rtattr *arg) unsigned short zone = 0; struct tc_ctinfo *ci; + print_string(PRINT_ANY, "kind", "%s ", "ctinfo"); if (arg == NULL) - return -1; + return 0; parse_rtattr_nested(tb, TCA_CTINFO_MAX, arg); if (!tb[TCA_CTINFO_ACT]) { @@ -234,7 +235,6 @@ static int print_ctinfo(struct action_util *au, FILE *f, struct rtattr *arg) sizeof(__u16)) zone = rta_getattr_u16(tb[TCA_CTINFO_ZONE]); - print_string(PRINT_ANY, "kind", "%s ", "ctinfo"); print_hu(PRINT_ANY, "zone", "zone %u", zone); print_action_control(f, " ", ci->action, ""); diff --git a/tc/m_gact.c b/tc/m_gact.c index 33f326f..2ef52cd 100644 --- a/tc/m_gact.c +++ b/tc/m_gact.c @@ -171,8 +171,9 @@ print_gact(struct action_util *au, FILE *f, struct rtattr *arg) struct tc_gact *p = NULL; struct rtattr *tb[TCA_GACT_MAX + 1]; + print_string(PRINT_ANY, "kind", "%s ", "gact"); if (arg == NULL) - return -1; + return 0; parse_rtattr_nested(tb, TCA_GACT_MAX, arg); @@ -182,7 +183,6 @@ print_gact(struct action_util *au, FILE *f, struct rtattr *arg) } p = RTA_DATA(tb[TCA_GACT_PARMS]); 
- print_string(PRINT_ANY, "kind", "%s ", "gact"); print_action_control(f, "action ", p->action, ""); #ifdef CONFIG_GACT_PROB if (tb[TCA_GACT_PROB] != NULL) { diff --git a/tc/m_gate.c b/tc/m_gate.c index 327df7e..c091ae1 100644 --- a/tc/m_gate.c +++ b/tc/m_gate.c @@ -261,7 +261,7 @@ static int parse_gate(struct action_util *a, int *argc_p, char ***argv_p, if (!NEXT_ARG_OK()) { explain_entry_format(); - fprintf(stderr, "\"sched-entry\" is imcomplete\n"); + fprintf(stderr, "\"sched-entry\" is incomplete\n"); free_entries(&gate_entries); return -1; } @@ -270,14 +270,14 @@ static int parse_gate(struct action_util *a, int *argc_p, char ***argv_p, if (get_gate_state(&gate_state, *argv)) { explain_entry_format(); - fprintf(stderr, "\"sched-entry\" is imcomplete\n"); + fprintf(stderr, "\"sched-entry\" is incomplete\n"); free_entries(&gate_entries); return -1; } if (!NEXT_ARG_OK()) { explain_entry_format(); - fprintf(stderr, "\"sched-entry\" is imcomplete\n"); + fprintf(stderr, "\"sched-entry\" is incomplete\n"); free_entries(&gate_entries); return -1; } @@ -287,7 +287,7 @@ static int parse_gate(struct action_util *a, int *argc_p, char ***argv_p, if (get_u32(&interval, *argv, 0) && get_time64(&interval_s64, *argv)) { explain_entry_format(); - fprintf(stderr, "\"sched-entry\" is imcomplete\n"); + fprintf(stderr, "\"sched-entry\" is incomplete\n"); free_entries(&gate_entries); return -1; } @@ -427,7 +427,7 @@ static int print_gate_list(struct rtattr *list) __u32 index = 0, interval = 0; __u8 gate_state = 0; __s32 ipv = -1, maxoctets = -1; - char buf[22]; + SPRINT_BUF(buf); parse_rtattr_nested(tb, TCA_GATE_ENTRY_MAX, item); @@ -465,10 +465,8 @@ static int print_gate_list(struct rtattr *list) } if (maxoctets != -1) { - memset(buf, 0, sizeof(buf)); - print_uint(PRINT_JSON, "max_octets", NULL, maxoctets); - print_string(PRINT_FP, NULL, "\t max-octets %s", - sprint_size(maxoctets, buf)); + print_size(PRINT_ANY, "max_octets", "\t max-octets %s", + maxoctets); } else { 
print_string(PRINT_FP, NULL, "\t max-octets %s", "wildcard"); @@ -492,7 +490,7 @@ static int print_gate(struct action_util *au, FILE *f, struct rtattr *arg) __s64 base_time = 0; __s64 cycle_time = 0; __s64 cycle_time_ext = 0; - char buf[22]; + SPRINT_BUF(buf); int prio = -1; if (arg == NULL) diff --git a/tc/m_ife.c b/tc/m_ife.c index 6a85e08..70ab1d7 100644 --- a/tc/m_ife.c +++ b/tc/m_ife.c @@ -227,8 +227,9 @@ static int print_ife(struct action_util *au, FILE *f, struct rtattr *arg) int has_optional = 0; SPRINT_BUF(b2); + print_string(PRINT_ANY, "kind", "%s ", "ife"); if (arg == NULL) - return -1; + return 0; parse_rtattr_nested(tb, TCA_IFE_MAX, arg); @@ -238,7 +239,6 @@ static int print_ife(struct action_util *au, FILE *f, struct rtattr *arg) } p = RTA_DATA(tb[TCA_IFE_PARMS]); - print_string(PRINT_ANY, "kind", "%s ", "ife"); print_string(PRINT_ANY, "mode", "%s ", p->flags & IFE_ENCODE ? "encode" : "decode"); print_action_control(f, "action ", p->action, " "); diff --git a/tc/m_ipt.c b/tc/m_ipt.c index cc95eab..046b310 100644 --- a/tc/m_ipt.c +++ b/tc/m_ipt.c @@ -433,7 +433,7 @@ print_ipt(struct action_util *au, FILE * f, struct rtattr *arg) __u32 hook; if (arg == NULL) - return -1; + return 0; lib_dir = getenv("IPTABLES_LIB_DIR"); if (!lib_dir) diff --git a/tc/m_mirred.c b/tc/m_mirred.c index d2bdf40..38d8043 100644 --- a/tc/m_mirred.c +++ b/tc/m_mirred.c @@ -281,8 +281,9 @@ print_mirred(struct action_util *au, FILE *f, struct rtattr *arg) struct rtattr *tb[TCA_MIRRED_MAX + 1]; const char *dev; + print_string(PRINT_ANY, "kind", "%s ", "mirred"); if (arg == NULL) - return -1; + return 0; parse_rtattr_nested(tb, TCA_MIRRED_MAX, arg); @@ -298,7 +299,6 @@ print_mirred(struct action_util *au, FILE *f, struct rtattr *arg) return -1; } - print_string(PRINT_ANY, "kind", "%s ", "mirred"); print_string(PRINT_FP, NULL, "(%s", mirred_n2a(p->eaction)); print_string(PRINT_JSON, "mirred_action", NULL, mirred_action(p->eaction)); diff --git a/tc/m_mpls.c b/tc/m_mpls.c index 
3d5d9b2..9b39d85 100644 --- a/tc/m_mpls.c +++ b/tc/m_mpls.c @@ -17,17 +17,21 @@ static const char * const action_names[] = { [TCA_MPLS_ACT_PUSH] = "push", [TCA_MPLS_ACT_MODIFY] = "modify", [TCA_MPLS_ACT_DEC_TTL] = "dec_ttl", + [TCA_MPLS_ACT_MAC_PUSH] = "mac_push", }; static void explain(void) { fprintf(stderr, - "Usage: mpls pop [ protocol MPLS_PROTO ]\n" + "Usage: mpls pop [ protocol MPLS_PROTO ] [CONTROL]\n" " mpls push [ protocol MPLS_PROTO ] [ label MPLS_LABEL ] [ tc MPLS_TC ]\n" " [ ttl MPLS_TTL ] [ bos MPLS_BOS ] [CONTROL]\n" - " mpls modify [ label MPLS_LABEL ] [ tc MPLS_TC ] [ ttl MPLS_TTL ] [CONTROL]\n" - " for pop MPLS_PROTO is next header of packet - e.g. ip or mpls_uc\n" - " for push MPLS_PROTO is one of mpls_uc or mpls_mc\n" + " mpls mac_push [ protocol MPLS_PROTO ] [ label MPLS_LABEL ] [ tc MPLS_TC ]\n" + " [ ttl MPLS_TTL ] [ bos MPLS_BOS ] [CONTROL]\n" + " mpls modify [ label MPLS_LABEL ] [ tc MPLS_TC ] [ ttl MPLS_TTL ]\n" + " [ bos MPLS_BOS ] [CONTROL]\n" + " for pop, MPLS_PROTO is next header of packet - e.g. 
ip or mpls_uc\n" + " for push and mac_push, MPLS_PROTO is one of mpls_uc or mpls_mc\n" " with default: mpls_uc\n" " CONTROL := reclassify | pipe | drop | continue | pass |\n" " goto chain \n"); @@ -41,12 +45,14 @@ static void usage(void) static bool can_modify_mpls_fields(unsigned int action) { - return action == TCA_MPLS_ACT_PUSH || action == TCA_MPLS_ACT_MODIFY; + return action == TCA_MPLS_ACT_PUSH || action == TCA_MPLS_ACT_MAC_PUSH || + action == TCA_MPLS_ACT_MODIFY; } -static bool can_modify_ethtype(unsigned int action) +static bool can_set_ethtype(unsigned int action) { - return action == TCA_MPLS_ACT_PUSH || action == TCA_MPLS_ACT_POP; + return action == TCA_MPLS_ACT_PUSH || action == TCA_MPLS_ACT_MAC_PUSH || + action == TCA_MPLS_ACT_POP; } static bool is_valid_label(__u32 label) @@ -98,37 +104,46 @@ static int parse_mpls(struct action_util *a, int *argc_p, char ***argv_p, if (check_double_action(action, *argv)) return -1; action = TCA_MPLS_ACT_MODIFY; + } else if (matches(*argv, "mac_push") == 0) { + if (check_double_action(action, *argv)) + return -1; + action = TCA_MPLS_ACT_MAC_PUSH; } else if (matches(*argv, "dec_ttl") == 0) { if (check_double_action(action, *argv)) return -1; action = TCA_MPLS_ACT_DEC_TTL; } else if (matches(*argv, "label") == 0) { if (!can_modify_mpls_fields(action)) - invarg("only valid for push/modify", *argv); + invarg("only valid for push, mac_push and modify", + *argv); NEXT_ARG(); if (get_u32(&label, *argv, 0) || !is_valid_label(label)) invarg("label must be <=0xFFFFF", *argv); } else if (matches(*argv, "tc") == 0) { if (!can_modify_mpls_fields(action)) - invarg("only valid for push/modify", *argv); + invarg("only valid for push, mac_push and modify", + *argv); NEXT_ARG(); if (get_u8(&tc, *argv, 0) || (tc & ~0x7)) invarg("tc field is 3 bits max", *argv); } else if (matches(*argv, "ttl") == 0) { if (!can_modify_mpls_fields(action)) - invarg("only valid for push/modify", *argv); + invarg("only valid for push, mac_push and modify", + 
*argv); NEXT_ARG(); if (get_u8(&ttl, *argv, 0) || !ttl) invarg("ttl must be >0 and <=255", *argv); } else if (matches(*argv, "bos") == 0) { if (!can_modify_mpls_fields(action)) - invarg("only valid for push/modify", *argv); + invarg("only valid for push, mac_push and modify", + *argv); NEXT_ARG(); if (get_u8(&bos, *argv, 0) || (bos & ~0x1)) invarg("bos must be 0 or 1", *argv); } else if (matches(*argv, "protocol") == 0) { - if (!can_modify_ethtype(action)) - invarg("only valid for push/pop", *argv); + if (!can_set_ethtype(action)) + invarg("only valid for push, mac_push and pop", + *argv); NEXT_ARG(); if (ll_proto_a2n(&proto, *argv)) invarg("protocol is invalid", *argv); @@ -159,10 +174,12 @@ static int parse_mpls(struct action_util *a, int *argc_p, char ***argv_p, if (action == TCA_MPLS_ACT_PUSH && label == 0xffffffff) missarg("label"); - if (action == TCA_MPLS_ACT_PUSH && proto && + if ((action == TCA_MPLS_ACT_PUSH || action == TCA_MPLS_ACT_MAC_PUSH) && + proto && proto != htons(ETH_P_MPLS_UC) && proto != htons(ETH_P_MPLS_MC)) { fprintf(stderr, - "invalid push protocol \"0x%04x\" - use mpls_(uc|mc)\n", + "invalid %spush protocol \"0x%04x\" - use mpls_(uc|mc)\n", + action == TCA_MPLS_ACT_MAC_PUSH ? 
"mac_" : "", ntohs(proto)); return -1; } @@ -197,8 +214,9 @@ static int print_mpls(struct action_util *au, FILE *f, struct rtattr *arg) SPRINT_BUF(b1); __u32 val; + print_string(PRINT_ANY, "kind", "%s ", "mpls"); if (!arg) - return -1; + return 0; parse_rtattr_nested(tb, TCA_MPLS_MAX, arg); @@ -208,7 +226,6 @@ static int print_mpls(struct action_util *au, FILE *f, struct rtattr *arg) } parm = RTA_DATA(tb[TCA_MPLS_PARMS]); - print_string(PRINT_ANY, "kind", "%s ", "mpls"); print_string(PRINT_ANY, "mpls_action", " %s", action_names[parm->m_action]); @@ -223,6 +240,7 @@ static int print_mpls(struct action_util *au, FILE *f, struct rtattr *arg) } break; case TCA_MPLS_ACT_PUSH: + case TCA_MPLS_ACT_MAC_PUSH: if (tb[TCA_MPLS_PROTO]) { __u16 proto; diff --git a/tc/m_nat.c b/tc/m_nat.c index 56e8f47..654f9a3 100644 --- a/tc/m_nat.c +++ b/tc/m_nat.c @@ -146,8 +146,9 @@ print_nat(struct action_util *au, FILE * f, struct rtattr *arg) SPRINT_BUF(buf2); int len; + print_string(PRINT_ANY, "type", " %s ", "nat"); if (arg == NULL) - return -1; + return 0; parse_rtattr_nested(tb, TCA_NAT_MAX, arg); @@ -160,7 +161,6 @@ print_nat(struct action_util *au, FILE * f, struct rtattr *arg) len = ffs(sel->mask); len = len ? 33 - len : 0; - print_string(PRINT_ANY, "type", " %s ", "nat"); print_string(PRINT_ANY, "direction", "%s", sel->flags & TCA_NAT_FLAG_EGRESS ? 
"egress" : "ingress"); diff --git a/tc/m_pedit.c b/tc/m_pedit.c index 51dcf10..74c91e8 100644 --- a/tc/m_pedit.c +++ b/tc/m_pedit.c @@ -745,8 +745,9 @@ static int print_pedit(struct action_util *au, FILE *f, struct rtattr *arg) struct m_pedit_key_ex *keys_ex = NULL; int err; + print_string(PRINT_ANY, "kind", " %s ", "pedit"); if (arg == NULL) - return -1; + return 0; parse_rtattr_nested(tb, TCA_PEDIT_MAX, arg); @@ -783,7 +784,6 @@ static int print_pedit(struct action_util *au, FILE *f, struct rtattr *arg) } } - print_string(PRINT_ANY, "kind", " %s ", "pedit"); print_action_control(f, "action ", sel->action, " "); print_uint(PRINT_ANY, "nkeys", "keys %d\n", sel->nkeys); print_uint(PRINT_ANY, "index", " \t index %u", sel->index); @@ -819,8 +819,10 @@ static int print_pedit(struct action_util *au, FILE *f, struct rtattr *arg) print_uint(PRINT_FP, NULL, "\n\t key #%d at ", i); err = print_pedit_location(f, htype, key->off); - if (err) + if (err) { + free(keys_ex); return err; + } /* In FP, report the "set" command as "val" to keep * backward compatibility. Report the true name in JSON. 
diff --git a/tc/m_police.c b/tc/m_police.c index 83b25db..bb51df6 100644 --- a/tc/m_police.c +++ b/tc/m_police.c @@ -238,7 +238,6 @@ int parse_police(int *argc_p, char ***argv_p, int tca_id, struct nlmsghdr *n) static int print_police(struct action_util *a, FILE *f, struct rtattr *arg) { - SPRINT_BUF(b1); SPRINT_BUF(b2); struct tc_police *p; struct rtattr *tb[TCA_POLICE_MAX+1]; @@ -269,10 +268,10 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg) rate64 = rta_getattr_u64(tb[TCA_POLICE_RATE64]); fprintf(f, " police 0x%x ", p->index); - fprintf(f, "rate %s ", sprint_rate(rate64, b1)); + tc_print_rate(PRINT_FP, NULL, "rate %s ", rate64); buffer = tc_calc_xmitsize(rate64, p->burst); - fprintf(f, "burst %s ", sprint_size(buffer, b1)); - fprintf(f, "mtu %s ", sprint_size(p->mtu, b1)); + print_size(PRINT_FP, NULL, "burst %s ", buffer); + print_size(PRINT_FP, NULL, "mtu %s ", p->mtu); if (show_raw) fprintf(f, "[%08x] ", p->burst); @@ -282,12 +281,11 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg) prate64 = rta_getattr_u64(tb[TCA_POLICE_PEAKRATE64]); if (prate64) - fprintf(f, "peakrate %s ", sprint_rate(prate64, b1)); + tc_print_rate(PRINT_FP, NULL, "peakrate %s ", prate64); if (tb[TCA_POLICE_AVRATE]) - fprintf(f, "avrate %s ", - sprint_rate(rta_getattr_u32(tb[TCA_POLICE_AVRATE]), - b1)); + tc_print_rate(PRINT_FP, NULL, "avrate %s ", + rta_getattr_u32(tb[TCA_POLICE_AVRATE])); print_action_control(f, "action ", p->action, ""); diff --git a/tc/m_sample.c b/tc/m_sample.c index 4a30513..696d760 100644 --- a/tc/m_sample.c +++ b/tc/m_sample.c @@ -143,8 +143,9 @@ static int print_sample(struct action_util *au, FILE *f, struct rtattr *arg) struct rtattr *tb[TCA_SAMPLE_MAX + 1]; struct tc_sample *p; + print_string(PRINT_ANY, "kind", "%s ", "sample"); if (arg == NULL) - return -1; + return 0; parse_rtattr_nested(tb, TCA_SAMPLE_MAX, arg); @@ -155,7 +156,6 @@ static int print_sample(struct action_util *au, FILE *f, struct 
rtattr *arg) } p = RTA_DATA(tb[TCA_SAMPLE_PARMS]); - print_string(PRINT_ANY, "kind", "%s ", "sample"); print_uint(PRINT_ANY, "rate", "rate 1/%u ", rta_getattr_u32(tb[TCA_SAMPLE_RATE])); print_uint(PRINT_ANY, "group", "group %u", diff --git a/tc/m_simple.c b/tc/m_simple.c index 70897d6..bc86be2 100644 --- a/tc/m_simple.c +++ b/tc/m_simple.c @@ -166,7 +166,7 @@ static int print_simple(struct action_util *au, FILE *f, struct rtattr *arg) char *simpdata; if (arg == NULL) - return -1; + return 0; parse_rtattr_nested(tb, TCA_DEF_MAX, arg); diff --git a/tc/m_skbedit.c b/tc/m_skbedit.c index 9afe2f0..46d92b2 100644 --- a/tc/m_skbedit.c +++ b/tc/m_skbedit.c @@ -198,8 +198,9 @@ static int print_skbedit(struct action_util *au, FILE *f, struct rtattr *arg) __u16 ptype; struct tc_skbedit *p; + print_string(PRINT_ANY, "kind", "%s ", "skbedit"); if (arg == NULL) - return -1; + return 0; parse_rtattr_nested(tb, TCA_SKBEDIT_MAX, arg); @@ -209,8 +210,6 @@ static int print_skbedit(struct action_util *au, FILE *f, struct rtattr *arg) } p = RTA_DATA(tb[TCA_SKBEDIT_PARMS]); - print_string(PRINT_ANY, "kind", "%s ", "skbedit"); - if (tb[TCA_SKBEDIT_QUEUE_MAPPING] != NULL) { print_uint(PRINT_ANY, "queue_mapping", "queue_mapping %u", rta_getattr_u16(tb[TCA_SKBEDIT_QUEUE_MAPPING])); diff --git a/tc/m_skbmod.c b/tc/m_skbmod.c index d38a5c1..e13d3f1 100644 --- a/tc/m_skbmod.c +++ b/tc/m_skbmod.c @@ -169,7 +169,7 @@ static int print_skbmod(struct action_util *au, FILE *f, struct rtattr *arg) SPRINT_BUF(b2); if (arg == NULL) - return -1; + return 0; parse_rtattr_nested(tb, TCA_SKBMOD_MAX, arg); diff --git a/tc/m_tunnel_key.c b/tc/m_tunnel_key.c index bfec907..ca0dff1 100644 --- a/tc/m_tunnel_key.c +++ b/tc/m_tunnel_key.c @@ -670,8 +670,9 @@ static int print_tunnel_key(struct action_util *au, FILE *f, struct rtattr *arg) struct rtattr *tb[TCA_TUNNEL_KEY_MAX + 1]; struct tc_tunnel_key *parm; + print_string(PRINT_ANY, "kind", "%s ", "tunnel_key"); if (!arg) - return -1; + return 0; 
parse_rtattr_nested(tb, TCA_TUNNEL_KEY_MAX, arg); @@ -681,8 +682,6 @@ static int print_tunnel_key(struct action_util *au, FILE *f, struct rtattr *arg) } parm = RTA_DATA(tb[TCA_TUNNEL_KEY_PARMS]); - print_string(PRINT_ANY, "kind", "%s ", "tunnel_key"); - switch (parm->t_action) { case TCA_TUNNEL_KEY_ACT_RELEASE: print_string(PRINT_ANY, "mode", " %s", "unset"); diff --git a/tc/m_vlan.c b/tc/m_vlan.c index 1096ba0..221083d 100644 --- a/tc/m_vlan.c +++ b/tc/m_vlan.c @@ -23,14 +23,18 @@ static const char * const action_names[] = { [TCA_VLAN_ACT_POP] = "pop", [TCA_VLAN_ACT_PUSH] = "push", [TCA_VLAN_ACT_MODIFY] = "modify", + [TCA_VLAN_ACT_POP_ETH] = "pop_eth", + [TCA_VLAN_ACT_PUSH_ETH] = "push_eth", }; static void explain(void) { fprintf(stderr, - "Usage: vlan pop\n" + "Usage: vlan pop [CONTROL]\n" " vlan push [ protocol VLANPROTO ] id VLANID [ priority VLANPRIO ] [CONTROL]\n" " vlan modify [ protocol VLANPROTO ] id VLANID [ priority VLANPRIO ] [CONTROL]\n" + " vlan pop_eth [CONTROL]\n" + " vlan push_eth dst_mac LLADDR src_mac LLADDR [CONTROL]\n" " VLANPROTO is one of 802.1Q or 802.1AD\n" " with default: 802.1Q\n" " CONTROL := reclassify | pipe | drop | continue | pass |\n" @@ -63,6 +67,10 @@ static int parse_vlan(struct action_util *a, int *argc_p, char ***argv_p, char **argv = *argv_p; struct rtattr *tail; int action = 0; + char dst_mac[ETH_ALEN] = {}; + int dst_mac_set = 0; + char src_mac[ETH_ALEN] = {}; + int src_mac_set = 0; __u16 id; int id_set = 0; __u16 proto; @@ -95,6 +103,18 @@ static int parse_vlan(struct action_util *a, int *argc_p, char ***argv_p, return -1; } action = TCA_VLAN_ACT_MODIFY; + } else if (matches(*argv, "pop_eth") == 0) { + if (action) { + unexpected(*argv); + return -1; + } + action = TCA_VLAN_ACT_POP_ETH; + } else if (matches(*argv, "push_eth") == 0) { + if (action) { + unexpected(*argv); + return -1; + } + action = TCA_VLAN_ACT_PUSH_ETH; } else if (matches(*argv, "id") == 0) { if (!has_push_attribs(action)) invarg("only valid for 
push/modify", *argv); @@ -119,6 +139,22 @@ static int parse_vlan(struct action_util *a, int *argc_p, char ***argv_p, if (get_u8(&prio, *argv, 0) || (prio & ~0x7)) invarg("prio is invalid", *argv); prio_set = 1; + } else if (matches(*argv, "dst_mac") == 0) { + if (action != TCA_VLAN_ACT_PUSH_ETH) + invarg("only valid for push_eth", *argv); + + NEXT_ARG(); + if (ll_addr_a2n(dst_mac, sizeof(dst_mac), *argv) < 0) + invarg("dst_mac is invalid", *argv); + dst_mac_set = 1; + } else if (matches(*argv, "src_mac") == 0) { + if (action != TCA_VLAN_ACT_PUSH_ETH) + invarg("only valid for push_eth", *argv); + + NEXT_ARG(); + if (ll_addr_a2n(src_mac, sizeof(src_mac), *argv) < 0) + invarg("src_mac is invalid", *argv); + src_mac_set = 1; } else if (matches(*argv, "help") == 0) { usage(); } else { @@ -150,6 +186,20 @@ static int parse_vlan(struct action_util *a, int *argc_p, char ***argv_p, return -1; } + if (action == TCA_VLAN_ACT_PUSH_ETH) { + if (!dst_mac_set) { + fprintf(stderr, "dst_mac needs to be set for %s\n", + action_names[action]); + explain(); + return -1; + } else if (!src_mac_set) { + fprintf(stderr, "src_mac needs to be set for %s\n", + action_names[action]); + explain(); + return -1; + } + } + parm.v_action = action; tail = addattr_nest(n, MAX_MSG, tca_id); addattr_l(n, MAX_MSG, TCA_VLAN_PARMS, &parm, sizeof(parm)); @@ -167,6 +217,12 @@ static int parse_vlan(struct action_util *a, int *argc_p, char ***argv_p, } if (prio_set) addattr8(n, MAX_MSG, TCA_VLAN_PUSH_VLAN_PRIORITY, prio); + if (dst_mac_set) + addattr_l(n, MAX_MSG, TCA_VLAN_PUSH_ETH_DST, dst_mac, + sizeof(dst_mac)); + if (src_mac_set) + addattr_l(n, MAX_MSG, TCA_VLAN_PUSH_ETH_SRC, src_mac, + sizeof(src_mac)); addattr_nest_end(n, tail); @@ -182,18 +238,18 @@ static int print_vlan(struct action_util *au, FILE *f, struct rtattr *arg) __u16 val; struct tc_vlan *parm; + print_string(PRINT_ANY, "kind", "%s ", "vlan"); if (arg == NULL) - return -1; + return 0; parse_rtattr_nested(tb, TCA_VLAN_MAX, arg); if 
(!tb[TCA_VLAN_PARMS]) { - fprintf(stderr, "Missing vlanparameters\n"); + fprintf(stderr, "Missing vlan parameters\n"); return -1; } parm = RTA_DATA(tb[TCA_VLAN_PARMS]); - print_string(PRINT_ANY, "kind", "%s ", "vlan"); print_string(PRINT_ANY, "vlan_action", " %s", action_names[parm->v_action]); @@ -216,6 +272,19 @@ static int print_vlan(struct action_util *au, FILE *f, struct rtattr *arg) print_uint(PRINT_ANY, "priority", " priority %u", val); } break; + case TCA_VLAN_ACT_PUSH_ETH: + if (tb[TCA_VLAN_PUSH_ETH_DST] && + RTA_PAYLOAD(tb[TCA_VLAN_PUSH_ETH_DST]) == ETH_ALEN) { + ll_addr_n2a(RTA_DATA(tb[TCA_VLAN_PUSH_ETH_DST]), + ETH_ALEN, 0, b1, sizeof(b1)); + print_string(PRINT_ANY, "dst_mac", " dst_mac %s", b1); + } + if (tb[TCA_VLAN_PUSH_ETH_SRC] && + RTA_PAYLOAD(tb[TCA_VLAN_PUSH_ETH_SRC]) == ETH_ALEN) { + ll_addr_n2a(RTA_DATA(tb[TCA_VLAN_PUSH_ETH_SRC]), + ETH_ALEN, 0, b1, sizeof(b1)); + print_string(PRINT_ANY, "src_mac", " src_mac %s", b1); + } } print_action_control(f, " ", parm->action, ""); diff --git a/tc/m_xt.c b/tc/m_xt.c index 487ba25..deaf96a 100644 --- a/tc/m_xt.c +++ b/tc/m_xt.c @@ -320,7 +320,7 @@ print_ipt(struct action_util *au, FILE *f, struct rtattr *arg) __u32 hook; if (arg == NULL) - return -1; + return 0; /* copy tcipt_globals because .opts will be modified by iptables */ struct xtables_globals tmp_tcipt_globals = tcipt_globals; diff --git a/tc/m_xt_old.c b/tc/m_xt_old.c index 6a4509a..db01489 100644 --- a/tc/m_xt_old.c +++ b/tc/m_xt_old.c @@ -358,7 +358,7 @@ print_ipt(struct action_util *au, FILE * f, struct rtattr *arg) __u32 hook; if (arg == NULL) - return -1; + return 0; set_lib_dir(); diff --git a/tc/p_ip6.c b/tc/p_ip6.c index 71660c6..83a6ae8 100644 --- a/tc/p_ip6.c +++ b/tc/p_ip6.c @@ -82,7 +82,7 @@ parse_ip6(int *argc_p, char ***argv_p, /* Shift the field by 4 bits on success. 
*/ if (!res) { int nkeys = sel->sel.nkeys; - struct tc_pedit_key *key = &sel->sel.keys[nkeys - 1]; + struct tc_pedit_key *key = &sel->keys[nkeys - 1]; key->mask = htonl(ntohl(key->mask) << 4 | 0xf); key->val = htonl(ntohl(key->val) << 4); diff --git a/tc/q_cake.c b/tc/q_cake.c index bf116e8..4cfc1c0 100644 --- a/tc/q_cake.c +++ b/tc/q_cake.c @@ -299,8 +299,7 @@ static int cake_parse_opt(struct qdisc_util *qu, int argc, char **argv, NEXT_ARG(); overhead = strtol(*argv, &p, 10); - if (!p || *p || !*argv || - overhead < -64 || overhead > 256) { + if (!p || *p || overhead < -64 || overhead > 256) { fprintf(stderr, "Illegal \"overhead\", valid range is -64 to 256\\n"); return -1; @@ -312,7 +311,7 @@ static int cake_parse_opt(struct qdisc_util *qu, int argc, char **argv, NEXT_ARG(); mpu = strtol(*argv, &p, 10); - if (!p || *p || !*argv || mpu < 0 || mpu > 256) { + if (!p || *p || mpu < 0 || mpu > 256) { fprintf(stderr, "Illegal \"mpu\", valid range is 0 to 256\\n"); return -1; @@ -434,7 +433,6 @@ static int cake_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) int atm = 0; int nat = 0; - SPRINT_BUF(b1); SPRINT_BUF(b2); if (opt == NULL) @@ -445,11 +443,10 @@ static int cake_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) if (tb[TCA_CAKE_BASE_RATE64] && RTA_PAYLOAD(tb[TCA_CAKE_BASE_RATE64]) >= sizeof(bandwidth)) { bandwidth = rta_getattr_u64(tb[TCA_CAKE_BASE_RATE64]); - if (bandwidth) { - print_uint(PRINT_JSON, "bandwidth", NULL, bandwidth); - print_string(PRINT_FP, NULL, "bandwidth %s ", - sprint_rate(bandwidth, b1)); - } else + if (bandwidth) + tc_print_rate(PRINT_ANY, "bandwidth", "bandwidth %s ", + bandwidth); + else print_string(PRINT_ANY, "bandwidth", "bandwidth %s ", "unlimited"); } @@ -574,11 +571,8 @@ static int cake_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) if (mpu) print_uint(PRINT_ANY, "mpu", "mpu %u ", mpu); - if (memlimit) { - print_uint(PRINT_JSON, "memlimit", NULL, memlimit); - print_string(PRINT_FP, NULL, 
"memlimit %s ", - sprint_size(memlimit, b1)); - } + if (memlimit) + print_size(PRINT_ANY, "memlimit", "memlimit %s ", memlimit); if (fwmark) print_uint(PRINT_FP, NULL, "fwmark 0x%x ", fwmark); @@ -638,11 +632,11 @@ static int cake_print_xstats(struct qdisc_util *qu, FILE *f, if (st[TCA_CAKE_STATS_MEMORY_USED] && st[TCA_CAKE_STATS_MEMORY_LIMIT]) { - print_string(PRINT_FP, NULL, " memory used: %s", - sprint_size(GET_STAT_U32(MEMORY_USED), b1)); + print_size(PRINT_FP, NULL, " memory used: %s", + GET_STAT_U32(MEMORY_USED)); - print_string(PRINT_FP, NULL, " of %s\n", - sprint_size(GET_STAT_U32(MEMORY_LIMIT), b1)); + print_size(PRINT_FP, NULL, " of %s\n", + GET_STAT_U32(MEMORY_LIMIT)); print_uint(PRINT_JSON, "memory_used", NULL, GET_STAT_U32(MEMORY_USED)); @@ -650,12 +644,10 @@ static int cake_print_xstats(struct qdisc_util *qu, FILE *f, GET_STAT_U32(MEMORY_LIMIT)); } - if (st[TCA_CAKE_STATS_CAPACITY_ESTIMATE64]) { - print_string(PRINT_FP, NULL, " capacity estimate: %s\n", - sprint_rate(GET_STAT_U64(CAPACITY_ESTIMATE64), b1)); - print_uint(PRINT_JSON, "capacity_estimate", NULL, - GET_STAT_U64(CAPACITY_ESTIMATE64)); - } + if (st[TCA_CAKE_STATS_CAPACITY_ESTIMATE64]) + tc_print_rate(PRINT_ANY, "capacity_estimate", + " capacity estimate: %s\n", + GET_STAT_U64(CAPACITY_ESTIMATE64)); if (st[TCA_CAKE_STATS_MIN_NETLEN] && st[TCA_CAKE_STATS_MAX_NETLEN]) { @@ -682,7 +674,7 @@ static int cake_print_xstats(struct qdisc_util *qu, FILE *f, /* class stats */ if (st[TCA_CAKE_STATS_DEFICIT]) - print_int(PRINT_ANY, "deficit", " deficit %u", + print_int(PRINT_ANY, "deficit", " deficit %d", GET_STAT_S32(DEFICIT)); if (st[TCA_CAKE_STATS_COBALT_COUNT]) print_uint(PRINT_ANY, "count", " count %u", @@ -695,7 +687,7 @@ static int cake_print_xstats(struct qdisc_util *qu, FILE *f, if (drop_next < 0) { print_string(PRINT_FP, NULL, " drop_next -%s", - sprint_time(drop_next, b1)); + sprint_time(-drop_next, b1)); } else { print_uint(PRINT_JSON, "drop_next", NULL, drop_next); @@ -790,7 +782,14 @@ static 
int cake_print_xstats(struct qdisc_util *qu, FILE *f, #define PRINT_TSTAT_U64(name, attr) PRINT_TSTAT( \ name, attr, "llu", rta_getattr_u64(GET_TSTAT(i, attr))) - SPRINT_TSTAT(rate, u64, " thresh ", THRESHOLD_RATE64); + if (GET_TSTAT(0, THRESHOLD_RATE64)) { + fprintf(f, " thresh "); + for (i = 0; i < num_tins; i++) + tc_print_rate(PRINT_FP, NULL, " %12s", + rta_getattr_u64(GET_TSTAT(i, THRESHOLD_RATE64))); + fprintf(f, "%s", _SL_); + } + SPRINT_TSTAT(time, u32, " target ", TARGET_US); SPRINT_TSTAT(time, u32, " interval", INTERVAL_US); SPRINT_TSTAT(time, u32, " pk_delay", PEAK_DELAY_US); diff --git a/tc/q_cbq.c b/tc/q_cbq.c index 6518ef4..4619a37 100644 --- a/tc/q_cbq.c +++ b/tc/q_cbq.c @@ -497,10 +497,7 @@ static int cbq_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) } if (r) { - char buf[64]; - - print_rate(buf, sizeof(buf), r->rate); - fprintf(f, "rate %s ", buf); + tc_print_rate(PRINT_FP, NULL, "rate %s ", r->rate); linklayer = (r->linklayer & TC_LINKLAYER_MASK); if (linklayer > TC_LINKLAYER_ETHERNET || show_details) fprintf(f, "linklayer %s ", sprint_linklayer(linklayer, b2)); @@ -533,13 +530,10 @@ static int cbq_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) else fprintf(f, "prio no-transmit"); if (show_details) { - char buf[64]; - fprintf(f, "/%u ", wrr->cpriority); - if (wrr->weight != 1) { - print_rate(buf, sizeof(buf), wrr->weight); - fprintf(f, "weight %s ", buf); - } + if (wrr->weight != 1) + tc_print_rate(PRINT_FP, NULL, "weight %s ", + wrr->weight); if (wrr->allot) fprintf(f, "allot %ub ", wrr->allot); } diff --git a/tc/q_drr.c b/tc/q_drr.c index f9c90f3..4e829ce 100644 --- a/tc/q_drr.c +++ b/tc/q_drr.c @@ -84,16 +84,14 @@ static int drr_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) { struct rtattr *tb[TCA_DRR_MAX + 1]; - SPRINT_BUF(b1); - if (opt == NULL) return 0; parse_rtattr_nested(tb, TCA_DRR_MAX, opt); if (tb[TCA_DRR_QUANTUM]) - fprintf(f, "quantum %s ", - 
sprint_size(rta_getattr_u32(tb[TCA_DRR_QUANTUM]), b1)); + print_size(PRINT_FP, NULL, "quantum %s ", + rta_getattr_u32(tb[TCA_DRR_QUANTUM])); return 0; } @@ -101,15 +99,13 @@ static int drr_print_xstats(struct qdisc_util *qu, FILE *f, struct rtattr *xstat { struct tc_drr_stats *x; - SPRINT_BUF(b1); - if (xstats == NULL) return 0; if (RTA_PAYLOAD(xstats) < sizeof(*x)) return -1; x = RTA_DATA(xstats); - fprintf(f, " deficit %s ", sprint_size(x->deficit, b1)); + print_size(PRINT_FP, NULL, " deficit %s ", x->deficit); return 0; } diff --git a/tc/q_fifo.c b/tc/q_fifo.c index 61493fb..ce82e74 100644 --- a/tc/q_fifo.c +++ b/tc/q_fifo.c @@ -67,14 +67,10 @@ static int fifo_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) if (RTA_PAYLOAD(opt) < sizeof(*qopt)) return -1; qopt = RTA_DATA(opt); - if (strcmp(qu->id, "bfifo") == 0) { - SPRINT_BUF(b1); - print_uint(PRINT_JSON, "limit", NULL, qopt->limit); - print_string(PRINT_FP, NULL, "limit %s", - sprint_size(qopt->limit, b1)); - } else { + if (strcmp(qu->id, "bfifo") == 0) + print_size(PRINT_ANY, "limit", "limit %s", qopt->limit); + else print_uint(PRINT_ANY, "limit", "limit %up", qopt->limit); - } return 0; } diff --git a/tc/q_fq.c b/tc/q_fq.c index b10d01e..cff2197 100644 --- a/tc/q_fq.c +++ b/tc/q_fq.c @@ -315,47 +315,37 @@ static int fq_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) if (tb[TCA_FQ_QUANTUM] && RTA_PAYLOAD(tb[TCA_FQ_QUANTUM]) >= sizeof(__u32)) { quantum = rta_getattr_u32(tb[TCA_FQ_QUANTUM]); - print_uint(PRINT_JSON, "quantum", NULL, quantum); - print_string(PRINT_FP, NULL, "quantum %s ", - sprint_size(quantum, b1)); + print_size(PRINT_ANY, "quantum", "quantum %s ", quantum); } if (tb[TCA_FQ_INITIAL_QUANTUM] && RTA_PAYLOAD(tb[TCA_FQ_INITIAL_QUANTUM]) >= sizeof(__u32)) { quantum = rta_getattr_u32(tb[TCA_FQ_INITIAL_QUANTUM]); - print_uint(PRINT_JSON, "initial_quantum", NULL, quantum); - print_string(PRINT_FP, NULL, "initial_quantum %s ", - sprint_size(quantum, b1)); + 
print_size(PRINT_ANY, "initial_quantum", "initial_quantum %s ", + quantum); } if (tb[TCA_FQ_FLOW_MAX_RATE] && RTA_PAYLOAD(tb[TCA_FQ_FLOW_MAX_RATE]) >= sizeof(__u32)) { rate = rta_getattr_u32(tb[TCA_FQ_FLOW_MAX_RATE]); - if (rate != ~0U) { - print_uint(PRINT_JSON, "maxrate", NULL, rate); - print_string(PRINT_FP, NULL, "maxrate %s ", - sprint_rate(rate, b1)); - } + if (rate != ~0U) + tc_print_rate(PRINT_ANY, + "maxrate", "maxrate %s ", rate); } if (tb[TCA_FQ_FLOW_DEFAULT_RATE] && RTA_PAYLOAD(tb[TCA_FQ_FLOW_DEFAULT_RATE]) >= sizeof(__u32)) { rate = rta_getattr_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]); - if (rate != 0) { - print_uint(PRINT_JSON, "defrate", NULL, rate); - print_string(PRINT_FP, NULL, "defrate %s ", - sprint_rate(rate, b1)); - } + if (rate != 0) + tc_print_rate(PRINT_ANY, + "defrate", "defrate %s ", rate); } if (tb[TCA_FQ_LOW_RATE_THRESHOLD] && RTA_PAYLOAD(tb[TCA_FQ_LOW_RATE_THRESHOLD]) >= sizeof(__u32)) { rate = rta_getattr_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD]); - if (rate != 0) { - print_uint(PRINT_JSON, "low_rate_threshold", NULL, - rate); - print_string(PRINT_FP, NULL, "low_rate_threshold %s ", - sprint_rate(rate, b1)); - } + if (rate != 0) + tc_print_rate(PRINT_ANY, "low_rate_threshold", + "low_rate_threshold %s ", rate); } if (tb[TCA_FQ_FLOW_REFILL_DELAY] && RTA_PAYLOAD(tb[TCA_FQ_FLOW_REFILL_DELAY]) >= sizeof(__u32)) { diff --git a/tc/q_fq_codel.c b/tc/q_fq_codel.c index 1a51302..3009806 100644 --- a/tc/q_fq_codel.c +++ b/tc/q_fq_codel.c @@ -221,9 +221,8 @@ static int fq_codel_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt if (tb[TCA_FQ_CODEL_MEMORY_LIMIT] && RTA_PAYLOAD(tb[TCA_FQ_CODEL_MEMORY_LIMIT]) >= sizeof(__u32)) { memory_limit = rta_getattr_u32(tb[TCA_FQ_CODEL_MEMORY_LIMIT]); - print_uint(PRINT_JSON, "memory_limit", NULL, memory_limit); - print_string(PRINT_FP, NULL, "memory_limit %s ", - sprint_size(memory_limit, b1)); + print_size(PRINT_ANY, "memory_limit", "memory_limit %s ", + memory_limit); } if (tb[TCA_FQ_CODEL_ECN] && 
RTA_PAYLOAD(tb[TCA_FQ_CODEL_ECN]) >= sizeof(__u32)) { diff --git a/tc/q_fq_pie.c b/tc/q_fq_pie.c index c136cd1..9cbef47 100644 --- a/tc/q_fq_pie.c +++ b/tc/q_fq_pie.c @@ -232,16 +232,13 @@ static int fq_pie_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) if (tb[TCA_FQ_PIE_QUANTUM] && RTA_PAYLOAD(tb[TCA_FQ_PIE_QUANTUM]) >= sizeof(__u32)) { quantum = rta_getattr_u32(tb[TCA_FQ_PIE_QUANTUM]); - print_uint(PRINT_JSON, "quantum", NULL, quantum); - print_string(PRINT_FP, NULL, "quantum %s ", - sprint_size(quantum, b1)); + print_size(PRINT_ANY, "quantum", "quantum %s ", quantum); } if (tb[TCA_FQ_PIE_MEMORY_LIMIT] && RTA_PAYLOAD(tb[TCA_FQ_PIE_MEMORY_LIMIT]) >= sizeof(__u32)) { memory_limit = rta_getattr_u32(tb[TCA_FQ_PIE_MEMORY_LIMIT]); - print_uint(PRINT_JSON, "memory_limit", NULL, memory_limit); - print_string(PRINT_FP, NULL, "memory_limit %s ", - sprint_size(memory_limit, b1)); + print_size(PRINT_ANY, "memory_limit", "memory_limit %s ", + memory_limit); } if (tb[TCA_FQ_PIE_ECN_PROB] && RTA_PAYLOAD(tb[TCA_FQ_PIE_ECN_PROB]) >= sizeof(__u32)) { diff --git a/tc/q_gred.c b/tc/q_gred.c index 8a1cecf..89aeb08 100644 --- a/tc/q_gred.c +++ b/tc/q_gred.c @@ -373,18 +373,11 @@ gred_print_stats(struct tc_gred_info *info, struct tc_gred_qopt *qopt) { __u64 bytes = info ? 
info->bytes : qopt->bytesin; - SPRINT_BUF(b1); - if (!is_json_context()) printf("\n Queue size: "); - print_uint(PRINT_JSON, "qave", NULL, qopt->qave); - print_string(PRINT_FP, NULL, "average %s ", - sprint_size(qopt->qave, b1)); - - print_uint(PRINT_JSON, "backlog", NULL, qopt->backlog); - print_string(PRINT_FP, NULL, "current %s ", - sprint_size(qopt->backlog, b1)); + print_size(PRINT_ANY, "qave", "average %s ", qopt->qave); + print_size(PRINT_ANY, "backlog", "current %s ", qopt->backlog); if (!is_json_context()) printf("\n Dropped packets: "); @@ -415,9 +408,7 @@ gred_print_stats(struct tc_gred_info *info, struct tc_gred_qopt *qopt) printf("\n Total packets: "); print_uint(PRINT_ANY, "packets", "%u ", qopt->packets); - - print_uint(PRINT_JSON, "bytes", NULL, bytes); - print_string(PRINT_FP, NULL, "(%s) ", sprint_size(bytes, b1)); + print_size(PRINT_ANY, "bytes", "(%s) ", bytes); } static int gred_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) @@ -431,8 +422,6 @@ static int gred_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) __u32 *limit = NULL; unsigned int i; - SPRINT_BUF(b1); - if (opt == NULL) return 0; @@ -470,11 +459,8 @@ static int gred_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) else print_bool(PRINT_ANY, "grio", NULL, false); - if (limit) { - print_uint(PRINT_JSON, "limit", NULL, *limit); - print_string(PRINT_FP, NULL, "limit %s ", - sprint_size(*limit, b1)); - } + if (limit) + print_size(PRINT_ANY, "limit", "limit %s ", *limit); tc_red_print_flags(sopt->flags); @@ -487,18 +473,9 @@ static int gred_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) print_uint(PRINT_ANY, "vq", "\n vq %u ", qopt->DP); print_hhu(PRINT_ANY, "prio", "prio %hhu ", qopt->prio); - - print_uint(PRINT_JSON, "limit", NULL, qopt->limit); - print_string(PRINT_FP, NULL, "limit %s ", - sprint_size(qopt->limit, b1)); - - print_uint(PRINT_JSON, "min", NULL, qopt->qth_min); - print_string(PRINT_FP, NULL, "min %s ", - 
sprint_size(qopt->qth_min, b1)); - - print_uint(PRINT_JSON, "max", NULL, qopt->qth_max); - print_string(PRINT_FP, NULL, "max %s ", - sprint_size(qopt->qth_max, b1)); + print_size(PRINT_ANY, "limit", "limit %s ", qopt->limit); + print_size(PRINT_ANY, "min", "min %s ", qopt->qth_min); + print_size(PRINT_ANY, "max", "max %s ", qopt->qth_max); if (infos[i].flags_present) tc_red_print_flags(infos[i].flags); diff --git a/tc/q_hfsc.c b/tc/q_hfsc.c index f34b1b2..81c1021 100644 --- a/tc/q_hfsc.c +++ b/tc/q_hfsc.c @@ -219,9 +219,9 @@ hfsc_print_sc(FILE *f, char *name, struct tc_service_curve *sc) SPRINT_BUF(b1); fprintf(f, "%s ", name); - fprintf(f, "m1 %s ", sprint_rate(sc->m1, b1)); + tc_print_rate(PRINT_FP, NULL, "m1 %s ", sc->m1); fprintf(f, "d %s ", sprint_time(tc_core_ktime2time(sc->d), b1)); - fprintf(f, "m2 %s ", sprint_rate(sc->m2, b1)); + tc_print_rate(PRINT_FP, NULL, "m2 %s ", sc->m2); } static int diff --git a/tc/q_hhf.c b/tc/q_hhf.c index f888801..95e49f3 100644 --- a/tc/q_hhf.c +++ b/tc/q_hhf.c @@ -143,9 +143,7 @@ static int hhf_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) if (tb[TCA_HHF_QUANTUM] && RTA_PAYLOAD(tb[TCA_HHF_QUANTUM]) >= sizeof(__u32)) { quantum = rta_getattr_u32(tb[TCA_HHF_QUANTUM]); - print_uint(PRINT_JSON, "quantum", NULL, quantum); - print_string(PRINT_FP, NULL, "quantum %s ", - sprint_size(quantum, b1)); + print_size(PRINT_ANY, "quantum", "quantum %s ", quantum); } if (tb[TCA_HHF_HH_FLOWS_LIMIT] && RTA_PAYLOAD(tb[TCA_HHF_HH_FLOWS_LIMIT]) >= sizeof(__u32)) { @@ -162,9 +160,8 @@ static int hhf_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) if (tb[TCA_HHF_ADMIT_BYTES] && RTA_PAYLOAD(tb[TCA_HHF_ADMIT_BYTES]) >= sizeof(__u32)) { admit_bytes = rta_getattr_u32(tb[TCA_HHF_ADMIT_BYTES]); - print_uint(PRINT_JSON, "admit_bytes", NULL, admit_bytes); - print_string(PRINT_FP, NULL, "admit_bytes %s ", - sprint_size(admit_bytes, b1)); + print_size(PRINT_ANY, "admit_bytes", "admit_bytes %s ", + admit_bytes); } if 
(tb[TCA_HHF_EVICT_TIMEOUT] && RTA_PAYLOAD(tb[TCA_HHF_EVICT_TIMEOUT]) >= sizeof(__u32)) { diff --git a/tc/q_htb.c b/tc/q_htb.c index 5205222..4256635 100644 --- a/tc/q_htb.c +++ b/tc/q_htb.c @@ -30,11 +30,12 @@ static void explain(void) { fprintf(stderr, "Usage: ... qdisc add ... htb [default N] [r2q N]\n" - " [direct_qlen P]\n" + " [direct_qlen P] [offload]\n" " default minor id of class to which unclassified packets are sent {0}\n" " r2q DRR quantums are computed as rate in Bps/r2q {10}\n" " debug string of 16 numbers each 0-3 {0}\n\n" " direct_qlen Limit of the direct queue {in packets}\n" + " offload enable hardware offload\n" "... class add ... htb rate R1 [burst B1] [mpu B] [overhead O]\n" " [prio P] [slot S] [pslot PS]\n" " [ceil R2] [cburst B2] [mtu MTU] [quantum Q]\n" @@ -68,6 +69,7 @@ static int htb_parse_opt(struct qdisc_util *qu, int argc, }; struct rtattr *tail; unsigned int i; char *p; + bool offload = false; while (argc > 0) { if (matches(*argv, "r2q") == 0) { @@ -91,6 +93,8 @@ static int htb_parse_opt(struct qdisc_util *qu, int argc, if (get_u32(&direct_qlen, *argv, 10)) { explain1("direct_qlen"); return -1; } + } else if (matches(*argv, "offload") == 0) { + offload = true; } else { fprintf(stderr, "What is \"%s\"?\n", *argv); explain(); @@ -103,6 +107,8 @@ static int htb_parse_opt(struct qdisc_util *qu, int argc, if (direct_qlen != ~0U) addattr_l(n, 2024, TCA_HTB_DIRECT_QLEN, &direct_qlen, sizeof(direct_qlen)); + if (offload) + addattr(n, 2024, TCA_HTB_OFFLOAD); addattr_nest_end(n, tail); return 0; } @@ -269,7 +275,6 @@ static int htb_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) __u64 rate64, ceil64; SPRINT_BUF(b1); - SPRINT_BUF(b2); SPRINT_BUF(b3); if (opt == NULL) @@ -299,29 +304,27 @@ static int htb_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) RTA_PAYLOAD(tb[TCA_HTB_CEIL64]) >= sizeof(ceil64)) ceil64 = rta_getattr_u64(tb[TCA_HTB_CEIL64]); - fprintf(f, "rate %s ", sprint_rate(rate64, b1)); + 
tc_print_rate(PRINT_FP, NULL, "rate %s ", rate64); if (hopt->rate.overhead) fprintf(f, "overhead %u ", hopt->rate.overhead); buffer = tc_calc_xmitsize(rate64, hopt->buffer); - fprintf(f, "ceil %s ", sprint_rate(ceil64, b1)); + tc_print_rate(PRINT_FP, NULL, "ceil %s ", ceil64); cbuffer = tc_calc_xmitsize(ceil64, hopt->cbuffer); linklayer = (hopt->rate.linklayer & TC_LINKLAYER_MASK); if (linklayer > TC_LINKLAYER_ETHERNET || show_details) fprintf(f, "linklayer %s ", sprint_linklayer(linklayer, b3)); if (show_details) { - fprintf(f, "burst %s/%u mpu %s ", - sprint_size(buffer, b1), - 1<<hopt->rate.cell_log, - sprint_size(hopt->rate.mpu, b2)); - fprintf(f, "cburst %s/%u mpu %s ", - sprint_size(cbuffer, b1), - 1<<hopt->ceil.cell_log, - sprint_size(hopt->ceil.mpu, b2)); + print_size(PRINT_FP, NULL, "burst %s/", buffer); + fprintf(f, "%u ", 1<<hopt->rate.cell_log); + print_size(PRINT_FP, NULL, "mpu %s ", hopt->rate.mpu); + print_size(PRINT_FP, NULL, "cburst %s/", cbuffer); + fprintf(f, "%u ", 1<<hopt->ceil.cell_log); + print_size(PRINT_FP, NULL, "mpu %s ", hopt->ceil.mpu); fprintf(f, "level %d ", (int)hopt->level); } else { - fprintf(f, "burst %s ", sprint_size(buffer, b1)); - fprintf(f, "cburst %s ", sprint_size(cbuffer, b1)); + print_size(PRINT_FP, NULL, "burst %s ", buffer); + print_size(PRINT_FP, NULL, "cburst %s ", cbuffer); } if (show_raw) fprintf(f, "buffer [%08x] cbuffer [%08x] ", @@ -347,6 +350,8 @@ static int htb_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) print_uint(PRINT_ANY, "direct_qlen", " direct_qlen %u", direct_qlen); } + if (tb[TCA_HTB_OFFLOAD]) + print_null(PRINT_ANY, "offload", " offload", NULL); return 0; } diff --git a/tc/q_mqprio.c b/tc/q_mqprio.c index f26ba8d..706452d 100644 --- a/tc/q_mqprio.c +++ b/tc/q_mqprio.c @@ -230,8 +230,6 @@ static int mqprio_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) __u64 max_rate64[TC_QOPT_MAX_QUEUE] = {0}; int len; - SPRINT_BUF(b1); - if (opt == NULL) return 0; @@ -243,13 +241,19 @@ static int
mqprio_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) qopt = RTA_DATA(opt); - fprintf(f, " tc %u map ", qopt->num_tc); + print_uint(PRINT_ANY, "tc", "tc %u ", qopt->num_tc); + open_json_array(PRINT_ANY, is_json_context() ? "map" : "map "); for (i = 0; i <= TC_PRIO_MAX; i++) - fprintf(f, "%u ", qopt->prio_tc_map[i]); - fprintf(f, "\n queues:"); - for (i = 0; i < qopt->num_tc; i++) - fprintf(f, "(%u:%u) ", qopt->offset[i], - qopt->offset[i] + qopt->count[i] - 1); + print_uint(PRINT_ANY, NULL, "%u ", qopt->prio_tc_map[i]); + close_json_array(PRINT_ANY, ""); + open_json_array(PRINT_ANY, is_json_context() ? "queues" : "\n queues:"); + for (i = 0; i < qopt->num_tc; i++) { + open_json_array(PRINT_JSON, NULL); + print_uint(PRINT_ANY, NULL, "(%u:", qopt->offset[i]); + print_uint(PRINT_ANY, NULL, "%u) ", qopt->offset[i] + qopt->count[i] - 1); + close_json_array(PRINT_JSON, NULL); + } + close_json_array(PRINT_ANY, ""); if (len > 0) { struct rtattr *tb[TCA_MQPRIO_MAX + 1]; @@ -262,18 +266,18 @@ static int mqprio_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) __u16 *mode = RTA_DATA(tb[TCA_MQPRIO_MODE]); if (*mode == TC_MQPRIO_MODE_CHANNEL) - fprintf(f, "\n mode:channel"); + print_string(PRINT_ANY, "mode", "\n mode:%s", "channel"); } else { - fprintf(f, "\n mode:dcb"); + print_string(PRINT_ANY, "mode", "\n mode:%s", "dcb"); } if (tb[TCA_MQPRIO_SHAPER]) { __u16 *shaper = RTA_DATA(tb[TCA_MQPRIO_SHAPER]); if (*shaper == TC_MQPRIO_SHAPER_BW_RATE) - fprintf(f, "\n shaper:bw_rlimit"); + print_string(PRINT_ANY, "shaper", "\n shaper:%s", "bw_rlimit"); } else { - fprintf(f, "\n shaper:dcb"); + print_string(PRINT_ANY, "shaper", "\n shaper:%s", "dcb"); } if (tb[TCA_MQPRIO_MIN_RATE64]) { @@ -287,9 +291,10 @@ static int mqprio_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) return -1; *(min++) = rta_getattr_u64(r); } - fprintf(f, " min_rate:"); + open_json_array(PRINT_ANY, is_json_context() ? 
"min_rate" : " min_rate:"); for (i = 0; i < qopt->num_tc; i++) - fprintf(f, "%s ", sprint_rate(min_rate64[i], b1)); + tc_print_rate(PRINT_ANY, NULL, "%s ", min_rate64[i]); + close_json_array(PRINT_ANY, ""); } if (tb[TCA_MQPRIO_MAX_RATE64]) { @@ -303,9 +308,10 @@ static int mqprio_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) return -1; *(max++) = rta_getattr_u64(r); } - fprintf(f, " max_rate:"); + open_json_array(PRINT_ANY, is_json_context() ? "max_rate" : " max_rate:"); for (i = 0; i < qopt->num_tc; i++) - fprintf(f, "%s ", sprint_rate(max_rate64[i], b1)); + tc_print_rate(PRINT_ANY, NULL, "%s ", max_rate64[i]); + close_json_array(PRINT_ANY, ""); } } return 0; diff --git a/tc/q_netem.c b/tc/q_netem.c index d01450f..d93e1c7 100644 --- a/tc/q_netem.c +++ b/tc/q_netem.c @@ -800,9 +800,7 @@ static int netem_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) if (rate && rate->rate) { open_json_object("rate"); rate64 = rate64 ? : rate->rate; - print_string(PRINT_FP, NULL, " rate %s", - sprint_rate(rate64, b1)); - print_lluint(PRINT_JSON, "rate", NULL, rate64); + tc_print_rate(PRINT_ANY, "rate", " rate %s", rate64); PRINT_INT_OPT("packetoverhead", rate->packet_overhead); print_uint(PRINT_ANY, "cellsize", rate->cell_size ? 
" cellsize %u" : "", diff --git a/tc/q_red.c b/tc/q_red.c index df788f8..fd50d37 100644 --- a/tc/q_red.c +++ b/tc/q_red.c @@ -192,10 +192,6 @@ static int red_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) struct tc_red_qopt *qopt; __u32 max_P = 0; - SPRINT_BUF(b1); - SPRINT_BUF(b2); - SPRINT_BUF(b3); - if (opt == NULL) return 0; @@ -217,12 +213,9 @@ static int red_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) qopt->flags = flags_bf->value; } - print_uint(PRINT_JSON, "limit", NULL, qopt->limit); - print_string(PRINT_FP, NULL, "limit %s ", sprint_size(qopt->limit, b1)); - print_uint(PRINT_JSON, "min", NULL, qopt->qth_min); - print_string(PRINT_FP, NULL, "min %s ", sprint_size(qopt->qth_min, b2)); - print_uint(PRINT_JSON, "max", NULL, qopt->qth_max); - print_string(PRINT_FP, NULL, "max %s ", sprint_size(qopt->qth_max, b3)); + print_size(PRINT_ANY, "limit", "limit %s ", qopt->limit); + print_size(PRINT_ANY, "min", "min %s ", qopt->qth_min); + print_size(PRINT_ANY, "max", "max %s ", qopt->qth_max); tc_red_print_flags(qopt->flags); diff --git a/tc/q_sfq.c b/tc/q_sfq.c index 2b9bbcd..d04a440 100644 --- a/tc/q_sfq.c +++ b/tc/q_sfq.c @@ -206,9 +206,6 @@ static int sfq_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) struct tc_sfq_qopt *qopt; struct tc_sfq_qopt_v1 *qopt_ext = NULL; - SPRINT_BUF(b1); - SPRINT_BUF(b2); - SPRINT_BUF(b3); if (opt == NULL) return 0; @@ -219,9 +216,7 @@ static int sfq_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) qopt = RTA_DATA(opt); print_uint(PRINT_ANY, "limit", "limit %up ", qopt->limit); - print_uint(PRINT_JSON, "quantum", NULL, qopt->quantum); - print_string(PRINT_FP, NULL, "quantum %s ", - sprint_size(qopt->quantum, b1)); + print_size(PRINT_ANY, "quantum", "quantum %s ", qopt->quantum); if (qopt_ext && qopt_ext->depth) print_uint(PRINT_ANY, "depth", "depth %u ", qopt_ext->depth); @@ -237,12 +232,8 @@ static int sfq_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) 
qopt->perturb_period); if (qopt_ext && qopt_ext->qth_min) { print_uint(PRINT_ANY, "ewma", "ewma %u ", qopt_ext->Wlog); - print_uint(PRINT_JSON, "min", NULL, qopt_ext->qth_min); - print_string(PRINT_FP, NULL, "min %s ", - sprint_size(qopt_ext->qth_min, b2)); - print_uint(PRINT_JSON, "max", NULL, qopt_ext->qth_max); - print_string(PRINT_FP, NULL, "max %s ", - sprint_size(qopt_ext->qth_max, b3)); + print_size(PRINT_ANY, "min", "min %s ", qopt_ext->qth_min); + print_size(PRINT_ANY, "max", "max %s ", qopt_ext->qth_max); print_float(PRINT_ANY, "probability", "probability %lg ", qopt_ext->max_P / pow(2, 32)); tc_red_print_flags(qopt_ext->flags); diff --git a/tc/q_tbf.c b/tc/q_tbf.c index 5135b1d..4e5bf38 100644 --- a/tc/q_tbf.c +++ b/tc/q_tbf.c @@ -286,20 +286,15 @@ static int tbf_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) if (tb[TCA_TBF_RATE64] && RTA_PAYLOAD(tb[TCA_TBF_RATE64]) >= sizeof(rate64)) rate64 = rta_getattr_u64(tb[TCA_TBF_RATE64]); - print_u64(PRINT_JSON, "rate", NULL, rate64); - print_string(PRINT_FP, NULL, "rate %s ", sprint_rate(rate64, b1)); + tc_print_rate(PRINT_ANY, "rate", "rate %s ", rate64); buffer = tc_calc_xmitsize(rate64, qopt->buffer); if (show_details) { sprintf(b1, "%s/%u", sprint_size(buffer, b2), 1 << qopt->rate.cell_log); print_string(PRINT_ANY, "burst", "burst %s ", b1); - print_uint(PRINT_JSON, "mpu", NULL, qopt->rate.mpu); - print_string(PRINT_FP, NULL, "mpu %s ", - sprint_size(qopt->rate.mpu, b1)); + print_size(PRINT_ANY, "mpu", "mpu %s ", qopt->rate.mpu); } else { - print_u64(PRINT_JSON, "burst", NULL, buffer); - print_string(PRINT_FP, NULL, "burst %s ", - sprint_size(buffer, b1)); + print_size(PRINT_ANY, "burst", "burst %s ", buffer); } if (show_raw) print_hex(PRINT_ANY, "burst_raw", "[%08x] ", qopt->buffer); @@ -308,24 +303,18 @@ static int tbf_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) RTA_PAYLOAD(tb[TCA_TBF_PRATE64]) >= sizeof(prate64)) prate64 = rta_getattr_u64(tb[TCA_TBF_PRATE64]); if (prate64) 
{ - print_u64(PRINT_JSON, "peakrate", NULL, prate64); - print_string(PRINT_FP, NULL, "peakrate %s ", - sprint_rate(prate64, b1)); + tc_print_rate(PRINT_ANY, "peakrate", "peakrate %s ", prate64); if (qopt->mtu || qopt->peakrate.mpu) { mtu = tc_calc_xmitsize(prate64, qopt->mtu); if (show_details) { sprintf(b1, "%s/%u", sprint_size(mtu, b2), 1 << qopt->peakrate.cell_log); print_string(PRINT_ANY, "mtu", "mtu %s ", b1); - print_uint(PRINT_JSON, "mpu", NULL, + print_size(PRINT_ANY, "mpu", "mpu %s ", qopt->peakrate.mpu); - print_string(PRINT_FP, NULL, "mpu %s ", - sprint_size(qopt->peakrate.mpu, - b1)); } else { - print_u64(PRINT_JSON, "minburst", NULL, mtu); - print_string(PRINT_FP, NULL, "minburst %s ", - sprint_size(mtu, b1)); + print_size(PRINT_ANY, "minburst", + "minburst %s ", mtu); } if (show_raw) print_hex(PRINT_ANY, "mtu_raw", "[%08x] ", @@ -347,11 +336,8 @@ static int tbf_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) print_string(PRINT_FP, NULL, "lat %s ", sprint_time(latency, b1)); } - if (show_raw || latency < 0.0) { - print_uint(PRINT_JSON, "limit", NULL, qopt->limit); - print_string(PRINT_FP, NULL, "limit %s ", - sprint_size(qopt->limit, b1)); - } + if (show_raw || latency < 0.0) + print_size(PRINT_ANY, "limit", "limit %s ", qopt->limit); if (qopt->rate.overhead) print_int(PRINT_ANY, "overhead", "overhead %d ", qopt->rate.overhead); diff --git a/tc/tc.c b/tc/tc.c index 5d57054..7557b97 100644 --- a/tc/tc.c +++ b/tc/tc.c @@ -30,6 +30,7 @@ #include "tc_common.h" #include "namespace.h" #include "rt_names.h" +#include "bpf_util.h" int show_stats; int show_details; @@ -44,6 +45,7 @@ bool use_names; int json; int color; int oneline; +int brief; static char *conf_file; @@ -202,7 +204,8 @@ static void usage(void) " OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] | -r[aw] |\n" " -o[neline] | -j[son] | -p[retty] | -c[olor]\n" " -b[atch] [filename] | -n[etns] name | -N[umeric] |\n" - " -nm | -nam[es] | { -cf | -conf } path }\n"); + " -nm | -nam[es]
| { -cf | -conf } path\n" + " -br[ief] }\n"); } static int do_cmd(int argc, char **argv) @@ -231,22 +234,16 @@ static int do_cmd(int argc, char **argv) return -1; } +static int tc_batch_cmd(int argc, char *argv[], void *data) +{ + return do_cmd(argc, argv); +} + static int batch(const char *name) { - char *line = NULL; - size_t len = 0; - int ret = 0; + int ret; batch_mode = 1; - if (name && strcmp(name, "-") != 0) { - if (freopen(name, "r", stdin) == NULL) { - fprintf(stderr, - "Cannot open file \"%s\" for reading: %s\n", - name, strerror(errno)); - return -1; - } - } - tc_core_init(); if (rtnl_open(&rth, 0) < 0) { @@ -254,26 +251,8 @@ static int batch(const char *name) return -1; } - cmdlineno = 0; - while (getcmdline(&line, &len, stdin) != -1) { - char *largv[100]; - int largc; - - largc = makeargs(line, largv, 100); - if (largc == 0) - continue; /* blank line */ - - if (do_cmd(largc, largv)) { - fprintf(stderr, "Command failed %s:%d\n", - name, cmdlineno); - ret = 1; - if (!force) - break; - } - fflush(stdout); - } + ret = do_batch(name, force, tc_batch_cmd, NULL); - free(line); rtnl_close(&rth); return ret; } @@ -281,8 +260,9 @@ static int batch(const char *name) int main(int argc, char **argv) { - int ret; + const char *libbpf_version; char *batch_file = NULL; + int ret; while (argc > 1) { if (argv[1][0] != '-') @@ -299,7 +279,11 @@ int main(int argc, char **argv) } else if (matches(argv[1], "-graph") == 0) { show_graph = 1; } else if (matches(argv[1], "-Version") == 0) { - printf("tc utility, iproute2-%s\n", version); + printf("tc utility, iproute2-%s", version); + libbpf_version = get_libbpf_version(); + if (libbpf_version) + printf(", libbpf %s", libbpf_version); + printf("\n"); return 0; } else if (matches(argv[1], "-iec") == 0) { ++use_iec; @@ -336,6 +320,8 @@ int main(int argc, char **argv) ++json; } else if (matches(argv[1], "-oneline") == 0) { ++oneline; + }else if (matches(argv[1], "-brief") == 0) { + ++brief; } else { fprintf(stderr, "Option \"%s\" 
is unknown, try \"tc -help\".\n", diff --git a/tc/tc_common.h b/tc/tc_common.h index 802fb7f..58dc9d6 100644 --- a/tc/tc_common.h +++ b/tc/tc_common.h @@ -27,3 +27,4 @@ int check_size_table_opts(struct tc_sizespec *s); extern int show_graph; extern bool use_names; +extern int use_iec; diff --git a/tc/tc_filter.c b/tc/tc_filter.c index c591a19..71be2e8 100644 --- a/tc/tc_filter.c +++ b/tc/tc_filter.c @@ -721,6 +721,15 @@ static int tc_filter_list(int cmd, int argc, char **argv) if (filter_chain_index_set) addattr32(&req.n, sizeof(req), TCA_CHAIN, chain_index); + if (brief) { + struct nla_bitfield32 flags = { + .value = TCA_DUMP_FLAGS_TERSE, + .selector = TCA_DUMP_FLAGS_TERSE + }; + + addattr_l(&req.n, MAX_MSG, TCA_DUMP_FLAGS, &flags, sizeof(flags)); + } + if (rtnl_dump_request_n(&rth, &req.n) < 0) { perror("Cannot send dump request"); return 1; diff --git a/tc/tc_util.c b/tc/tc_util.c index b7ff911..4806589 100644 --- a/tc/tc_util.c +++ b/tc/tc_util.c @@ -164,32 +164,6 @@ char *sprint_tc_classid(__u32 h, char *buf) return buf; } -/* See http://physics.nist.gov/cuu/Units/binary.html */ -static const struct rate_suffix { - const char *name; - double scale; -} suffixes[] = { - { "bit", 1. }, - { "Kibit", 1024. }, - { "kbit", 1000. }, - { "mibit", 1024.*1024. }, - { "mbit", 1000000. }, - { "gibit", 1024.*1024.*1024. }, - { "gbit", 1000000000. }, - { "tibit", 1024.*1024.*1024.*1024. }, - { "tbit", 1000000000000. }, - { "Bps", 8. }, - { "KiBps", 8.*1024. }, - { "KBps", 8000. }, - { "MiBps", 8.*1024*1024. }, - { "MBps", 8000000. }, - { "GiBps", 8.*1024.*1024.*1024. }, - { "GBps", 8000000000. }, - { "TiBps", 8.*1024.*1024.*1024.*1024. }, - { "TBps", 8000000000000. 
}, - { NULL } -}; - /* Parse a percent e.g: '30%' * return: 0 = ok, -1 = error, 1 = out of range */ @@ -273,84 +247,10 @@ int get_percent_rate64(__u64 *rate, const char *str, const char *dev) return get_rate64(rate, r_str); } -int get_rate(unsigned int *rate, const char *str) -{ - char *p; - double bps = strtod(str, &p); - const struct rate_suffix *s; - - if (p == str) - return -1; - - for (s = suffixes; s->name; ++s) { - if (strcasecmp(s->name, p) == 0) { - bps *= s->scale; - p += strlen(p); - break; - } - } - - if (*p) - return -1; /* unknown suffix */ - - bps /= 8; /* -> bytes per second */ - *rate = bps; - /* detect if an overflow happened */ - if (*rate != floor(bps)) - return -1; - return 0; -} - -int get_rate64(__u64 *rate, const char *str) -{ - char *p; - double bps = strtod(str, &p); - const struct rate_suffix *s; - - if (p == str) - return -1; - - for (s = suffixes; s->name; ++s) { - if (strcasecmp(s->name, p) == 0) { - bps *= s->scale; - p += strlen(p); - break; - } - } - - if (*p) - return -1; /* unknown suffix */ - - bps /= 8; /* -> bytes per second */ - *rate = bps; - return 0; -} - -void print_rate(char *buf, int len, __u64 rate) -{ - extern int use_iec; - unsigned long kilo = use_iec ? 1024 : 1000; - const char *str = use_iec ? 
"i" : ""; - static char *units[5] = {"", "K", "M", "G", "T"}; - int i; - - rate <<= 3; /* bytes/sec -> bits/sec */ - - for (i = 0; i < ARRAY_SIZE(units) - 1; i++) { - if (rate < kilo) - break; - if (((rate % kilo) != 0) && rate < 1000*kilo) - break; - rate /= kilo; - } - - snprintf(buf, len, "%.0f%s%sbit", (double)rate, units[i], str); -} - -char *sprint_rate(__u64 rate, char *buf) +void tc_print_rate(enum output_type t, const char *key, const char *fmt, + unsigned long long rate) { - print_rate(buf, SPRINT_BSIZE-1, rate); - return buf; + print_rate(use_iec, t, key, fmt, rate); } char *sprint_ticks(__u32 ticks, char *buf) @@ -358,41 +258,6 @@ char *sprint_ticks(__u32 ticks, char *buf) return sprint_time(tc_core_tick2time(ticks), buf); } -int get_size(unsigned int *size, const char *str) -{ - double sz; - char *p; - - sz = strtod(str, &p); - if (p == str) - return -1; - - if (*p) { - if (strcasecmp(p, "kb") == 0 || strcasecmp(p, "k") == 0) - sz *= 1024; - else if (strcasecmp(p, "gb") == 0 || strcasecmp(p, "g") == 0) - sz *= 1024*1024*1024; - else if (strcasecmp(p, "gbit") == 0) - sz *= 1024*1024*1024/8; - else if (strcasecmp(p, "mb") == 0 || strcasecmp(p, "m") == 0) - sz *= 1024*1024; - else if (strcasecmp(p, "mbit") == 0) - sz *= 1024*1024/8; - else if (strcasecmp(p, "kbit") == 0) - sz *= 1024/8; - else if (strcasecmp(p, "b") != 0) - return -1; - } - - *size = sz; - - /* detect if an overflow happened */ - if (*size != floor(sz)) - return -1; - - return 0; -} - int get_size_and_cell(unsigned int *size, int *cell_log, char *str) { char *slash = strchr(str, '/'); @@ -433,24 +298,6 @@ void print_devname(enum output_type type, int ifindex) "dev", "%s ", ifname); } -static void print_size(char *buf, int len, __u32 sz) -{ - double tmp = sz; - - if (sz >= 1024*1024 && fabs(1024*1024*rint(tmp/(1024*1024)) - sz) < 1024) - snprintf(buf, len, "%gMb", rint(tmp/(1024*1024))); - else if (sz >= 1024 && fabs(1024*rint(tmp/1024) - sz) < 16) - snprintf(buf, len, "%gKb", 
rint(tmp/1024)); - else - snprintf(buf, len, "%ub", sz); -} - -char *sprint_size(__u32 size, char *buf) -{ - print_size(buf, SPRINT_BSIZE-1, size); - return buf; -} - static const char *action_n2a(int action) { static char buf[64]; @@ -807,7 +654,6 @@ static void print_tcstats_basic_hw(struct rtattr **tbs, char *prefix) void print_tcstats2_attr(FILE *fp, struct rtattr *rta, char *prefix, struct rtattr **xstats) { - SPRINT_BUF(b1); struct rtattr *tbs[TCA_STATS_MAX + 1]; parse_rtattr_nested(tbs, TCA_STATS_MAX, rta); @@ -853,8 +699,7 @@ void print_tcstats2_attr(FILE *fp, struct rtattr *rta, char *prefix, struct rtat sizeof(re))); print_string(PRINT_FP, NULL, "\n%s", prefix); print_lluint(PRINT_JSON, "rate", NULL, re.bps); - print_string(PRINT_FP, NULL, "rate %s", - sprint_rate(re.bps, b1)); + tc_print_rate(PRINT_FP, NULL, "rate %s", re.bps); print_lluint(PRINT_ANY, "pps", " %llupps", re.pps); } else if (tbs[TCA_STATS_RATE_EST]) { struct gnet_stats_rate_est re = {0}; @@ -863,8 +708,7 @@ void print_tcstats2_attr(FILE *fp, struct rtattr *rta, char *prefix, struct rtat MIN(RTA_PAYLOAD(tbs[TCA_STATS_RATE_EST]), sizeof(re))); print_string(PRINT_FP, NULL, "\n%s", prefix); print_uint(PRINT_JSON, "rate", NULL, re.bps); - print_string(PRINT_FP, NULL, "rate %s", - sprint_rate(re.bps, b1)); + tc_print_rate(PRINT_FP, NULL, "rate %s", re.bps); print_uint(PRINT_ANY, "pps", " %upps", re.pps); } @@ -875,10 +719,8 @@ void print_tcstats2_attr(FILE *fp, struct rtattr *rta, char *prefix, struct rtat MIN(RTA_PAYLOAD(tbs[TCA_STATS_QUEUE]), sizeof(q))); if (!tbs[TCA_STATS_RATE_EST]) print_nl(); - print_uint(PRINT_JSON, "backlog", NULL, q.backlog); print_string(PRINT_FP, NULL, "%s", prefix); - print_string(PRINT_FP, NULL, "backlog %s", - sprint_size(q.backlog, b1)); + print_size(PRINT_ANY, "backlog", "backlog %s", q.backlog); print_uint(PRINT_ANY, "qlen", " %up", q.qlen); print_uint(PRINT_FP, NULL, " requeues %u", q.requeues); } @@ -890,8 +732,6 @@ void print_tcstats2_attr(FILE *fp, struct 
rtattr *rta, char *prefix, struct rtat void print_tcstats_attr(FILE *fp, struct rtattr *tb[], char *prefix, struct rtattr **xstats) { - SPRINT_BUF(b1); - if (tb[TCA_STATS2]) { print_tcstats2_attr(fp, tb[TCA_STATS2], prefix, xstats); if (xstats && !*xstats) @@ -916,16 +756,16 @@ void print_tcstats_attr(FILE *fp, struct rtattr *tb[], char *prefix, if (st.bps || st.pps) { fprintf(fp, "rate "); if (st.bps) - fprintf(fp, "%s ", - sprint_rate(st.bps, b1)); + tc_print_rate(PRINT_FP, NULL, "%s ", + st.bps); if (st.pps) fprintf(fp, "%upps ", st.pps); } if (st.qlen || st.backlog) { fprintf(fp, "backlog "); if (st.backlog) - fprintf(fp, "%s ", - sprint_size(st.backlog, b1)); + print_size(PRINT_FP, NULL, "%s ", + st.backlog); if (st.qlen) fprintf(fp, "%up ", st.qlen); } diff --git a/tc/tc_util.h b/tc/tc_util.h index c8af4e9..b197bcd 100644 --- a/tc/tc_util.h +++ b/tc/tc_util.h @@ -76,19 +76,15 @@ struct qdisc_util *get_qdisc_kind(const char *str); struct filter_util *get_filter_kind(const char *str); int get_qdisc_handle(__u32 *h, const char *str); -int get_rate(unsigned int *rate, const char *str); int get_percent_rate(unsigned int *rate, const char *str, const char *dev); -int get_rate64(__u64 *rate, const char *str); int get_percent_rate64(__u64 *rate, const char *str, const char *dev); -int get_size(unsigned int *size, const char *str); int get_size_and_cell(unsigned int *size, int *cell_log, char *str); int get_linklayer(unsigned int *val, const char *arg); -void print_rate(char *buf, int len, __u64 rate); +void tc_print_rate(enum output_type t, const char *key, const char *fmt, + unsigned long long rate); void print_devname(enum output_type type, int ifindex); -char *sprint_rate(__u64 rate, char *buf); -char *sprint_size(__u32 size, char *buf); char *sprint_tc_classid(__u32 h, char *buf); char *sprint_ticks(__u32 ticks, char *buf); char *sprint_linklayer(unsigned int linklayer, char *buf); diff --git a/testsuite/tests/tc/flower_mpls.t b/testsuite/tests/tc/flower_mpls.t 
new file mode 100755 index 0000000..430ed13 --- /dev/null +++ b/testsuite/tests/tc/flower_mpls.t @@ -0,0 +1,82 @@ +#!/bin/sh + +. lib/generic.sh + +DEV="$(rand_dev)" +ts_ip "$0" "Add $DEV dummy interface" link add dev $DEV up type dummy +ts_tc "$0" "Add ingress qdisc" qdisc add dev $DEV ingress + +reset_qdisc() +{ + ts_tc "$0" "Remove ingress qdisc" qdisc del dev $DEV ingress + ts_tc "$0" "Add ingress qdisc" qdisc add dev $DEV ingress +} + +ts_tc "$0" "Add MPLS filter matching first LSE with minimal values" \ + filter add dev $DEV ingress protocol mpls_uc flower \ + mpls_label 0 mpls_tc 0 mpls_bos 0 mpls_ttl 0 \ + action drop +ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress +test_on "mpls_label 0" +test_on "mpls_tc 0" +test_on "mpls_bos 0" +test_on "mpls_ttl 0" + +reset_qdisc +ts_tc "$0" "Add MPLS filter matching first LSE with maximal values" \ + filter add dev $DEV ingress protocol mpls_uc flower \ + mpls_label 1048575 mpls_tc 7 mpls_bos 1 mpls_ttl 255 \ + action drop +ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress +test_on "mpls_label 1048575" +test_on "mpls_tc 7" +test_on "mpls_bos 1" +test_on "mpls_ttl 255" + +reset_qdisc +ts_tc "$0" "Add MPLS filter matching second LSE with minimal values" \ + filter add dev $DEV ingress protocol mpls_uc flower \ + mpls lse depth 2 label 0 tc 0 bos 0 ttl 0 \ + action drop +ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress +test_on "mpls" +test_on "lse" +test_on "depth 2" +test_on "label 0" +test_on "tc 0" +test_on "bos 0" +test_on "ttl 0" + +reset_qdisc +ts_tc "$0" "Add MPLS filter matching second LSE with maximal values" \ + filter add dev $DEV ingress protocol mpls_uc flower \ + mpls lse depth 2 label 1048575 tc 7 bos 1 ttl 255 \ + action drop +ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress +test_on "mpls" +test_on "lse" +test_on "depth 2" +test_on "label 1048575" +test_on "tc 7" +test_on "bos 1" +test_on "ttl 255" + +reset_qdisc +ts_tc "$0" "Add MPLS filter 
matching two LSEs" \ + filter add dev $DEV ingress protocol mpls_uc flower mpls \ + lse depth 1 label 0 tc 0 bos 0 ttl 0 \ + lse depth 2 label 1048575 tc 7 bos 1 ttl 255 \ + action drop +ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress +test_on "mpls" +test_on "lse" +test_on "depth 1" +test_on "label 0" +test_on "tc 0" +test_on "bos 0" +test_on "ttl 0" +test_on "depth 2" +test_on "label 1048575" +test_on "tc 7" +test_on "bos 1" +test_on "ttl 255" diff --git a/testsuite/tests/tc/mpls.t b/testsuite/tests/tc/mpls.t new file mode 100755 index 0000000..cb25f36 --- /dev/null +++ b/testsuite/tests/tc/mpls.t @@ -0,0 +1,69 @@ +#!/bin/sh + +. lib/generic.sh + +DEV="$(rand_dev)" +ts_ip "$0" "Add $DEV dummy interface" link add dev $DEV up type dummy +ts_tc "$0" "Add ingress qdisc" qdisc add dev $DEV ingress + +reset_qdisc() +{ + ts_tc "$0" "Remove ingress qdisc" qdisc del dev $DEV ingress + ts_tc "$0" "Add ingress qdisc" qdisc add dev $DEV ingress +} + +ts_tc "$0" "Add mpls action pop" \ + filter add dev $DEV ingress protocol mpls_uc matchall \ + action mpls pop protocol ip +ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress +test_on "mpls" +test_on "pop protocol ip pipe" + +reset_qdisc +ts_tc "$0" "Add mpls action push" \ + filter add dev $DEV ingress protocol ip matchall \ + action mpls push protocol mpls_uc label 20 tc 3 bos 1 ttl 64 +ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress +test_on "mpls" +test_on "push" +test_on "protocol mpls_uc" +test_on "label 20" +test_on "tc 3" +test_on "bos 1" +test_on "ttl 64" +test_on "pipe" + +reset_qdisc +ts_tc "$0" "Add mpls action mac_push" \ + filter add dev $DEV ingress matchall \ + action mpls mac_push protocol mpls_uc label 20 tc 3 bos 1 ttl 64 +ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress +test_on "mpls" +test_on "mac_push" +test_on "protocol mpls_uc" +test_on "label 20" +test_on "tc 3" +test_on "bos 1" +test_on "ttl 64" +test_on "pipe" + +reset_qdisc +ts_tc "$0" "Add mpls 
action modify" \ + filter add dev $DEV ingress protocol mpls_uc matchall \ + action mpls modify label 20 tc 3 ttl 64 +ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress +test_on "mpls" +test_on "modify" +test_on "label 20" +test_on "tc 3" +test_on "ttl 64" +test_on "pipe" + +reset_qdisc +ts_tc "$0" "Add mpls action dec_ttl" \ + filter add dev $DEV ingress protocol mpls_uc matchall \ + action mpls dec_ttl +ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress +test_on "mpls" +test_on "dec_ttl" +test_on "pipe" diff --git a/testsuite/tests/tc/vlan.t b/testsuite/tests/tc/vlan.t new file mode 100755 index 0000000..b86dc36 --- /dev/null +++ b/testsuite/tests/tc/vlan.t @@ -0,0 +1,86 @@ +#!/bin/sh + +. lib/generic.sh + +DEV="$(rand_dev)" +ts_ip "$0" "Add $DEV dummy interface" link add dev $DEV up type dummy +ts_tc "$0" "Add ingress qdisc" qdisc add dev $DEV ingress + +reset_qdisc() +{ + ts_tc "$0" "Remove ingress qdisc" qdisc del dev $DEV ingress + ts_tc "$0" "Add ingress qdisc" qdisc add dev $DEV ingress +} + +ts_tc "$0" "Add vlan action pop" \ + filter add dev $DEV ingress matchall action vlan pop +ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress +test_on "vlan" +test_on "pop" +test_on "pipe" + +reset_qdisc +ts_tc "$0" "Add vlan action push (default parameters)" \ + filter add dev $DEV ingress matchall action vlan push id 5 +ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress +test_on "vlan" +test_on "push" +test_on "id 5" +test_on "protocol 802.1Q" +test_on "priority 0" +test_on "pipe" + +reset_qdisc +ts_tc "$0" "Add vlan action push (explicit parameters)" \ + filter add dev $DEV ingress matchall \ + action vlan push id 5 protocol 802.1ad priority 2 +ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress +test_on "vlan" +test_on "push" +test_on "id 5" +test_on "protocol 802.1ad" +test_on "priority 2" +test_on "pipe" + +reset_qdisc +ts_tc "$0" "Add vlan action modify (default parameters)" \ + filter add dev $DEV 
ingress matchall action vlan modify id 5 +ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress +test_on "vlan" +test_on "modify" +test_on "id 5" +test_on "protocol 802.1Q" +test_on "priority 0" +test_on "pipe" + +reset_qdisc +ts_tc "$0" "Add vlan action modify (explicit parameters)" \ + filter add dev $DEV ingress matchall \ + action vlan modify id 5 protocol 802.1ad priority 2 +ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress +test_on "vlan" +test_on "modify" +test_on "id 5" +test_on "protocol 802.1ad" +test_on "priority 2" +test_on "pipe" + +reset_qdisc +ts_tc "$0" "Add vlan action pop_eth" \ + filter add dev $DEV ingress matchall action vlan pop_eth +ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress +test_on "vlan" +test_on "pop_eth" +test_on "pipe" + +reset_qdisc +ts_tc "$0" "Add vlan action push_eth" \ + filter add dev $DEV ingress matchall \ + action vlan push_eth dst_mac 02:00:00:00:00:02 \ + src_mac 02:00:00:00:00:01 +ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress +test_on "vlan" +test_on "push_eth" +test_on "dst_mac 02:00:00:00:00:02" +test_on "src_mac 02:00:00:00:00:01" +test_on "pipe" diff --git a/tipc/cmdl.c b/tipc/cmdl.c index f2f259c..981e268 100644 --- a/tipc/cmdl.c +++ b/tipc/cmdl.c @@ -33,7 +33,7 @@ static const struct cmd *find_cmd(const struct cmd *cmds, char *str) return match; } -static struct opt *find_opt(struct opt *opts, char *str) +struct opt *find_opt(struct opt *opts, char *str) { struct opt *o; struct opt *match = NULL; diff --git a/tipc/cmdl.h b/tipc/cmdl.h index 03db359..dcade36 100644 --- a/tipc/cmdl.h +++ b/tipc/cmdl.h @@ -46,6 +46,7 @@ struct opt { char *val; }; +struct opt *find_opt(struct opt *opts, char *str); struct opt *get_opt(struct opt *opts, char *key); bool has_opt(struct opt *opts, char *key); int parse_opts(struct opt *opts, struct cmdl *cmdl); diff --git a/tipc/node.c b/tipc/node.c index ffdaeae..0524601 100644 --- a/tipc/node.c +++ b/tipc/node.c @@ -160,19 +160,27 
@@ static int cmd_node_set_nodeid(struct nlmsghdr *nlh, const struct cmd *cmd, static void cmd_node_set_key_help(struct cmdl *cmdl) { fprintf(stderr, - "Usage: %s node set key KEY [algname ALGNAME] [nodeid NODEID]\n\n" + "Usage: %s node set key KEY [algname ALGNAME] [PROPERTIES]\n" + " %s node set key rekeying REKEYING\n\n" + "KEY\n" + " Symmetric KEY & SALT as a composite ASCII or hex string (0x...) in form:\n" + " [KEY: 16, 24 or 32 octets][SALT: 4 octets]\n\n" + "ALGNAME\n" + " Cipher algorithm [default: \"gcm(aes)\"]\n\n" "PROPERTIES\n" - " KEY - Symmetric KEY & SALT as a normal or hex string\n" - " that consists of two parts:\n" - " [KEY: 16, 24 or 32 octets][SALT: 4 octets]\n\n" - " algname ALGNAME - Default: \"gcm(aes)\"\n\n" - " nodeid NODEID - Own or peer node identity to which the key will\n" - " be attached. If not present, the key is a cluster\n" - " key!\n\n" + " master - Set KEY as a cluster master key\n" + " - Set KEY as a cluster key\n" + " nodeid NODEID - Set KEY as a per-node key for own or peer\n\n" + "REKEYING\n" + " INTERVAL - Set rekeying interval (in minutes) [0: disable]\n" + " now - Trigger one (first) rekeying immediately\n\n" "EXAMPLES\n" - " %s node set key this_is_a_key16_salt algname \"gcm(aes)\" nodeid node1\n" - " %s node set key 0x746869735F69735F615F6B657931365F73616C74 nodeid node2\n\n", - cmdl->argv[0], cmdl->argv[0], cmdl->argv[0]); + " %s node set key this_is_a_master_key master\n" + " %s node set key 0x746869735F69735F615F6B657931365F73616C74\n" + " %s node set key this_is_a_key16_salt algname \"gcm(aes)\" nodeid 1001002\n" + " %s node set key rekeying 600\n\n", + cmdl->argv[0], cmdl->argv[0], cmdl->argv[0], cmdl->argv[0], + cmdl->argv[0], cmdl->argv[0]); } static int cmd_node_set_key(struct nlmsghdr *nlh, const struct cmd *cmd, @@ -187,35 +195,49 @@ static int cmd_node_set_key(struct nlmsghdr *nlh, const struct cmd *cmd, struct opt opts[] = { { "algname", OPT_KEYVAL, NULL }, { "nodeid", OPT_KEYVAL, NULL }, + { "master", 
OPT_KEY, NULL }, + { "rekeying", OPT_KEYVAL, NULL }, { NULL } }; struct nlattr *nest; - struct opt *opt_algname, *opt_nodeid; + struct opt *opt_algname, *opt_nodeid, *opt_master, *opt_rekeying; char buf[MNL_SOCKET_BUFFER_SIZE]; uint8_t id[TIPC_NODEID_LEN] = {0,}; + uint32_t rekeying = 0; + bool has_key = false; int keysize; char *str; - if (help_flag) { + if (help_flag || cmdl->optind >= cmdl->argc) { (cmd->help)(cmdl); return -EINVAL; } - if (cmdl->optind >= cmdl->argc) { - fprintf(stderr, "error, missing key\n"); - return -EINVAL; - } + /* Check if command starts with opts i.e. "rekeying" opt without key */ + if (find_opt(opts, cmdl->argv[cmdl->optind])) + goto get_ops; /* Get user key */ + has_key = true; str = shift_cmdl(cmdl); if (str2key(str, &input.key)) { fprintf(stderr, "error, invalid key input\n"); return -EINVAL; } +get_ops: if (parse_opts(opts, cmdl) < 0) return -EINVAL; + /* Get rekeying time */ + opt_rekeying = get_opt(opts, "rekeying"); + if (opt_rekeying) { + if (!strcmp(opt_rekeying->val, "now")) + rekeying = TIPC_REKEYING_NOW; + else + rekeying = atoi(opt_rekeying->val); + } + /* Get algorithm name, default: "gcm(aes)" */ opt_algname = get_opt(opts, "algname"); if (!opt_algname) @@ -230,17 +252,34 @@ static int cmd_node_set_key(struct nlmsghdr *nlh, const struct cmd *cmd, return -EINVAL; } + /* Get master key indication */ + opt_master = get_opt(opts, "master"); + + /* Sanity check if wrong option */ + if (opt_nodeid && opt_master) { + fprintf(stderr, "error, per-node key cannot be master\n"); + return -EINVAL; + } + /* Init & do the command */ nlh = msg_init(buf, TIPC_NL_KEY_SET); if (!nlh) { fprintf(stderr, "error, message initialisation failed\n"); return -1; } + nest = mnl_attr_nest_start(nlh, TIPC_NLA_NODE); - keysize = tipc_aead_key_size(&input.key); - mnl_attr_put(nlh, TIPC_NLA_NODE_KEY, keysize, &input.key); - if (opt_nodeid) - mnl_attr_put(nlh, TIPC_NLA_NODE_ID, TIPC_NODEID_LEN, id); + if (has_key) { + keysize = 
tipc_aead_key_size(&input.key); + mnl_attr_put(nlh, TIPC_NLA_NODE_KEY, keysize, &input.key); + if (opt_nodeid) + mnl_attr_put(nlh, TIPC_NLA_NODE_ID, TIPC_NODEID_LEN, id); + if (opt_master) + mnl_attr_put(nlh, TIPC_NLA_NODE_KEY_MASTER, 0, NULL); + } + if (opt_rekeying) + mnl_attr_put_u32(nlh, TIPC_NLA_NODE_REKEYING, rekeying); + mnl_attr_nest_end(nlh, nest); return msg_doit(nlh, NULL, NULL); } diff --git a/tipc/peer.c b/tipc/peer.c index f638077..f14ec35 100644 --- a/tipc/peer.c +++ b/tipc/peer.c @@ -59,17 +59,68 @@ static int cmd_peer_rm_addr(struct nlmsghdr *nlh, const struct cmd *cmd, return msg_doit(nlh, NULL, NULL); } +static int cmd_peer_rm_nodeid(struct nlmsghdr *nlh, const struct cmd *cmd, + struct cmdl *cmdl, void *data) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + __u8 id[16] = {0,}; + __u64 *w0 = (__u64 *)&id[0]; + __u64 *w1 = (__u64 *)&id[8]; + struct nlattr *nest; + char *str; + + if (cmdl->argc != cmdl->optind + 1) { + fprintf(stderr, "Usage: %s peer remove identity NODEID\n", + cmdl->argv[0]); + return -EINVAL; + } + + str = shift_cmdl(cmdl); + if (str2nodeid(str, id)) { + fprintf(stderr, "Invalid node identity\n"); + return -EINVAL; + } + + nlh = msg_init(buf, TIPC_NL_PEER_REMOVE); + if (!nlh) { + fprintf(stderr, "error, message initialisation failed\n"); + return -1; + } + + nest = mnl_attr_nest_start(nlh, TIPC_NLA_NET); + mnl_attr_put_u64(nlh, TIPC_NLA_NET_NODEID, *w0); + mnl_attr_put_u64(nlh, TIPC_NLA_NET_NODEID_W1, *w1); + mnl_attr_nest_end(nlh, nest); + + return msg_doit(nlh, NULL, NULL); +} + static void cmd_peer_rm_help(struct cmdl *cmdl) { + fprintf(stderr, "Usage: %s peer remove PROPERTY\n\n" + "PROPERTIES\n" + " identity NODEID - Remove peer node identity\n", + cmdl->argv[0]); +} + +static void cmd_peer_rm_addr_help(struct cmdl *cmdl) +{ fprintf(stderr, "Usage: %s peer remove address ADDRESS\n", cmdl->argv[0]); } +static void cmd_peer_rm_nodeid_help(struct cmdl *cmdl) +{ + fprintf(stderr, "Usage: %s peer remove identity NODEID\n", + 
cmdl->argv[0]); +} + static int cmd_peer_rm(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl, void *data) { const struct cmd cmds[] = { - { "address", cmd_peer_rm_addr, cmd_peer_rm_help }, + { "address", cmd_peer_rm_addr, cmd_peer_rm_addr_help }, + { "identity", cmd_peer_rm_nodeid, cmd_peer_rm_nodeid_help }, { NULL } }; diff --git a/vdpa/.gitignore b/vdpa/.gitignore new file mode 100644 index 0000000..7ef2878 --- /dev/null +++ b/vdpa/.gitignore @@ -0,0 +1 @@ +vdpa diff --git a/vdpa/Makefile b/vdpa/Makefile new file mode 100644 index 0000000..253e20a --- /dev/null +++ b/vdpa/Makefile @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: GPL-2.0 +include ../config.mk + +TARGETS := + +ifeq ($(HAVE_MNL),y) + +CFLAGS += -I./include/uapi/ +VDPAOBJ = vdpa.o +TARGETS += vdpa + +endif + +all: $(TARGETS) $(LIBS) + +vdpa: $(VDPAOBJ) + $(QUIET_LINK)$(CC) $^ $(LDFLAGS) $(LDLIBS) -o $@ + +install: all + for i in $(TARGETS); \ + do install -m 0755 $$i $(DESTDIR)$(SBINDIR); \ + done + +clean: + rm -f $(VDPAOBJ) $(TARGETS) diff --git a/vdpa/include/uapi/linux/vdpa.h b/vdpa/include/uapi/linux/vdpa.h new file mode 100644 index 0000000..37ae26b --- /dev/null +++ b/vdpa/include/uapi/linux/vdpa.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * vdpa device management interface + * Copyright (c) 2020 Mellanox Technologies Ltd. All rights reserved. 
+ */ + +#ifndef _LINUX_VDPA_H_ +#define _LINUX_VDPA_H_ + +#define VDPA_GENL_NAME "vdpa" +#define VDPA_GENL_VERSION 0x1 + +enum vdpa_command { + VDPA_CMD_UNSPEC, + VDPA_CMD_MGMTDEV_NEW, + VDPA_CMD_MGMTDEV_GET, /* can dump */ + VDPA_CMD_DEV_NEW, + VDPA_CMD_DEV_DEL, + VDPA_CMD_DEV_GET, /* can dump */ +}; + +enum vdpa_attr { + VDPA_ATTR_UNSPEC, + + /* bus name (optional) + dev name together make the parent device handle */ + VDPA_ATTR_MGMTDEV_BUS_NAME, /* string */ + VDPA_ATTR_MGMTDEV_DEV_NAME, /* string */ + VDPA_ATTR_MGMTDEV_SUPPORTED_CLASSES, /* u64 */ + + VDPA_ATTR_DEV_NAME, /* string */ + VDPA_ATTR_DEV_ID, /* u32 */ + VDPA_ATTR_DEV_VENDOR_ID, /* u32 */ + VDPA_ATTR_DEV_MAX_VQS, /* u32 */ + VDPA_ATTR_DEV_MAX_VQ_SIZE, /* u16 */ + + /* new attributes must be added above here */ + VDPA_ATTR_MAX, +}; + +#endif diff --git a/vdpa/include/uapi/linux/virtio_ids.h b/vdpa/include/uapi/linux/virtio_ids.h new file mode 100644 index 0000000..bc1c062 --- /dev/null +++ b/vdpa/include/uapi/linux/virtio_ids.h @@ -0,0 +1,58 @@ +#ifndef _LINUX_VIRTIO_IDS_H +#define _LINUX_VIRTIO_IDS_H +/* + * Virtio IDs + * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ + +#define VIRTIO_ID_NET 1 /* virtio net */ +#define VIRTIO_ID_BLOCK 2 /* virtio block */ +#define VIRTIO_ID_CONSOLE 3 /* virtio console */ +#define VIRTIO_ID_RNG 4 /* virtio rng */ +#define VIRTIO_ID_BALLOON 5 /* virtio balloon */ +#define VIRTIO_ID_IOMEM 6 /* virtio ioMemory */ +#define VIRTIO_ID_RPMSG 7 /* virtio remote processor messaging */ +#define VIRTIO_ID_SCSI 8 /* virtio scsi */ +#define VIRTIO_ID_9P 9 /* 9p virtio console */ +#define VIRTIO_ID_MAC80211_WLAN 10 /* virtio WLAN MAC */ +#define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */ +#define VIRTIO_ID_CAIF 12 /* Virtio caif */ +#define VIRTIO_ID_MEMORY_BALLOON 13 /* virtio memory balloon */ +#define VIRTIO_ID_GPU 16 /* virtio GPU */ +#define VIRTIO_ID_CLOCK 17 /* virtio clock/timer */ +#define VIRTIO_ID_INPUT 18 /* virtio input */ +#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ +#define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ +#define VIRTIO_ID_SIGNAL_DIST 21 /* virtio signal distribution device */ +#define VIRTIO_ID_PSTORE 22 /* virtio pstore device */ +#define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ +#define VIRTIO_ID_MEM 24 /* virtio mem */ +#define VIRTIO_ID_FS 26 /* virtio filesystem */ +#define 
VIRTIO_ID_PMEM 27 /* virtio pmem */ +#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ + +#endif /* _LINUX_VIRTIO_IDS_H */ diff --git a/vdpa/vdpa.c b/vdpa/vdpa.c new file mode 100644 index 0000000..7fdb36b --- /dev/null +++ b/vdpa/vdpa.c @@ -0,0 +1,675 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "mnl_utils.h" + +#include "version.h" +#include "json_print.h" +#include "utils.h" + +#define VDPA_OPT_MGMTDEV_HANDLE BIT(0) +#define VDPA_OPT_VDEV_MGMTDEV_HANDLE BIT(1) +#define VDPA_OPT_VDEV_NAME BIT(2) +#define VDPA_OPT_VDEV_HANDLE BIT(3) + +struct vdpa_opts { + uint64_t present; /* flags of present items */ + char *mdev_bus_name; + char *mdev_name; + const char *vdev_name; + unsigned int device_id; +}; + +struct vdpa { + struct mnlu_gen_socket nlg; + struct vdpa_opts opts; + bool json_output; + struct indent_mem *indent; +}; + +static void pr_out_section_start(struct vdpa *vdpa, const char *name) +{ + open_json_object(NULL); + open_json_object(name); +} + +static void pr_out_section_end(struct vdpa *vdpa) +{ + close_json_object(); + close_json_object(); +} + +static void pr_out_array_start(struct vdpa *vdpa, const char *name) +{ + if (!vdpa->json_output) { + print_nl(); + inc_indent(vdpa->indent); + print_indent(vdpa->indent); + } + open_json_array(PRINT_ANY, name); +} + +static void pr_out_array_end(struct vdpa *vdpa) +{ + close_json_array(PRINT_JSON, NULL); + if (!vdpa->json_output) + dec_indent(vdpa->indent); +} + +static const enum mnl_attr_data_type vdpa_policy[VDPA_ATTR_MAX + 1] = { + [VDPA_ATTR_MGMTDEV_BUS_NAME] = MNL_TYPE_NUL_STRING, + [VDPA_ATTR_MGMTDEV_DEV_NAME] = MNL_TYPE_NUL_STRING, + [VDPA_ATTR_DEV_NAME] = MNL_TYPE_STRING, + [VDPA_ATTR_DEV_ID] = MNL_TYPE_U32, + [VDPA_ATTR_DEV_VENDOR_ID] = MNL_TYPE_U32, + [VDPA_ATTR_DEV_MAX_VQS] = MNL_TYPE_U32, + [VDPA_ATTR_DEV_MAX_VQ_SIZE] = MNL_TYPE_U16, +}; + +static int attr_cb(const struct nlattr *attr, void *data) +{ 
+ const struct nlattr **tb = data; + int type; + + if (mnl_attr_type_valid(attr, VDPA_ATTR_MAX) < 0) + return MNL_CB_OK; + + type = mnl_attr_get_type(attr); + if (mnl_attr_validate(attr, vdpa_policy[type]) < 0) + return MNL_CB_ERROR; + + tb[type] = attr; + return MNL_CB_OK; +} + +static int vdpa_argv_handle(struct vdpa *vdpa, int argc, char **argv, + char **p_mdev_bus_name, + char **p_mdev_name) +{ + unsigned int slashcount; + char *str; + + if (argc <= 0 || *argv == NULL) { + fprintf(stderr, + "vdpa identification (\"mgmtdev_bus_name/mgmtdev_name\") expected\n"); + return -EINVAL; + } + str = *argv; + slashcount = get_str_char_count(str, '/'); + if (slashcount > 1) { + fprintf(stderr, + "Wrong vdpa mgmtdev identification string format\n"); + fprintf(stderr, "Expected \"mgmtdev_bus_name/mgmtdev_name\"\n"); + fprintf(stderr, "Expected \"mgmtdev_name\"\n"); + return -EINVAL; + } + switch (slashcount) { + case 0: + *p_mdev_bus_name = NULL; + *p_mdev_name = str; + return 0; + case 1: + str_split_by_char(str, p_mdev_bus_name, p_mdev_name, '/'); + return 0; + default: + return -EINVAL; + } +} + +static int vdpa_argv_str(struct vdpa *vdpa, int argc, char **argv, + const char **p_str) +{ + if (argc <= 0 || *argv == NULL) { + fprintf(stderr, "String parameter expected\n"); + return -EINVAL; + } + *p_str = *argv; + return 0; +} + +struct vdpa_args_metadata { + uint64_t o_flag; + const char *err_msg; +}; + +static const struct vdpa_args_metadata vdpa_args_required[] = { + {VDPA_OPT_VDEV_MGMTDEV_HANDLE, "management device handle not set."}, + {VDPA_OPT_VDEV_NAME, "device name is not set."}, + {VDPA_OPT_VDEV_HANDLE, "device name is not set."}, +}; + +static int vdpa_args_finding_required_validate(uint64_t o_required, + uint64_t o_found) +{ + uint64_t o_flag; + int i; + + for (i = 0; i < ARRAY_SIZE(vdpa_args_required); i++) { + o_flag = vdpa_args_required[i].o_flag; + if ((o_required & o_flag) && !(o_found & o_flag)) { + fprintf(stderr, "%s\n", vdpa_args_required[i].err_msg); + 
return -EINVAL;
+		}
+	}
+	if (o_required & ~o_found) {
+		fprintf(stderr,
+			"BUG: unknown argument required but not found\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * Append the options recorded in vdpa->opts to the netlink request @nlh.
+ *
+ * If a management device was parsed (as the leading handle or through the
+ * "mgmtdev" keyword) its device name is always emitted; its bus name is
+ * emitted only when one was recorded.  If a vdpa device name was parsed (as
+ * the leading handle or through the "name" keyword) it is emitted as
+ * VDPA_ATTR_DEV_NAME.
+ */
+static void vdpa_opts_put(struct nlmsghdr *nlh, struct vdpa *vdpa)
+{
+	struct vdpa_opts *opts = &vdpa->opts;
+
+	if ((opts->present & VDPA_OPT_MGMTDEV_HANDLE) ||
+	    (opts->present & VDPA_OPT_VDEV_MGMTDEV_HANDLE)) {
+		if (opts->mdev_bus_name)
+			mnl_attr_put_strz(nlh, VDPA_ATTR_MGMTDEV_BUS_NAME,
+					  opts->mdev_bus_name);
+		mnl_attr_put_strz(nlh, VDPA_ATTR_MGMTDEV_DEV_NAME,
+				  opts->mdev_name);
+	}
+	if ((opts->present & VDPA_OPT_VDEV_NAME) ||
+	    (opts->present & VDPA_OPT_VDEV_HANDLE))
+		mnl_attr_put_strz(nlh, VDPA_ATTR_DEV_NAME, opts->vdev_name);
+}
+
+/*
+ * Parse the remaining command-line words into vdpa->opts.
+ *
+ * @o_required is a bitmask of VDPA_OPT_* flags.  When it asks for a
+ * management-device handle or a vdpa-device handle, that handle is taken from
+ * the first word; the two are mutually exclusive here, with the management
+ * device checked first.  Every following word must be one of the keywords
+ * "name" or "mgmtdev" (each only accepted when its flag is in @o_required),
+ * followed by its value.
+ *
+ * On success opts->present holds the set of options actually found.
+ *
+ * Returns 0 on success, the error of the failing helper, or -EINVAL for an
+ * unknown keyword or a required option that was not supplied.
+ */
+static int vdpa_argv_parse(struct vdpa *vdpa, int argc, char **argv,
+			   uint64_t o_required)
+{
+	struct vdpa_opts *opts = &vdpa->opts;
+	uint64_t o_all = o_required;
+	uint64_t o_found = 0;
+	int err;
+
+	/* Leading positional handle, if the command requires one. */
+	if (o_required & VDPA_OPT_MGMTDEV_HANDLE) {
+		err = vdpa_argv_handle(vdpa, argc, argv, &opts->mdev_bus_name,
+				       &opts->mdev_name);
+		if (err)
+			return err;
+
+		NEXT_ARG_FWD();
+		o_found |= VDPA_OPT_MGMTDEV_HANDLE;
+	} else if (o_required & VDPA_OPT_VDEV_HANDLE) {
+		err = vdpa_argv_str(vdpa, argc, argv, &opts->vdev_name);
+		if (err)
+			return err;
+
+		NEXT_ARG_FWD();
+		o_found |= VDPA_OPT_VDEV_HANDLE;
+	}
+
+	/* Keyword/value pairs. */
+	while (NEXT_ARG_OK()) {
+		if ((matches(*argv, "name") == 0) &&
+		    (o_all & VDPA_OPT_VDEV_NAME)) {
+			const char *namestr;
+
+			NEXT_ARG_FWD();
+			err = vdpa_argv_str(vdpa, argc, argv, &namestr);
+			if (err)
+				return err;
+			opts->vdev_name = namestr;
+			NEXT_ARG_FWD();
+			o_found |= VDPA_OPT_VDEV_NAME;
+		} else if ((matches(*argv, "mgmtdev") == 0) &&
+			   (o_all & VDPA_OPT_VDEV_MGMTDEV_HANDLE)) {
+			NEXT_ARG_FWD();
+			err = vdpa_argv_handle(vdpa, argc, argv,
+					       &opts->mdev_bus_name,
+					       &opts->mdev_name);
+			if (err)
+				return err;
+
+			NEXT_ARG_FWD();
+			o_found |= VDPA_OPT_VDEV_MGMTDEV_HANDLE;
+		} else {
+			fprintf(stderr, "Unknown option \"%s\"\n", *argv);
+			return -EINVAL;
+		}
+	}
+
+	opts->present = o_found;
+
+	return vdpa_args_finding_required_validate(o_required, o_found);
+}
+
+/*
+ * Parse the command-line words and, if that succeeds, add the resulting
+ * options to @nlh.  Returns 0 on success or the parse error; nothing is
+ * added to the message when parsing fails.
+ */
+static int vdpa_argv_parse_put(struct nlmsghdr *nlh, struct vdpa *vdpa,
+			       int argc, char **argv,
+			       uint64_t o_required)
+{
+	int err;
+
+	err = vdpa_argv_parse(vdpa, argc, argv, o_required);
+	if (err)
+		return err;
+	vdpa_opts_put(nlh, vdpa);
+	return 0;
+}
+
+/* Print the usage text of the "mgmtdev" object to stderr. */
+static void cmd_mgmtdev_help(void)
+{
+	fprintf(stderr, "Usage: vdpa mgmtdev show [ DEV ]\n");
+}
+
+/*
+ * Open the output entry for one management device.
+ *
+ * The entry is labelled "BUS/NAME" when the message carries a bus name and
+ * "NAME" otherwise.  In JSON mode a JSON object with that label is opened;
+ * in plain mode "LABEL: " is printed.  The caller must have checked that
+ * tb[VDPA_ATTR_MGMTDEV_DEV_NAME] is present.
+ */
+static void pr_out_handle_start(struct vdpa *vdpa, struct nlattr **tb)
+{
+	const char *mdev_bus_name = NULL;
+	const char *mdev_name;
+	SPRINT_BUF(buf);
+
+	mdev_name = mnl_attr_get_str(tb[VDPA_ATTR_MGMTDEV_DEV_NAME]);
+	/*
+	 * The names come from a netlink message and have no length bound
+	 * here, so format with snprintf(): an over-long label is truncated
+	 * instead of overrunning the stack buffer.
+	 */
+	if (tb[VDPA_ATTR_MGMTDEV_BUS_NAME]) {
+		mdev_bus_name = mnl_attr_get_str(tb[VDPA_ATTR_MGMTDEV_BUS_NAME]);
+		snprintf(buf, sizeof(buf), "%s/%s", mdev_bus_name, mdev_name);
+	} else {
+		snprintf(buf, sizeof(buf), "%s", mdev_name);
+	}
+
+	if (vdpa->json_output)
+		open_json_object(buf);
+	else
+		printf("%s: ", buf);
+}
+
+/*
+ * Close the entry opened by pr_out_handle_start(): close the JSON object in
+ * JSON mode, otherwise end the output line.
+ */
+static void pr_out_handle_end(struct vdpa *vdpa)
+{
+	if (vdpa->json_output)
+		close_json_object();
+	else
+		print_nl();
+}
+
+/*
+ * Open the output entry for the vdpa device called @vdev_name: a JSON object
+ * named after the device in JSON mode, "NAME: " in plain mode.
+ */
+static void __pr_out_vdev_handle_start(struct vdpa *vdpa, const char *vdev_name)
+{
+	SPRINT_BUF(buf);
+
+	/* Bounded copy: truncate an over-long name rather than overflow buf. */
+	snprintf(buf, sizeof(buf), "%s", vdev_name);
+	if (vdpa->json_output)
+		open_json_object(buf);
+	else
+		printf("%s: ", buf);
+}
+
+/*
+ * Open the output entry for the vdpa device named by tb[VDPA_ATTR_DEV_NAME].
+ * The caller must have checked that the attribute is present.
+ */
+static void pr_out_vdev_handle_start(struct vdpa *vdpa, struct nlattr **tb)
+{
+	const char *vdev_name;
+
+	vdev_name = mnl_attr_get_str(tb[VDPA_ATTR_DEV_NAME]);
+	__pr_out_vdev_handle_start(vdpa, vdev_name);
+}
+
+/*
+ * Close the entry opened by pr_out_vdev_handle_start(): close the JSON object
+ * in JSON mode, otherwise end the output line.
+ */
+static void pr_out_vdev_handle_end(struct vdpa *vdpa)
+{
+	if (vdpa->json_output)
+		close_json_object();
+	else
+		print_nl();
+}
+
+/* Device class numbers and their printable names; NULL .str ends the table. */
+static struct str_num_map class_map[] = {
+	{ .str = "net", .num = VIRTIO_ID_NET },
+	{ .str = "block", .num = VIRTIO_ID_BLOCK },
+	{ .str = NULL, },
+};
+
+/*
+ * Return the printable name of device class @num, or a fixed placeholder
+ * string when the class is not listed in class_map.  Never returns NULL.
+ */
+static const char *parse_class(int num)
+{
+	const char *class;
+
+	class = str_map_lookup_uint(class_map, num);
+	return class ? class : "< unknown class >";
+}
+
+/*
+ * Print one management device: its handle followed, when the message carries
+ * VDPA_ATTR_MGMTDEV_SUPPORTED_CLASSES, by the "supported_classes" array.
+ *
+ * The attribute is read as a 64-bit mask in which bit N stands for class
+ * number N.  Bits 1..63 are examined; bit 0 is not.
+ * NOTE(review): presumably bit 0 is skipped because device ID 0 is reserved
+ * in virtio - confirm.
+ *
+ * @nlh is not used.
+ */
+static void pr_out_mgmtdev_show(struct vdpa *vdpa, const struct nlmsghdr *nlh,
+				struct nlattr **tb)
+{
+	const char *class;
+	unsigned int i;
+
+	pr_out_handle_start(vdpa, tb);
+
+	if (tb[VDPA_ATTR_MGMTDEV_SUPPORTED_CLASSES]) {
+		uint64_t classes = mnl_attr_get_u64(tb[VDPA_ATTR_MGMTDEV_SUPPORTED_CLASSES]);
+
+		pr_out_array_start(vdpa, "supported_classes");
+
+		for (i = 1; i < 64; i++) {
+			if ((classes & (1ULL << i)) == 0)
+				continue;
+
+			class = parse_class(i);
+			print_string(PRINT_ANY, NULL, " %s", class);
+		}
+		pr_out_array_end(vdpa);
+	}
+
+	pr_out_handle_end(vdpa);
+}
+
+/*
+ * Netlink receive callback for "mgmtdev show"; @data is the struct vdpa.
+ * Parses the attributes of one reply and prints the device.  Returns
+ * MNL_CB_ERROR when the reply lacks the management device name, MNL_CB_OK
+ * otherwise.
+ */
+static int cmd_mgmtdev_show_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
+	struct nlattr *tb[VDPA_ATTR_MAX + 1] = {};
+	struct vdpa *vdpa = data;
+
+	mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb);
+
+	if (!tb[VDPA_ATTR_MGMTDEV_DEV_NAME])
+		return MNL_CB_ERROR;
+
+	pr_out_mgmtdev_show(vdpa, nlh, tb);
+
+	return MNL_CB_OK;
+}
+
+/*
+ * "vdpa mgmtdev show [ DEV ]".
+ *
+ * Without arguments a dump request (NLM_F_DUMP) is sent; with arguments the
+ * first one is parsed as the management device handle and added to the
+ * request.  Replies are printed inside a "mgmtdev" section.
+ *
+ * Returns the parse error, or the result of the send/receive exchange.
+ */
+static int cmd_mgmtdev_show(struct vdpa *vdpa, int argc, char **argv)
+{
+	uint16_t flags = NLM_F_REQUEST | NLM_F_ACK;
+	struct nlmsghdr *nlh;
+	int err;
+
+	if (argc == 0)
+		flags |= NLM_F_DUMP;
+
+	nlh = mnlu_gen_socket_cmd_prepare(&vdpa->nlg, VDPA_CMD_MGMTDEV_GET,
+					  flags);
+	if (argc > 0) {
+		err = vdpa_argv_parse_put(nlh, vdpa, argc, argv,
+					  VDPA_OPT_MGMTDEV_HANDLE);
+		if (err)
+			return err;
+	}
+
+	pr_out_section_start(vdpa, "mgmtdev");
+	err = mnlu_gen_socket_sndrcv(&vdpa->nlg, nlh, cmd_mgmtdev_show_cb, vdpa);
+	pr_out_section_end(vdpa);
+	return err;
+}
+
+/*
+ * Dispatch a "vdpa mgmtdev ..." command.  No sub-command, or "help", prints
+ * the usage text and returns 0; "show" and "list" run cmd_mgmtdev_show().
+ * Any other word is reported on stderr and -ENOENT is returned.
+ */
+static int cmd_mgmtdev(struct vdpa *vdpa, int argc, char **argv)
+{
+	if (!argc || matches(*argv, "help") == 0) {
+		cmd_mgmtdev_help();
+		return 0;
+	} else if (matches(*argv, "show") == 0 ||
+		   matches(*argv, "list") == 0) {
+		return cmd_mgmtdev_show(vdpa, argc - 1, argv + 1);
+	}
+	fprintf(stderr, "Command \"%s\" not found\n", *argv);
+	return -ENOENT;
+}
+
+/* Print the usage text of the "dev" object to stderr. */
+static void cmd_dev_help(void)
+{
+	fprintf(stderr, "Usage: vdpa dev show [ DEV ]\n");
+	fprintf(stderr, " vdpa dev add name NAME mgmtdev MANAGEMENTDEV\n");
+	fprintf(stderr, " vdpa dev del DEV\n");
+}
+
+/*
+ * Return the printable type of a vdpa device from its device ID: "network",
+ * "block", or an empty string for any other ID.  Uses the same ID constants
+ * as class_map.
+ */
+static const char *device_type_name(uint32_t type)
+{
+	switch (type) {
+	case VIRTIO_ID_NET: return "network";
+	case VIRTIO_ID_BLOCK: return "block";
+	default: return "";
+	}
+}
+
+/*
+ * Print one vdpa device: its name as the entry handle, then "type" and
+ * "mgmtdev" ("BUS/NAME", or "NAME" when the message has no bus name), and
+ * "vendor_id", "max_vqs" and "max_vq_size" for whichever of those attributes
+ * are present.
+ *
+ * The caller must have checked that VDPA_ATTR_MGMTDEV_DEV_NAME,
+ * VDPA_ATTR_DEV_NAME and VDPA_ATTR_DEV_ID are present in @tb.
+ */
+static void pr_out_dev(struct vdpa *vdpa, struct nlattr **tb)
+{
+	const char *mdev_name = mnl_attr_get_str(tb[VDPA_ATTR_MGMTDEV_DEV_NAME]);
+	uint32_t device_id = mnl_attr_get_u32(tb[VDPA_ATTR_DEV_ID]);
+	const char *mdev_bus_name = NULL;
+	char mgmtdev_buf[128];
+
+	if (tb[VDPA_ATTR_MGMTDEV_BUS_NAME])
+		mdev_bus_name = mnl_attr_get_str(tb[VDPA_ATTR_MGMTDEV_BUS_NAME]);
+
+	/*
+	 * Bounded formatting: names taken from the netlink message are
+	 * truncated to the buffer instead of overrunning it.
+	 */
+	if (mdev_bus_name)
+		snprintf(mgmtdev_buf, sizeof(mgmtdev_buf), "%s/%s",
+			 mdev_bus_name, mdev_name);
+	else
+		snprintf(mgmtdev_buf, sizeof(mgmtdev_buf), "%s", mdev_name);
+	pr_out_vdev_handle_start(vdpa, tb);
+	print_string(PRINT_ANY, "type", "type %s", device_type_name(device_id));
+	print_string(PRINT_ANY, "mgmtdev", " mgmtdev %s", mgmtdev_buf);
+
+	if (tb[VDPA_ATTR_DEV_VENDOR_ID])
+		print_uint(PRINT_ANY, "vendor_id", " vendor_id %u",
+			   mnl_attr_get_u32(tb[VDPA_ATTR_DEV_VENDOR_ID]));
+	if (tb[VDPA_ATTR_DEV_MAX_VQS])
+		print_uint(PRINT_ANY, "max_vqs", " max_vqs %u",
+			   mnl_attr_get_u32(tb[VDPA_ATTR_DEV_MAX_VQS]));
+	if (tb[VDPA_ATTR_DEV_MAX_VQ_SIZE])
+		print_uint(PRINT_ANY, "max_vq_size", " max_vq_size %u",
+			   mnl_attr_get_u16(tb[VDPA_ATTR_DEV_MAX_VQ_SIZE]));
+	pr_out_vdev_handle_end(vdpa);
+}
+
+/*
+ * Netlink receive callback for "dev show"; @data is the struct vdpa.
+ * Returns MNL_CB_ERROR when the reply lacks the management device name, the
+ * device name or the device ID; otherwise prints the device and returns
+ * MNL_CB_OK.
+ */
+static int cmd_dev_show_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
+	struct nlattr *tb[VDPA_ATTR_MAX + 1] = {};
+	struct vdpa *vdpa = data;
+
+	mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb);
+	if (!tb[VDPA_ATTR_MGMTDEV_DEV_NAME] ||
+	    !tb[VDPA_ATTR_DEV_NAME] || !tb[VDPA_ATTR_DEV_ID])
+		return MNL_CB_ERROR;
+	pr_out_dev(vdpa, tb);
+	return MNL_CB_OK;
+}
+
+/*
+ * "vdpa dev show [ DEV ]".
+ *
+ * With @argc <= 0 a dump request (NLM_F_DUMP) is sent; cmd_dev() passes -1
+ * when "vdpa dev" is run without a sub-command.  With arguments the first
+ * one is parsed as the vdpa device name and added to the request.  Replies
+ * are printed inside a "dev" section.
+ *
+ * Returns the parse error, or the result of the send/receive exchange.
+ */
+static int cmd_dev_show(struct vdpa *vdpa, int argc, char **argv)
+{
+	uint16_t flags = NLM_F_REQUEST | NLM_F_ACK;
+	struct nlmsghdr *nlh;
+	int err;
+
+	if (argc <= 0)
+		flags |= NLM_F_DUMP;
+
+	nlh = mnlu_gen_socket_cmd_prepare(&vdpa->nlg, VDPA_CMD_DEV_GET, flags);
+	if (argc > 0) {
+		err = vdpa_argv_parse_put(nlh, vdpa, argc, argv,
+					  VDPA_OPT_VDEV_HANDLE);
+		if (err)
+			return err;
+	}
+
+	pr_out_section_start(vdpa, "dev");
+	err = mnlu_gen_socket_sndrcv(&vdpa->nlg, nlh, cmd_dev_show_cb, vdpa);
+	pr_out_section_end(vdpa);
+	return err;
+}
+
+/*
+ * "vdpa dev add name NAME mgmtdev MANAGEMENTDEV".
+ * Both the "name" and the "mgmtdev" options are required.  Returns the parse
+ * error, or the result of the send/receive exchange.
+ */
+static int cmd_dev_add(struct vdpa *vdpa, int argc, char **argv)
+{
+	struct nlmsghdr *nlh;
+	int err;
+
+	nlh = mnlu_gen_socket_cmd_prepare(&vdpa->nlg, VDPA_CMD_DEV_NEW,
+					  NLM_F_REQUEST | NLM_F_ACK);
+	err = vdpa_argv_parse_put(nlh, vdpa, argc, argv,
+				  VDPA_OPT_VDEV_MGMTDEV_HANDLE | VDPA_OPT_VDEV_NAME);
+	if (err)
+		return err;
+
+	return mnlu_gen_socket_sndrcv(&vdpa->nlg, nlh, NULL, NULL);
+}
+
+/*
+ * "vdpa dev del DEV".
+ * The first argument is parsed as the name of the device to delete.  Returns
+ * the parse error, or the result of the send/receive exchange.
+ */
+static int cmd_dev_del(struct vdpa *vdpa, int argc, char **argv)
+{
+	struct nlmsghdr *nlh;
+	int err;
+
+	nlh = mnlu_gen_socket_cmd_prepare(&vdpa->nlg, VDPA_CMD_DEV_DEL,
+					  NLM_F_REQUEST | NLM_F_ACK);
+	err = vdpa_argv_parse_put(nlh, vdpa, argc, argv, VDPA_OPT_VDEV_HANDLE);
+	if (err)
+		return err;
+
+	return mnlu_gen_socket_sndrcv(&vdpa->nlg, nlh, NULL, NULL);
+}
+
+/*
+ * Dispatch a "vdpa dev ..." command.  With no sub-command all devices are
+ * shown; "help" prints the usage text; "show"/"list", "add" and "del" run
+ * the matching handler.  Any other word is reported on stderr and -ENOENT is
+ * returned.
+ */
+static int cmd_dev(struct vdpa *vdpa, int argc, char **argv)
+{
+	/* argc - 1 is -1 here, which cmd_dev_show() treats as "dump all". */
+	if (!argc)
+		return cmd_dev_show(vdpa, argc - 1, argv + 1);
+
+	if (matches(*argv, "help") == 0) {
+		cmd_dev_help();
+		return 0;
+	} else if (matches(*argv, "show") == 0 ||
+		   matches(*argv, "list") == 0) {
+		return cmd_dev_show(vdpa, argc - 1, argv + 1);
+	} else if (matches(*argv, "add") == 0) {
+		return cmd_dev_add(vdpa, argc - 1, argv + 1);
+	} else if (matches(*argv, "del") == 0) {
+		return cmd_dev_del(vdpa, argc - 1, argv + 1);
+	}
+	fprintf(stderr, "Command \"%s\" not found\n", *argv);
+	return -ENOENT;
+}
+
+/*
+ * Print the top-level usage text to stderr.
+ * NOTE(review): the text lists -n and -v, but the option parsing in main()
+ * has no handling for either - confirm whether the text or the parser is
+ * the one to change.
+ */
+static void help(void)
+{
+	fprintf(stderr,
+		"Usage: vdpa [ OPTIONS ] OBJECT { COMMAND | help }\n"
+		"where OBJECT := { mgmtdev | dev }\n"
+		" OPTIONS := { -V[ersion] | -n[o-nice-names] | -j[son] | -p[retty] | -v[erbose] }\n");
+}
+
+/*
+ * Dispatch on the OBJECT word.  No object, or "help", prints the usage text
+ * and returns 0; "mgmtdev" and "dev" run the matching dispatcher.  Any other
+ * word is reported on stderr and -ENOENT is returned.
+ */
+static int vdpa_cmd(struct vdpa *vdpa, int argc, char **argv)
+{
+	if (!argc || matches(*argv, "help") == 0) {
+		help();
+		return 0;
+	} else if (matches(*argv, "mgmtdev") == 0) {
+		return cmd_mgmtdev(vdpa, argc - 1, argv + 1);
+	} else if (matches(*argv, "dev") == 0) {
+		return cmd_dev(vdpa, argc - 1, argv + 1);
+	}
+	fprintf(stderr, "Object \"%s\" not found\n", *argv);
+	return -ENOENT;
+}
+
+/*
+ * Open the generic netlink socket for the vdpa family and set up the JSON
+ * writer according to vdpa->json_output.
+ *
+ * Returns 0 on success and a non-zero value when the socket cannot be
+ * opened: -errno if errno was set, otherwise the error returned by
+ * mnlu_gen_socket_open().
+ */
+static int vdpa_init(struct vdpa *vdpa)
+{
+	int err;
+
+	err = mnlu_gen_socket_open(&vdpa->nlg, VDPA_GENL_NAME,
+				   VDPA_GENL_VERSION);
+	if (err) {
+		/* Snapshot errno: the fprintf() below may modify it. */
+		int saved_errno = errno;
+
+		fprintf(stderr, "Failed to connect to vdpa Netlink\n");
+		/* Never report success on this path, even if errno was 0. */
+		return saved_errno ? -saved_errno : err;
+	}
+	new_json_obj_plain(vdpa->json_output);
+	return 0;
+}
+
+/* Undo vdpa_init(): destroy the JSON writer and close the netlink socket. */
+static void vdpa_fini(struct vdpa *vdpa)
+{
+	delete_json_obj_plain();
+	mnlu_gen_socket_close(&vdpa->nlg);
+}
+
+/*
+ * Allocate a zero-initialised struct vdpa together with its indent state.
+ * Returns NULL if either allocation fails; the result is released with
+ * vdpa_free().
+ */
+static struct vdpa *vdpa_alloc(void)
+{
+	struct vdpa *vdpa = calloc(1, sizeof(*vdpa));
+
+	if (!vdpa)
+		return NULL;
+
+	vdpa->indent = alloc_indent_mem();
+	if (!vdpa->indent)
+		goto indent_err;
+
+	return vdpa;
+
+indent_err:
+	free(vdpa);
+	return NULL;
+}
+
+/* Release a struct vdpa obtained from vdpa_alloc(). */
+static void vdpa_free(struct vdpa *vdpa)
+{
+	free_indent_mem(vdpa->indent);
+	free(vdpa);
+}
+
+/*
+ * Entry point: parse the global options, connect to the vdpa netlink family
+ * and run the requested command.
+ *
+ * -V prints the version and -h the usage text; both exit successfully without
+ * opening the socket.  -j selects JSON output and -p sets the pretty flag.
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE on an allocation failure, an unknown
+ * option, a connection failure or a failed command.
+ */
+int main(int argc, char **argv)
+{
+	static const struct option long_options[] = {
+		{ "Version", no_argument, NULL, 'V' },
+		{ "json", no_argument, NULL, 'j' },
+		{ "pretty", no_argument, NULL, 'p' },
+		{ "help", no_argument, NULL, 'h' },
+		{ NULL, 0, NULL, 0 }
+	};
+	struct vdpa *vdpa;
+	int opt;
+	int err;
+	int ret;
+
+	vdpa = vdpa_alloc();
+	if (!vdpa) {
+		fprintf(stderr, "Failed to allocate memory for vdpa\n");
+		return EXIT_FAILURE;
+	}
+
+	/*
+	 * NOTE(review): 's' is in the option string but has no case below,
+	 * so -s ends up in the default branch - confirm this is intended.
+	 */
+	while ((opt = getopt_long(argc, argv, "Vjpsh", long_options, NULL)) >= 0) {
+		switch (opt) {
+		case 'V':
+			printf("vdpa utility, iproute2-%s\n", version);
+			ret = EXIT_SUCCESS;
+			goto vdpa_free;
+		case 'j':
+			vdpa->json_output = true;
+			break;
+		case 'p':
+			pretty = true;
+			break;
+		case 'h':
+			help();
+			ret = EXIT_SUCCESS;
+			goto vdpa_free;
+		default:
+			fprintf(stderr, "Unknown option.\n");
+			help();
+			ret = EXIT_FAILURE;
+			goto vdpa_free;
+		}
+	}
+
+	/* Skip the options that getopt_long() consumed. */
+	argc -= optind;
+	argv += optind;
+
+	err = vdpa_init(vdpa);
+	if (err) {
+		ret = EXIT_FAILURE;
+		goto vdpa_free;
+	}
+
+	err = vdpa_cmd(vdpa, argc, argv);
+	if (err) {
+		ret = EXIT_FAILURE;
+		goto vdpa_fini;
+	}
+
+	ret = EXIT_SUCCESS;
+
+vdpa_fini:
+	vdpa_fini(vdpa);
+vdpa_free:
+	vdpa_free(vdpa);
+	return ret;
+}