[lxc-devel] [lxc/master] network: improve veth device creation

brauner on Github lxc-bot at linuxcontainers.org
Wed Jan 8 16:03:03 UTC 2020


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 588 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20200108/524d3abf/attachment.bin>
-------------- next part --------------
From f78d30d14fb60016b6915ccb885b1f9de1a2d29d Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Wed, 8 Jan 2020 14:17:06 +0100
Subject: [PATCH 1/2] start: move network device creation after setting ids

This will ensure that ownership of the devices is correct.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/start.c | 57 +++++++++++++++++++++++++------------------------
 1 file changed, 29 insertions(+), 28 deletions(-)

diff --git a/src/lxc/start.c b/src/lxc/start.c
index d62acf4bd8..a87c3ff935 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -1169,34 +1169,6 @@ static int do_start(void *data)
 	if (ret < 0)
 		goto out_warn_father;
 
-	/* Unshare CLONE_NEWNET after CLONE_NEWUSER. See
-	 * https://github.com/lxc/lxd/issues/1978.
-	 */
-	if ((handler->ns_clone_flags & (CLONE_NEWNET | CLONE_NEWUSER)) ==
-	    (CLONE_NEWNET | CLONE_NEWUSER)) {
-		ret = unshare(CLONE_NEWNET);
-		if (ret < 0) {
-			SYSERROR("Failed to unshare CLONE_NEWNET");
-			goto out_warn_father;
-		}
-		INFO("Unshared CLONE_NEWNET");
-	}
-
-	/* Tell the parent task it can begin to configure the container and wait
-	 * for it to finish.
-	 */
-	ret = lxc_sync_barrier_parent(handler, LXC_SYNC_CONFIGURE);
-	if (ret < 0)
-		goto out_error;
-
-	if (handler->ns_clone_flags & CLONE_NEWNET) {
-		ret = lxc_network_recv_from_parent(handler);
-		if (ret < 0) {
-			ERROR("Failed to receive veth names from parent");
-			goto out_warn_father;
-		}
-	}
-
 	/* If we are in a new user namespace, become root there to have
 	 * privilege over our namespace.
 	 */
@@ -1230,6 +1202,35 @@ static int do_start(void *data)
 		}
 	}
 
+	/* Unshare CLONE_NEWNET after CLONE_NEWUSER. See
+	 * https://github.com/lxc/lxd/issues/1978.
+	 */
+	if ((handler->ns_clone_flags & (CLONE_NEWNET | CLONE_NEWUSER)) ==
+	    (CLONE_NEWNET | CLONE_NEWUSER)) {
+		ret = unshare(CLONE_NEWNET);
+		if (ret < 0) {
+			SYSERROR("Failed to unshare CLONE_NEWNET");
+			goto out_warn_father;
+		}
+		INFO("Unshared CLONE_NEWNET");
+	}
+
+	/* Tell the parent task it can begin to configure the container and wait
+	 * for it to finish.
+	 */
+	ret = lxc_sync_barrier_parent(handler, LXC_SYNC_CONFIGURE);
+	if (ret < 0)
+		goto out_error;
+
+	if (handler->ns_clone_flags & CLONE_NEWNET) {
+		ret = lxc_network_recv_from_parent(handler);
+		if (ret < 0) {
+			ERROR("Failed to receive veth names from parent");
+			goto out_warn_father;
+		}
+	}
+
+
 	ret = access(handler->lxcpath, X_OK);
 	if (ret != 0) {
 		print_top_failing_dir(handler->lxcpath);

From ad4d4fd81e6f38efb65d26d55e8ffca81855949e Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Wed, 8 Jan 2020 15:13:16 +0100
Subject: [PATCH 2/2] network: create veth directly in the container's
 namespace

This allows us to avoid having to move the network device. It also allows us to
work around a kernel bug that in combination with a recent change in systemd
244 causes uses of systemd-networkd to not get an ip address.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/cmd/lxc_user_nic.c |  18 ++---
 src/lxc/network.c          | 147 +++++++++++++++----------------------
 src/lxc/network.h          |   5 +-
 3 files changed, 68 insertions(+), 102 deletions(-)

diff --git a/src/lxc/cmd/lxc_user_nic.c b/src/lxc/cmd/lxc_user_nic.c
index cfb9d8d563..ae23621800 100644
--- a/src/lxc/cmd/lxc_user_nic.c
+++ b/src/lxc/cmd/lxc_user_nic.c
@@ -487,11 +487,11 @@ static char *find_line(char *buf_start, char *buf_end, char *name,
 	return NULL;
 }
 
-static int instantiate_veth(char *veth1, char *veth2)
+static int instantiate_veth(char *veth1, char *veth2, pid_t pid)
 {
 	int ret;
 
-	ret = lxc_veth_create(veth1, veth2);
+	ret = lxc_veth_create(veth1, veth2, pid);
 	if (ret < 0) {
 		errno = -ret;
 		CMD_SYSERROR("Failed to create %s-%s\n", veth1, veth2);
@@ -540,7 +540,7 @@ static int create_nic(char *nic, char *br, int pid, char **cnic)
 	}
 
 	/* create the nics */
-	ret = instantiate_veth(veth1buf, veth2buf);
+	ret = instantiate_veth(veth1buf, veth2buf, pid);
 	if (ret < 0) {
 		usernic_error("%s", "Error creating veth tunnel\n");
 		return -1;
@@ -550,14 +550,14 @@ static int create_nic(char *nic, char *br, int pid, char **cnic)
 		/* copy the bridge's mtu to both ends */
 		mtu = get_mtu(br);
 		if (mtu > 0) {
-			ret = lxc_netdev_set_mtu(veth1buf, mtu);
+			ret = lxc_netdev_set_mtu(veth1buf, mtu, -1);
 			if (ret < 0) {
 				usernic_error("Failed to set mtu to %d on %s\n",
 					      mtu, veth1buf);
 				goto out_del;
 			}
 
-			ret = lxc_netdev_set_mtu(veth2buf, mtu);
+			ret = lxc_netdev_set_mtu(veth2buf, mtu, pid);
 			if (ret < 0) {
 				usernic_error("Failed to set mtu to %d on %s\n",
 					      mtu, veth2buf);
@@ -573,14 +573,6 @@ static int create_nic(char *nic, char *br, int pid, char **cnic)
 		}
 	}
 
-	/* pass veth2 to target netns */
-	ret = lxc_netdev_move_by_name(veth2buf, pid, NULL);
-	if (ret < 0) {
-		usernic_error("Error moving %s to network namespace of %d\n",
-			      veth2buf, pid);
-		goto out_del;
-	}
-
 	*cnic = strdup(veth2buf);
 	if (!*cnic) {
 		usernic_error("Failed to copy string \"%s\"\n", veth2buf);
diff --git a/src/lxc/network.c b/src/lxc/network.c
index 65eca60e83..00bd649302 100644
--- a/src/lxc/network.c
+++ b/src/lxc/network.c
@@ -271,10 +271,9 @@ static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
 
 static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
 {
-	int bridge_index, err;
+	int err;
 	char *veth1, *veth2;
 	char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
-	unsigned int mtu = 0;
 
 	if (netdev->priv.veth_attr.pair[0] != '\0') {
 		veth1 = netdev->priv.veth_attr.pair;
@@ -299,13 +298,13 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd
 
 	veth2 = lxc_mkifname(veth2buf);
 	if (!veth2)
-		goto out_delete;
+		return -1;
 
-	err = lxc_veth_create(veth1, veth2);
+	err = lxc_veth_create(veth1, veth2, handler->pid);
 	if (err) {
 		errno = -err;
 		SYSERROR("Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
-		goto out_delete;
+		return -1;
 	}
 
 	strlcpy(netdev->created_name, veth2, IFNAMSIZ);
@@ -327,43 +326,36 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd
 		goto out_delete;
 	}
 
-	/* Note that we're retrieving the container's ifindex in the host's
-	 * network namespace because we need it to move the device from the
-	 * host's network namespace to the container's network namespace later
-	 * on.
-	 */
-	netdev->ifindex = if_nametoindex(veth2);
-	if (!netdev->ifindex) {
-		ERROR("Failed to retrieve ifindex for \"%s\"", veth2);
-		goto out_delete;
-	}
 
 	if (netdev->mtu) {
-		if (lxc_safe_uint(netdev->mtu, &mtu) < 0)
+		if (lxc_safe_uint(netdev->mtu, &netdev->mtu_converted) < 0)
 			WARN("Failed to parse mtu");
 		else
-			INFO("Retrieved mtu %d", mtu);
+			INFO("Retrieved mtu %d", netdev->mtu_converted);
 	} else if (netdev->link[0] != '\0') {
-		bridge_index = if_nametoindex(netdev->link);
-		if (bridge_index) {
-			mtu = netdev_get_mtu(bridge_index);
-			INFO("Retrieved mtu %d from %s", mtu, netdev->link);
+		int ifindex_mtu;
+
+		ifindex_mtu = if_nametoindex(netdev->link);
+		if (ifindex_mtu) {
+			netdev->mtu_converted = netdev_get_mtu(ifindex_mtu);
+			INFO("Retrieved mtu %d from %s", netdev->mtu_converted, netdev->link);
 		} else {
-			mtu = netdev_get_mtu(netdev->ifindex);
-			INFO("Retrieved mtu %d from %s", mtu, veth2);
+			ifindex_mtu = if_nametoindex(veth1);
+			if (ifindex_mtu) {
+				netdev->mtu_converted = netdev_get_mtu(ifindex_mtu);
+				INFO("Retrieved mtu %d from %s", netdev->mtu_converted, veth1);
+			} else {
+				SYSWARN("Failed to retrieve mtu from %s", veth1);
+			}
 		}
 	}
 
-	if (mtu) {
-		err = lxc_netdev_set_mtu(veth1, mtu);
-		if (!err)
-			err = lxc_netdev_set_mtu(veth2, mtu);
-
+	if (netdev->mtu_converted) {
+		err = lxc_netdev_set_mtu(veth1, netdev->mtu_converted, -1);
 		if (err) {
-			errno = -err;
-			SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" "
-			         "and \"%s\"", mtu, veth1, veth2);
-			goto out_delete;
+                        errno = -err;
+                        SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" ", netdev->mtu_converted, veth1);
+                        goto out_delete;
 		}
 	}
 
@@ -484,14 +476,12 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd
 			goto out_delete;
 	}
 
-	DEBUG("Instantiated veth \"%s/%s\", index is \"%d\"", veth1, veth2,
-	      netdev->ifindex);
+	DEBUG("Instantiated veth tunnel \"%s <--> %s\"", veth1, veth2);
 
 	return 0;
 
 out_delete:
-	if (netdev->ifindex != 0)
-		lxc_netdev_delete_by_name(veth1);
+	lxc_netdev_delete_by_name(veth1);
 	return -1;
 }
 
@@ -499,7 +489,6 @@ static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *n
 {
 	char peer[IFNAMSIZ];
 	int err;
-	unsigned int mtu = 0;
 
 	if (netdev->link[0] == '\0') {
 		ERROR("No link for macvlan network device specified");
@@ -531,19 +520,12 @@ static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *n
 	}
 
 	if (netdev->mtu) {
-		err = lxc_safe_uint(netdev->mtu, &mtu);
+		err = lxc_safe_uint(netdev->mtu, &netdev->mtu_converted);
 		if (err < 0) {
 			errno = -err;
 			SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
 			goto on_error;
 		}
-
-		err = lxc_netdev_set_mtu(peer, mtu);
-		if (err < 0) {
-			errno = -err;
-			SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
-			goto on_error;
-		}
 	}
 
 	if (netdev->upscript) {
@@ -661,7 +643,6 @@ static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *ne
 {
 	char peer[IFNAMSIZ];
 	int err;
-	unsigned int mtu = 0;
 
 	if (netdev->link[0] == '\0') {
 		ERROR("No link for ipvlan network device specified");
@@ -692,21 +673,13 @@ static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *ne
 	}
 
 	if (netdev->mtu) {
-		err = lxc_safe_uint(netdev->mtu, &mtu);
+		err = lxc_safe_uint(netdev->mtu, &netdev->mtu_converted);
 		if (err < 0) {
 			errno = -err;
 			SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"",
 				 netdev->mtu, peer);
 			goto on_error;
 		}
-
-		err = lxc_netdev_set_mtu(peer, mtu);
-		if (err < 0) {
-			errno = -err;
-			SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"",
-				 netdev->mtu, peer);
-			goto on_error;
-		}
 	}
 
 	if (netdev->upscript) {
@@ -737,7 +710,6 @@ static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netd
 	char peer[IFNAMSIZ];
 	int err;
 	static uint16_t vlan_cntr = 0;
-	unsigned int mtu = 0;
 
 	if (netdev->link[0] == '\0') {
 		ERROR("No link for vlan network device specified");
@@ -766,21 +738,13 @@ static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netd
 	}
 
 	if (netdev->mtu) {
-		err = lxc_safe_uint(netdev->mtu, &mtu);
+		err = lxc_safe_uint(netdev->mtu, &netdev->mtu_converted);
 		if (err < 0) {
 			errno = -err;
 			SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"",
 				 netdev->mtu, peer);
 			goto on_error;
 		}
-
-		err = lxc_netdev_set_mtu(peer, mtu);
-		if (err) {
-			errno = -err;
-			SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"",
-				 netdev->mtu, peer);
-			goto on_error;
-		}
 	}
 
 	if (netdev->upscript) {
@@ -810,7 +774,6 @@ static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netd
 static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
 {
 	int err, mtu_orig = 0;
-	unsigned int mtu = 0;
 
 	if (netdev->link[0] == '\0') {
 		ERROR("No link for physical interface specified");
@@ -852,21 +815,13 @@ static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netd
 	netdev->priv.phys_attr.mtu = mtu_orig;
 
 	if (netdev->mtu) {
-		err = lxc_safe_uint(netdev->mtu, &mtu);
+		err = lxc_safe_uint(netdev->mtu, &netdev->mtu_converted);
 		if (err < 0) {
 			errno = -err;
 			SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"",
 				 netdev->mtu, netdev->link);
 			return -1;
 		}
-
-		err = lxc_netdev_set_mtu(netdev->link, mtu);
-		if (err < 0) {
-			errno = -err;
-			SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"",
-				 netdev->mtu, netdev->link);
-			return -1;
-		}
 	}
 
 	if (netdev->upscript) {
@@ -1640,9 +1595,9 @@ int netdev_get_mtu(int ifindex)
 	return err;
 }
 
-int lxc_netdev_set_mtu(const char *name, int mtu)
+int lxc_netdev_set_mtu(const char *name, int mtu, pid_t pid)
 {
-	int err, index, len;
+	int err, len;
 	struct ifinfomsg *ifi;
 	struct nl_handler nlh;
 	struct nlmsg *answer = NULL, *nlmsg = NULL;
@@ -1665,11 +1620,6 @@ int lxc_netdev_set_mtu(const char *name, int mtu)
 	if (!answer)
 		goto out;
 
-	err = -EINVAL;
-	index = if_nametoindex(name);
-	if (!index)
-		goto out;
-
 	nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
 	nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
 
@@ -1679,7 +1629,12 @@ int lxc_netdev_set_mtu(const char *name, int mtu)
 		goto out;
 	}
 	ifi->ifi_family = AF_UNSPEC;
-	ifi->ifi_index = index;
+
+	if (nla_put_string(nlmsg, IFLA_IFNAME, name))
+		goto out;
+
+	if (pid > 0 && nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
+		goto out;
 
 	if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
 		goto out;
@@ -1702,7 +1657,7 @@ int lxc_netdev_down(const char *name)
 	return netdev_set_flag(name, 0);
 }
 
-int lxc_veth_create(const char *name1, const char *name2)
+int lxc_veth_create(const char *name1, const char *name2, pid_t pid)
 {
 	int err, len;
 	struct ifinfomsg *ifi;
@@ -1763,14 +1718,17 @@ int lxc_veth_create(const char *name1, const char *name2)
 		goto out;
 	}
 
-	if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
+	if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
 		goto out;
 
 	nla_end_nested(nlmsg, nest3);
 	nla_end_nested(nlmsg, nest2);
 	nla_end_nested(nlmsg, nest1);
 
-	if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
+	if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
+		goto out;
+
+	if (pid > 0 && nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
 		goto out;
 
 	err = netlink_transaction(&nlh, nlmsg, answer);
@@ -3361,7 +3319,7 @@ bool lxc_delete_network_priv(struct lxc_handler *handler)
 				      netdev->link);
 
 				/* Restore original MTU */
-				ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
+				ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu, -1);
 				if (ret < 0) {
 					WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
 						netdev->link, netdev->priv.phys_attr.mtu);
@@ -3704,6 +3662,12 @@ static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev)
 	 */
 	(void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
 
+	if (netdev->mtu_converted) {
+		err = lxc_netdev_set_mtu(netdev->name, netdev->mtu_converted, -1);
+		if (err)
+			log_error_errno(-1, -err, "Failed to set mtu \"%d\" for \"%s\"", netdev->mtu_converted, netdev->name);
+	}
+
 	/* set a mac address */
 	if (netdev->hwaddr) {
 		if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
@@ -3900,6 +3864,10 @@ int lxc_network_send_to_child(struct lxc_handler *handler)
 		if (ret < 0)
 			return -1;
 
+		ret = lxc_send_nointr(data_sock, &netdev->mtu_converted, sizeof(netdev->mtu_converted), MSG_NOSIGNAL);
+		if (ret < 0)
+			return -1;
+
 		TRACE("Sent network device name \"%s\" to child", netdev->created_name);
 	}
 
@@ -3926,6 +3894,11 @@ int lxc_network_recv_from_parent(struct lxc_handler *handler)
 		ret = lxc_recv_nointr(data_sock, netdev->created_name, IFNAMSIZ, 0);
 		if (ret < 0)
 			return -1;
+
+		ret = lxc_recv_nointr(data_sock, &netdev->mtu_converted, sizeof(netdev->mtu_converted), 0);
+		if (ret < 0)
+			return -1;
+
 		TRACE("Received network device name \"%s\" from parent", netdev->created_name);
 	}
 
diff --git a/src/lxc/network.h b/src/lxc/network.h
index 025a56152e..c04088cd99 100644
--- a/src/lxc/network.h
+++ b/src/lxc/network.h
@@ -163,6 +163,7 @@ struct lxc_netdev {
 	char created_name[IFNAMSIZ];
 	char *hwaddr;
 	char *mtu;
+	unsigned int mtu_converted;
 	union netdev_p priv;
 	struct lxc_list ipv4;
 	struct lxc_list ipv6;
@@ -200,10 +201,10 @@ extern int lxc_netdev_up(const char *name);
 extern int lxc_netdev_down(const char *name);
 
 /* Change the mtu size for the specified device. */
-extern int lxc_netdev_set_mtu(const char *name, int mtu);
+extern int lxc_netdev_set_mtu(const char *name, int mtu, pid_t pid);
 
 /* Create a virtual network devices. */
-extern int lxc_veth_create(const char *name1, const char *name2);
+extern int lxc_veth_create(const char *name1, const char *name2, pid_t pid);
 extern int lxc_macvlan_create(const char *master, const char *name, int mode);
 extern int lxc_vlan_create(const char *master, const char *name,
 			   unsigned short vid);


More information about the lxc-devel mailing list