[lxc-devel] [lxc/master] network: improve veth device creation
brauner on Github
lxc-bot at linuxcontainers.org
Wed Jan 8 16:03:03 UTC 2020
A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 588 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20200108/524d3abf/attachment.bin>
-------------- next part --------------
From f78d30d14fb60016b6915ccb885b1f9de1a2d29d Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Wed, 8 Jan 2020 14:17:06 +0100
Subject: [PATCH 1/2] start: move network device creation after setting ids
This will ensure that ownership of the devices is correct.
Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
src/lxc/start.c | 57 +++++++++++++++++++++++++------------------------
1 file changed, 29 insertions(+), 28 deletions(-)
diff --git a/src/lxc/start.c b/src/lxc/start.c
index d62acf4bd8..a87c3ff935 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -1169,34 +1169,6 @@ static int do_start(void *data)
if (ret < 0)
goto out_warn_father;
- /* Unshare CLONE_NEWNET after CLONE_NEWUSER. See
- * https://github.com/lxc/lxd/issues/1978.
- */
- if ((handler->ns_clone_flags & (CLONE_NEWNET | CLONE_NEWUSER)) ==
- (CLONE_NEWNET | CLONE_NEWUSER)) {
- ret = unshare(CLONE_NEWNET);
- if (ret < 0) {
- SYSERROR("Failed to unshare CLONE_NEWNET");
- goto out_warn_father;
- }
- INFO("Unshared CLONE_NEWNET");
- }
-
- /* Tell the parent task it can begin to configure the container and wait
- * for it to finish.
- */
- ret = lxc_sync_barrier_parent(handler, LXC_SYNC_CONFIGURE);
- if (ret < 0)
- goto out_error;
-
- if (handler->ns_clone_flags & CLONE_NEWNET) {
- ret = lxc_network_recv_from_parent(handler);
- if (ret < 0) {
- ERROR("Failed to receive veth names from parent");
- goto out_warn_father;
- }
- }
-
/* If we are in a new user namespace, become root there to have
* privilege over our namespace.
*/
@@ -1230,6 +1202,35 @@ static int do_start(void *data)
}
}
+ /* Unshare CLONE_NEWNET after CLONE_NEWUSER. See
+ * https://github.com/lxc/lxd/issues/1978.
+ */
+ if ((handler->ns_clone_flags & (CLONE_NEWNET | CLONE_NEWUSER)) ==
+ (CLONE_NEWNET | CLONE_NEWUSER)) {
+ ret = unshare(CLONE_NEWNET);
+ if (ret < 0) {
+ SYSERROR("Failed to unshare CLONE_NEWNET");
+ goto out_warn_father;
+ }
+ INFO("Unshared CLONE_NEWNET");
+ }
+
+ /* Tell the parent task it can begin to configure the container and wait
+ * for it to finish.
+ */
+ ret = lxc_sync_barrier_parent(handler, LXC_SYNC_CONFIGURE);
+ if (ret < 0)
+ goto out_error;
+
+ if (handler->ns_clone_flags & CLONE_NEWNET) {
+ ret = lxc_network_recv_from_parent(handler);
+ if (ret < 0) {
+ ERROR("Failed to receive veth names from parent");
+ goto out_warn_father;
+ }
+ }
+
+
ret = access(handler->lxcpath, X_OK);
if (ret != 0) {
print_top_failing_dir(handler->lxcpath);
From ad4d4fd81e6f38efb65d26d55e8ffca81855949e Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Wed, 8 Jan 2020 15:13:16 +0100
Subject: [PATCH 2/2] network: create veth directly in the container's
namespace
This allows us to avoid having to move the network device. It also allows us to
work around a kernel bug that in combination with a recent change in systemd
244 causes uses of systemd-networkd to not get an ip address.
Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
src/lxc/cmd/lxc_user_nic.c | 18 ++---
src/lxc/network.c | 147 +++++++++++++++----------------------
src/lxc/network.h | 5 +-
3 files changed, 68 insertions(+), 102 deletions(-)
diff --git a/src/lxc/cmd/lxc_user_nic.c b/src/lxc/cmd/lxc_user_nic.c
index cfb9d8d563..ae23621800 100644
--- a/src/lxc/cmd/lxc_user_nic.c
+++ b/src/lxc/cmd/lxc_user_nic.c
@@ -487,11 +487,11 @@ static char *find_line(char *buf_start, char *buf_end, char *name,
return NULL;
}
-static int instantiate_veth(char *veth1, char *veth2)
+static int instantiate_veth(char *veth1, char *veth2, pid_t pid)
{
int ret;
- ret = lxc_veth_create(veth1, veth2);
+ ret = lxc_veth_create(veth1, veth2, pid);
if (ret < 0) {
errno = -ret;
CMD_SYSERROR("Failed to create %s-%s\n", veth1, veth2);
@@ -540,7 +540,7 @@ static int create_nic(char *nic, char *br, int pid, char **cnic)
}
/* create the nics */
- ret = instantiate_veth(veth1buf, veth2buf);
+ ret = instantiate_veth(veth1buf, veth2buf, pid);
if (ret < 0) {
usernic_error("%s", "Error creating veth tunnel\n");
return -1;
@@ -550,14 +550,14 @@ static int create_nic(char *nic, char *br, int pid, char **cnic)
/* copy the bridge's mtu to both ends */
mtu = get_mtu(br);
if (mtu > 0) {
- ret = lxc_netdev_set_mtu(veth1buf, mtu);
+ ret = lxc_netdev_set_mtu(veth1buf, mtu, -1);
if (ret < 0) {
usernic_error("Failed to set mtu to %d on %s\n",
mtu, veth1buf);
goto out_del;
}
- ret = lxc_netdev_set_mtu(veth2buf, mtu);
+ ret = lxc_netdev_set_mtu(veth2buf, mtu, pid);
if (ret < 0) {
usernic_error("Failed to set mtu to %d on %s\n",
mtu, veth2buf);
@@ -573,14 +573,6 @@ static int create_nic(char *nic, char *br, int pid, char **cnic)
}
}
- /* pass veth2 to target netns */
- ret = lxc_netdev_move_by_name(veth2buf, pid, NULL);
- if (ret < 0) {
- usernic_error("Error moving %s to network namespace of %d\n",
- veth2buf, pid);
- goto out_del;
- }
-
*cnic = strdup(veth2buf);
if (!*cnic) {
usernic_error("Failed to copy string \"%s\"\n", veth2buf);
diff --git a/src/lxc/network.c b/src/lxc/network.c
index 65eca60e83..00bd649302 100644
--- a/src/lxc/network.c
+++ b/src/lxc/network.c
@@ -271,10 +271,9 @@ static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
- int bridge_index, err;
+ int err;
char *veth1, *veth2;
char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
- unsigned int mtu = 0;
if (netdev->priv.veth_attr.pair[0] != '\0') {
veth1 = netdev->priv.veth_attr.pair;
@@ -299,13 +298,13 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd
veth2 = lxc_mkifname(veth2buf);
if (!veth2)
- goto out_delete;
+ return -1;
- err = lxc_veth_create(veth1, veth2);
+ err = lxc_veth_create(veth1, veth2, handler->pid);
if (err) {
errno = -err;
SYSERROR("Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
- goto out_delete;
+ return -1;
}
strlcpy(netdev->created_name, veth2, IFNAMSIZ);
@@ -327,43 +326,36 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd
goto out_delete;
}
- /* Note that we're retrieving the container's ifindex in the host's
- * network namespace because we need it to move the device from the
- * host's network namespace to the container's network namespace later
- * on.
- */
- netdev->ifindex = if_nametoindex(veth2);
- if (!netdev->ifindex) {
- ERROR("Failed to retrieve ifindex for \"%s\"", veth2);
- goto out_delete;
- }
if (netdev->mtu) {
- if (lxc_safe_uint(netdev->mtu, &mtu) < 0)
+ if (lxc_safe_uint(netdev->mtu, &netdev->mtu_converted) < 0)
WARN("Failed to parse mtu");
else
- INFO("Retrieved mtu %d", mtu);
+ INFO("Retrieved mtu %d", netdev->mtu_converted);
} else if (netdev->link[0] != '\0') {
- bridge_index = if_nametoindex(netdev->link);
- if (bridge_index) {
- mtu = netdev_get_mtu(bridge_index);
- INFO("Retrieved mtu %d from %s", mtu, netdev->link);
+ int ifindex_mtu;
+
+ ifindex_mtu = if_nametoindex(netdev->link);
+ if (ifindex_mtu) {
+ netdev->mtu_converted = netdev_get_mtu(ifindex_mtu);
+ INFO("Retrieved mtu %d from %s", netdev->mtu_converted, netdev->link);
} else {
- mtu = netdev_get_mtu(netdev->ifindex);
- INFO("Retrieved mtu %d from %s", mtu, veth2);
+ ifindex_mtu = if_nametoindex(veth1);
+ if (ifindex_mtu) {
+ netdev->mtu_converted = netdev_get_mtu(ifindex_mtu);
+ INFO("Retrieved mtu %d from %s", netdev->mtu_converted, veth1);
+ } else {
+ SYSWARN("Failed to retrieve mtu from %s", veth1);
+ }
}
}
- if (mtu) {
- err = lxc_netdev_set_mtu(veth1, mtu);
- if (!err)
- err = lxc_netdev_set_mtu(veth2, mtu);
-
+ if (netdev->mtu_converted) {
+ err = lxc_netdev_set_mtu(veth1, netdev->mtu_converted, -1);
if (err) {
- errno = -err;
- SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" "
- "and \"%s\"", mtu, veth1, veth2);
- goto out_delete;
+ errno = -err;
+ SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" ", netdev->mtu_converted, veth1);
+ goto out_delete;
}
}
@@ -484,14 +476,12 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd
goto out_delete;
}
- DEBUG("Instantiated veth \"%s/%s\", index is \"%d\"", veth1, veth2,
- netdev->ifindex);
+ DEBUG("Instantiated veth tunnel \"%s <--> %s\"", veth1, veth2);
return 0;
out_delete:
- if (netdev->ifindex != 0)
- lxc_netdev_delete_by_name(veth1);
+ lxc_netdev_delete_by_name(veth1);
return -1;
}
@@ -499,7 +489,6 @@ static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *n
{
char peer[IFNAMSIZ];
int err;
- unsigned int mtu = 0;
if (netdev->link[0] == '\0') {
ERROR("No link for macvlan network device specified");
@@ -531,19 +520,12 @@ static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *n
}
if (netdev->mtu) {
- err = lxc_safe_uint(netdev->mtu, &mtu);
+ err = lxc_safe_uint(netdev->mtu, &netdev->mtu_converted);
if (err < 0) {
errno = -err;
SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
goto on_error;
}
-
- err = lxc_netdev_set_mtu(peer, mtu);
- if (err < 0) {
- errno = -err;
- SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
- goto on_error;
- }
}
if (netdev->upscript) {
@@ -661,7 +643,6 @@ static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *ne
{
char peer[IFNAMSIZ];
int err;
- unsigned int mtu = 0;
if (netdev->link[0] == '\0') {
ERROR("No link for ipvlan network device specified");
@@ -692,21 +673,13 @@ static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *ne
}
if (netdev->mtu) {
- err = lxc_safe_uint(netdev->mtu, &mtu);
+ err = lxc_safe_uint(netdev->mtu, &netdev->mtu_converted);
if (err < 0) {
errno = -err;
SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"",
netdev->mtu, peer);
goto on_error;
}
-
- err = lxc_netdev_set_mtu(peer, mtu);
- if (err < 0) {
- errno = -err;
- SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"",
- netdev->mtu, peer);
- goto on_error;
- }
}
if (netdev->upscript) {
@@ -737,7 +710,6 @@ static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netd
char peer[IFNAMSIZ];
int err;
static uint16_t vlan_cntr = 0;
- unsigned int mtu = 0;
if (netdev->link[0] == '\0') {
ERROR("No link for vlan network device specified");
@@ -766,21 +738,13 @@ static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netd
}
if (netdev->mtu) {
- err = lxc_safe_uint(netdev->mtu, &mtu);
+ err = lxc_safe_uint(netdev->mtu, &netdev->mtu_converted);
if (err < 0) {
errno = -err;
SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"",
netdev->mtu, peer);
goto on_error;
}
-
- err = lxc_netdev_set_mtu(peer, mtu);
- if (err) {
- errno = -err;
- SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"",
- netdev->mtu, peer);
- goto on_error;
- }
}
if (netdev->upscript) {
@@ -810,7 +774,6 @@ static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netd
static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
int err, mtu_orig = 0;
- unsigned int mtu = 0;
if (netdev->link[0] == '\0') {
ERROR("No link for physical interface specified");
@@ -852,21 +815,13 @@ static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netd
netdev->priv.phys_attr.mtu = mtu_orig;
if (netdev->mtu) {
- err = lxc_safe_uint(netdev->mtu, &mtu);
+ err = lxc_safe_uint(netdev->mtu, &netdev->mtu_converted);
if (err < 0) {
errno = -err;
SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"",
netdev->mtu, netdev->link);
return -1;
}
-
- err = lxc_netdev_set_mtu(netdev->link, mtu);
- if (err < 0) {
- errno = -err;
- SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"",
- netdev->mtu, netdev->link);
- return -1;
- }
}
if (netdev->upscript) {
@@ -1640,9 +1595,9 @@ int netdev_get_mtu(int ifindex)
return err;
}
-int lxc_netdev_set_mtu(const char *name, int mtu)
+int lxc_netdev_set_mtu(const char *name, int mtu, pid_t pid)
{
- int err, index, len;
+ int err, len;
struct ifinfomsg *ifi;
struct nl_handler nlh;
struct nlmsg *answer = NULL, *nlmsg = NULL;
@@ -1665,11 +1620,6 @@ int lxc_netdev_set_mtu(const char *name, int mtu)
if (!answer)
goto out;
- err = -EINVAL;
- index = if_nametoindex(name);
- if (!index)
- goto out;
-
nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
@@ -1679,7 +1629,12 @@ int lxc_netdev_set_mtu(const char *name, int mtu)
goto out;
}
ifi->ifi_family = AF_UNSPEC;
- ifi->ifi_index = index;
+
+ if (nla_put_string(nlmsg, IFLA_IFNAME, name))
+ goto out;
+
+ if (pid > 0 && nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
+ goto out;
if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
goto out;
@@ -1702,7 +1657,7 @@ int lxc_netdev_down(const char *name)
return netdev_set_flag(name, 0);
}
-int lxc_veth_create(const char *name1, const char *name2)
+int lxc_veth_create(const char *name1, const char *name2, pid_t pid)
{
int err, len;
struct ifinfomsg *ifi;
@@ -1763,14 +1718,17 @@ int lxc_veth_create(const char *name1, const char *name2)
goto out;
}
- if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
+ if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
goto out;
nla_end_nested(nlmsg, nest3);
nla_end_nested(nlmsg, nest2);
nla_end_nested(nlmsg, nest1);
- if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
+ if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
+ goto out;
+
+ if (pid > 0 && nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
goto out;
err = netlink_transaction(&nlh, nlmsg, answer);
@@ -3361,7 +3319,7 @@ bool lxc_delete_network_priv(struct lxc_handler *handler)
netdev->link);
/* Restore original MTU */
- ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
+ ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu, -1);
if (ret < 0) {
WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
netdev->link, netdev->priv.phys_attr.mtu);
@@ -3704,6 +3662,12 @@ static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev)
*/
(void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
+ if (netdev->mtu_converted) {
+ err = lxc_netdev_set_mtu(netdev->name, netdev->mtu_converted, -1);
+ if (err)
+ log_error_errno(-1, -err, "Failed to set mtu \"%d\" for \"%s\"", netdev->mtu_converted, netdev->name);
+ }
+
/* set a mac address */
if (netdev->hwaddr) {
if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
@@ -3900,6 +3864,10 @@ int lxc_network_send_to_child(struct lxc_handler *handler)
if (ret < 0)
return -1;
+ ret = lxc_send_nointr(data_sock, &netdev->mtu_converted, sizeof(netdev->mtu_converted), MSG_NOSIGNAL);
+ if (ret < 0)
+ return -1;
+
TRACE("Sent network device name \"%s\" to child", netdev->created_name);
}
@@ -3926,6 +3894,11 @@ int lxc_network_recv_from_parent(struct lxc_handler *handler)
ret = lxc_recv_nointr(data_sock, netdev->created_name, IFNAMSIZ, 0);
if (ret < 0)
return -1;
+
+ ret = lxc_recv_nointr(data_sock, &netdev->mtu_converted, sizeof(netdev->mtu_converted), 0);
+ if (ret < 0)
+ return -1;
+
TRACE("Received network device name \"%s\" from parent", netdev->created_name);
}
diff --git a/src/lxc/network.h b/src/lxc/network.h
index 025a56152e..c04088cd99 100644
--- a/src/lxc/network.h
+++ b/src/lxc/network.h
@@ -163,6 +163,7 @@ struct lxc_netdev {
char created_name[IFNAMSIZ];
char *hwaddr;
char *mtu;
+ unsigned int mtu_converted;
union netdev_p priv;
struct lxc_list ipv4;
struct lxc_list ipv6;
@@ -200,10 +201,10 @@ extern int lxc_netdev_up(const char *name);
extern int lxc_netdev_down(const char *name);
/* Change the mtu size for the specified device. */
-extern int lxc_netdev_set_mtu(const char *name, int mtu);
+extern int lxc_netdev_set_mtu(const char *name, int mtu, pid_t pid);
/* Create a virtual network devices. */
-extern int lxc_veth_create(const char *name1, const char *name2);
+extern int lxc_veth_create(const char *name1, const char *name2, pid_t pid);
extern int lxc_macvlan_create(const char *master, const char *name, int mode);
extern int lxc_vlan_create(const char *master, const char *name,
unsigned short vid);
More information about the lxc-devel
mailing list