[lxc-devel] [lxc/stable-2.0] stable 2.0: cherry-picks + delta reduction between master and stable 2.0

brauner on Github lxc-bot at linuxcontainers.org
Mon Sep 4 11:20:43 UTC 2017


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 364 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20170904/fa352808/attachment.bin>
-------------- next part --------------
From b7bf792663a1b2926a65854ceddfc5b8d5360049 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Mon, 4 Sep 2017 12:49:52 +0200
Subject: [PATCH 01/32] lxc-user-nic: remove delta between master + stable

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/lxc_user_nic.c | 51 +++++++++++++++++++++++++-------------------------
 1 file changed, 25 insertions(+), 26 deletions(-)

diff --git a/src/lxc/lxc_user_nic.c b/src/lxc/lxc_user_nic.c
index f55cc6d77..0fb788877 100644
--- a/src/lxc/lxc_user_nic.c
+++ b/src/lxc/lxc_user_nic.c
@@ -78,7 +78,7 @@ static int open_and_lock(char *path)
 
 	fd = open(path, O_RDWR | O_CREAT, S_IWUSR | S_IRUSR);
 	if (fd < 0) {
-		usernic_error("Failed to open %s: %s.\n", path,
+		usernic_error("Failed to open \"%s\": %s\n", path,
 			      strerror(errno));
 		return -1;
 	}
@@ -145,7 +145,7 @@ static char **get_groupnames(void)
 	group_ids = malloc(sizeof(gid_t) * ngroups);
 	if (!group_ids) {
 		usernic_error("Failed to allocate memory while getting groups "
-			      "the user belongs to: %s.\n",
+			      "the user belongs to: %s\n",
 			      strerror(errno));
 		return NULL;
 	}
@@ -153,7 +153,7 @@ static char **get_groupnames(void)
 	ret = getgroups(ngroups, group_ids);
 	if (ret < 0) {
 		free(group_ids);
-		usernic_error("Failed to get process groups: %s.\n",
+		usernic_error("Failed to get process groups: %s\n",
 			      strerror(errno));
 		return NULL;
 	}
@@ -162,7 +162,7 @@ static char **get_groupnames(void)
 	if (!groupnames) {
 		free(group_ids);
 		usernic_error("Failed to allocate memory while getting group "
-			      "names: %s.\n",
+			      "names: %s\n",
 			      strerror(errno));
 		return NULL;
 	}
@@ -172,7 +172,7 @@ static char **get_groupnames(void)
 	for (i = 0; i < ngroups; i++) {
 		gr = getgrgid(group_ids[i]);
 		if (!gr) {
-			usernic_error("Failed to get group name: %s.\n",
+			usernic_error("Failed to get group name: %s\n",
 				      strerror(errno));
 			free(group_ids);
 			free_groupnames(groupnames);
@@ -181,7 +181,7 @@ static char **get_groupnames(void)
 
 		groupnames[i] = strdup(gr->gr_name);
 		if (!groupnames[i]) {
-			usernic_error("Failed to copy group name \"%s\".",
+			usernic_error("Failed to copy group name \"%s\"",
 				      gr->gr_name);
 			free(group_ids);
 			free_groupnames(groupnames);
@@ -426,7 +426,7 @@ static int instantiate_veth(char *n1, char **n2)
 
 	err = snprintf(*n2, IFNAMSIZ, "%sp", n1);
 	if (err < 0 || err >= IFNAMSIZ) {
-		usernic_error("%s\n", "Could not create nic name.");
+		usernic_error("%s\n", "Could not create nic name");
 		return -1;
 	}
 
@@ -464,19 +464,19 @@ static bool create_nic(char *nic, char *br, int pid, char **cnic)
 	veth1buf = alloca(IFNAMSIZ);
 	veth2buf = alloca(IFNAMSIZ);
 	if (!veth1buf || !veth2buf) {
-		usernic_error("Failed allocate memory: %s.\n", strerror(errno));
+		usernic_error("Failed allocate memory: %s\n", strerror(errno));
 		return false;
 	}
 
 	ret = snprintf(veth1buf, IFNAMSIZ, "%s", nic);
 	if (ret < 0 || ret >= IFNAMSIZ) {
-		usernic_error("%s", "Could not create nic name.\n");
+		usernic_error("%s", "Could not create nic name\n");
 		return false;
 	}
 
 	/* create the nics */
 	if (instantiate_veth(veth1buf, &veth2buf) < 0) {
-		usernic_error("%s", "Error creating veth tunnel.\n");
+		usernic_error("%s", "Error creating veth tunnel\n");
 		return false;
 	}
 
@@ -502,7 +502,7 @@ static bool create_nic(char *nic, char *br, int pid, char **cnic)
 		/* attach veth1 to bridge */
 		ret = lxc_bridge_attach(br, veth1buf);
 		if (ret < 0) {
-			usernic_error("Error attaching %s to %s.\n", veth1buf, br);
+			usernic_error("Error attaching %s to %s\n", veth1buf, br);
 			goto out_del;
 		}
 	}
@@ -517,7 +517,7 @@ static bool create_nic(char *nic, char *br, int pid, char **cnic)
 
 	*cnic = strdup(veth2buf);
 	if (!*cnic) {
-		usernic_error("Failed to copy string \"%s\".\n", veth2buf);
+		usernic_error("Failed to copy string \"%s\"\n", veth2buf);
 		return false;
 	}
 
@@ -813,7 +813,7 @@ static char *lxc_secure_rename_in_ns(int pid, char *oldname, char *newname,
 
 	fd = lxc_preserve_ns(pid, "net");
 	if (fd < 0) {
-		usernic_error("Failed opening network namespace path for '%d'.", pid);
+		usernic_error("Failed opening network namespace path for %d", pid);
 		goto do_partial_cleanup;
 	}
 
@@ -830,7 +830,7 @@ static char *lxc_secure_rename_in_ns(int pid, char *oldname, char *newname,
 	fd = -1;
 	if (ret < 0) {
 		usernic_error("Failed to setns() to the network namespace of "
-			      "the container with PID %d: %s.\n",
+			      "the container with PID %d: %s\n",
 			      pid, strerror(errno));
 		goto do_partial_cleanup;
 	}
@@ -839,11 +839,12 @@ static char *lxc_secure_rename_in_ns(int pid, char *oldname, char *newname,
 	if (ret < 0) {
 		usernic_error("Failed to drop privilege by setting effective "
 			      "user id and real user id to %d, and saved user "
-			      "ID to 0: %s.\n",
+			      "ID to 0: %s\n",
 			      ruid, strerror(errno));
-		// COMMENT(brauner): It's ok to jump to do_full_cleanup here
-		// since setresuid() will succeed when trying to set real,
-		// effective, and saved to values they currently have.
+		/* It's ok to jump to do_full_cleanup here since setresuid()
+		 * will succeed when trying to set real, effective, and saved to
+		 * values they currently have.
+		 */
 		goto do_full_cleanup;
 	}
 
@@ -913,10 +914,8 @@ static char *lxc_secure_rename_in_ns(int pid, char *oldname, char *newname,
 	return string_ret;
 }
 
-/*
- * If the caller (real uid, not effective uid) may read the
- * /proc/[pid]/ns/net, then it is either the caller's netns or one
- * which it created.
+/* If the caller (real uid, not effective uid) may read the /proc/[pid]/ns/net,
+ * then it is either the caller's netns or one which it created.
  */
 static bool may_access_netns(int pid)
 {
@@ -937,7 +936,7 @@ static bool may_access_netns(int pid)
 	if (ret < 0) {
 		usernic_error("Failed to drop privilege by setting effective "
 			      "user id and real user id to %d, and saved user "
-			      "ID to %d: %s.\n",
+			      "ID to %d: %s\n",
 			      ruid, euid, strerror(errno));
 		return false;
 	}
@@ -956,7 +955,7 @@ static bool may_access_netns(int pid)
 	ret = setresuid(ruid, euid, suid);
 	if (ret < 0) {
 		usernic_error("Failed to restore user id to %d, real user id "
-			      "to %d, and saved user ID to %d: %s.\n",
+			      "to %d, and saved user ID to %d: %s\n",
 			      ruid, euid, suid, strerror(errno));
 		may_access = false;
 	}
@@ -1035,7 +1034,7 @@ int main(int argc, char *argv[])
 	}
 
 	if (!create_db_dir(LXC_USERNIC_DB)) {
-		usernic_error("%s", "Failed to create directory for db file.\n");
+		usernic_error("%s", "Failed to create directory for db file\n");
 		exit(EXIT_FAILURE);
 	}
 
@@ -1046,7 +1045,7 @@ int main(int argc, char *argv[])
 	}
 
 	if (!may_access_netns(pid)) {
-		usernic_error("User %s may not modify netns for pid %d.\n", me, pid);
+		usernic_error("User %s may not modify netns for pid %d\n", me, pid);
 		exit(EXIT_FAILURE);
 	}
 

From 8ddde7baff47f05e0c58237bc47a3e7a3f7da8f4 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Thu, 31 Aug 2017 01:32:39 +0200
Subject: [PATCH 02/32] lxc-user-nic: test privilege over netns on delete

When lxc-user-nic is called with the "delete" subcommand we need to make sure
that we are actually privileged over the network namespace for which we are
supposed to delete devices on the host. To this end we require that the path to
the affected network namespace be passed. We then setns() to the network
namespace and drop privilege to the caller's real user id. Then we try to
delete the loopback interface, which is never possible. If we are privileged
over the network namespace this operation will fail with ENOTSUP. If we are not
privileged over the network namespace we will get EPERM.

This is the first part of the commit. As of now nothing guarantees that the
caller does not just give us a random path to a network namespace it is
privileged over.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/lxc_user_nic.c | 116 +++++++++++++++++++++++++++++++++---
 src/lxc/network.c      | 156 +++++++++++++++++++++++++++++++++++--------------
 src/lxc/network.h      |   3 +-
 src/lxc/start.c        |  49 +++++++++++-----
 4 files changed, 254 insertions(+), 70 deletions(-)

diff --git a/src/lxc/lxc_user_nic.c b/src/lxc/lxc_user_nic.c
index 0fb788877..4c446f7c3 100644
--- a/src/lxc/lxc_user_nic.c
+++ b/src/lxc/lxc_user_nic.c
@@ -976,13 +976,89 @@ struct user_nic_args {
 #define LXC_USERNIC_CREATE 0
 #define LXC_USERNIC_DELETE 1
 
+static bool is_privileged_over_netns(int netns_fd)
+{
+	int ret;
+	uid_t euid, ruid, suid;
+	bool bret = false;
+	int ofd = -1;
+
+	ofd = lxc_preserve_ns(getpid(), "net");
+	if (ofd < 0) {
+		usernic_error("Failed opening network namespace path for %d", getpid());
+		return false;
+	}
+
+	ret = getresuid(&ruid, &euid, &suid);
+	if (ret < 0) {
+		usernic_error("Failed to retrieve real, effective, and saved "
+			      "user IDs: %s\n",
+			      strerror(errno));
+		goto do_partial_cleanup;
+	}
+
+	ret = setns(netns_fd, CLONE_NEWNET);
+	if (ret < 0) {
+		usernic_error("Failed to setns() to network namespace %s\n",
+			      strerror(errno));
+		goto do_partial_cleanup;
+	}
+
+	ret = setresuid(ruid, ruid, 0);
+	if (ret < 0) {
+		usernic_error("Failed to drop privilege by setting effective "
+			      "user id and real user id to %d, and saved user "
+			      "ID to 0: %s\n",
+			      ruid, strerror(errno));
+		/* It's ok to jump to do_full_cleanup here since setresuid()
+		 * will succeed when trying to set real, effective, and saved to
+		 * values they currently have.
+		 */
+		goto do_full_cleanup;
+	}
+
+	/* Test whether we are privileged over the network namespace. To do this
+	 * we try to delete the loopback interface which is not possible. If we
+	 * are privileged over the network namespace we will get ENOTSUP. If we
+	 * are not privileged over the network namespace we will get EPERM.
+	 */
+	ret = lxc_netdev_delete_by_name("lo");
+	if (ret == -ENOTSUP)
+		bret = true;
+
+do_full_cleanup:
+	ret = setresuid(ruid, euid, suid);
+	if (ret < 0) {
+		usernic_error("Failed to restore privilege by setting "
+			      "effective user id to %d, real user id to %d, "
+			      "and saved user ID to %d: %s\n", ruid, euid, suid,
+			      strerror(errno));
+
+		bret = false;
+	}
+
+	ret = setns(ofd, CLONE_NEWNET);
+	if (ret < 0) {
+		usernic_error("Failed to setns() to original network namespace "
+			      "of PID %d: %s\n", ofd, strerror(errno));
+
+		bret = false;
+	}
+
+do_partial_cleanup:
+
+	close(ofd);
+	return bret;
+}
+
 int main(int argc, char *argv[])
 {
 	int fd, ifindex, n, pid, request, ret;
 	char *me, *newname;
+	struct user_nic_args args;
+	int netns_fd = -1;
 	char *cnic = NULL, *nicname = NULL;
 	struct alloted_s *alloted = NULL;
-	struct user_nic_args args;
 
 	if (argc < 7 || argc > 8) {
 		usage(argv[0], true);
@@ -1027,26 +1103,50 @@ int main(int argc, char *argv[])
 		exit(EXIT_FAILURE);
 	}
 
-	ret = lxc_safe_int(args.pid, &pid);
-	if (ret < 0) {
-		usernic_error("Could not read pid: %s\n", args.pid);
-		exit(EXIT_FAILURE);
+	if (request == LXC_USERNIC_CREATE) {
+		ret = lxc_safe_int(args.pid, &pid);
+		if (ret < 0) {
+			usernic_error("Could not read pid: %s\n", args.pid);
+			exit(EXIT_FAILURE);
+		}
+	} else if (request == LXC_USERNIC_DELETE) {
+		netns_fd = open(args.pid, O_RDONLY);
+		if (netns_fd < 0) {
+			usernic_error("Could not open \"%s\": %s\n", args.pid,
+				      strerror(errno));
+			exit(EXIT_FAILURE);
+		}
 	}
 
 	if (!create_db_dir(LXC_USERNIC_DB)) {
 		usernic_error("%s", "Failed to create directory for db file\n");
+		if (netns_fd >= 0)
+			close(netns_fd);
 		exit(EXIT_FAILURE);
 	}
 
 	fd = open_and_lock(LXC_USERNIC_DB);
 	if (fd < 0) {
 		usernic_error("Failed to lock %s\n", LXC_USERNIC_DB);
+		if (netns_fd >= 0)
+			close(netns_fd);
 		exit(EXIT_FAILURE);
 	}
 
-	if (!may_access_netns(pid)) {
-		usernic_error("User %s may not modify netns for pid %d\n", me, pid);
-		exit(EXIT_FAILURE);
+	if (request == LXC_USERNIC_CREATE) {
+		if (!may_access_netns(pid)) {
+			usernic_error("User %s may not modify netns for pid %d\n", me, pid);
+			exit(EXIT_FAILURE);
+		}
+	} else if (request == LXC_USERNIC_DELETE) {
+		bool has_priv;
+		has_priv = is_privileged_over_netns(netns_fd);
+		close(netns_fd);
+		if (!has_priv) {
+			usernic_error("%s", "Process is not privileged over "
+					    "network namespace\n");
+			exit(EXIT_FAILURE);
+		}
 	}
 
 	n = get_alloted(me, args.type, args.link, &alloted);
diff --git a/src/lxc/network.c b/src/lxc/network.c
index bbb586ca3..6461b03b9 100644
--- a/src/lxc/network.c
+++ b/src/lxc/network.c
@@ -2132,8 +2132,9 @@ static int lxc_create_network_unpriv(const char *lxcpath, char *lxcname,
 	return 0;
 }
 
-static int lxc_delete_network_unpriv(const char *lxcpath, char *lxcname,
-				     struct lxc_netdev *netdev, pid_t pid)
+static int lxc_delete_network_unpriv_exec(const char *lxcpath, char *lxcname,
+					  struct lxc_netdev *netdev,
+					  const char *netns_path)
 {
 	int bytes, ret;
 	pid_t child;
@@ -2161,7 +2162,6 @@ static int lxc_delete_network_unpriv(const char *lxcpath, char *lxcname,
 
 	if (child == 0) {
 		int ret;
-		char pidstr[LXC_NUMSTRLEN64];
 
 		close(pipefd[0]);
 
@@ -2178,15 +2178,10 @@ static int lxc_delete_network_unpriv(const char *lxcpath, char *lxcname,
 			SYSERROR("Network link for network device \"%s\" is "
 				 "missing", netdev->priv.veth_attr.pair);
 
-		ret = snprintf(pidstr, LXC_NUMSTRLEN64, "%d", pid);
-		if (ret < 0 || ret >= LXC_NUMSTRLEN64)
-			exit(EXIT_FAILURE);
-		pidstr[LXC_NUMSTRLEN64 - 1] = '\0';
-
 		INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
-		     lxcname, pidstr, netdev->link, netdev->priv.veth_attr.pair);
+		     lxcname, netns_path, netdev->link, netdev->priv.veth_attr.pair);
 		execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
-		       lxcname, pidstr, "veth", netdev->link,
+		       lxcname, netns_path, "veth", netdev->link,
 		       netdev->priv.veth_attr.pair, (char *)NULL);
 		SYSERROR("Failed to exec lxc-user-nic.");
 		exit(EXIT_FAILURE);
@@ -2214,6 +2209,91 @@ static int lxc_delete_network_unpriv(const char *lxcpath, char *lxcname,
 	return 0;
 }
 
+bool lxc_delete_network_unpriv(struct lxc_handler *handler)
+{
+	int ret;
+	struct lxc_list *iterator;
+	struct lxc_list *network = &handler->conf->network;
+	/* strlen("/proc/") = 6
+	 * +
+	 * LXC_NUMSTRLEN64
+	 * +
+	 * strlen("/fd/") = 4
+	 * +
+	 * LXC_NUMSTRLEN64
+	 * +
+	 * \0
+	 */
+	char netns_path[6 + LXC_NUMSTRLEN64 + 4 + LXC_NUMSTRLEN64 + 1];
+	bool deleted_all = true;
+
+	if (!am_unpriv())
+		return true;
+
+	*netns_path = '\0';
+
+	if (handler->netnsfd < 0) {
+		DEBUG("Cannot not guarantee safe deletion of network devices. "
+		      "Manual cleanup maybe needed");
+		return false;
+	}
+
+	ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
+		       getpid(), handler->netnsfd);
+	if (ret < 0 || ret >= sizeof(netns_path))
+		return false;
+
+	lxc_list_for_each(iterator, network) {
+		char *hostveth = NULL;
+		struct lxc_netdev *netdev = iterator->elem;
+
+		/* We can only delete devices whose ifindex we have. If we don't
+		 * have the index it means that we didn't create it.
+		 */
+		if (!netdev->ifindex)
+			continue;
+
+		if (netdev->type == LXC_NET_PHYS) {
+			ret = lxc_netdev_rename_by_index(netdev->ifindex,
+							 netdev->link);
+			if (ret < 0)
+				WARN("Failed to rename interface with index %d "
+				     "to its initial name \"%s\"",
+				     netdev->ifindex, netdev->link);
+			else
+				TRACE("Renamed interface with index %d to its "
+				      "initial name \"%s\"",
+				      netdev->ifindex, netdev->link);
+			continue;
+		}
+
+		ret = netdev_deconf[netdev->type](handler, netdev);
+		if (ret < 0)
+			WARN("Failed to deconfigure network device");
+
+		if (netdev->type != LXC_NET_VETH)
+			continue;
+
+		if (!is_ovs_bridge(netdev->link))
+			continue;
+
+		ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
+						     handler->name, netdev,
+						     netns_path);
+		if (ret < 0) {
+			deleted_all = false;
+			WARN("Failed to remove port \"%s\" from openvswitch "
+			     "bridge \"%s\"",
+			     netdev->priv.veth_attr.pair, netdev->link);
+			continue;
+		}
+		INFO("Removed interface \"%s\" from \"%s\"", hostveth,
+		     netdev->link);
+	}
+
+	return deleted_all;
+}
+
 int lxc_create_network_priv(struct lxc_handler *handler)
 {
 	bool am_root;
@@ -2296,13 +2376,16 @@ int lxc_create_network(const char *lxcpath, char *lxcname,
 	return 0;
 }
 
-bool lxc_delete_network(struct lxc_handler *handler)
+bool lxc_delete_network_priv(struct lxc_handler *handler)
 {
 	int ret;
 	struct lxc_list *iterator;
 	struct lxc_list *network = &handler->conf->network;
 	bool deleted_all = true;
 
+	if (am_unpriv())
+		return true;
+
 	lxc_list_for_each(iterator, network) {
 		char *hostveth = NULL;
 		struct lxc_netdev *netdev = iterator->elem;
@@ -2334,45 +2417,28 @@ bool lxc_delete_network(struct lxc_handler *handler)
 		 * namespace is destroyed but in case we did not move the
 		 * interface to the network namespace, we have to destroy it.
 		 */
-		if (!am_unpriv()) {
-			ret = lxc_netdev_delete_by_index(netdev->ifindex);
-			if (-ret == ENODEV) {
-				INFO("Interface \"%s\" with index %d already "
-				     "deleted or existing in different network "
-				     "namespace",
-				     netdev->name ? netdev->name : "(null)",
-				     netdev->ifindex);
-			} else if (ret < 0) {
-				deleted_all = false;
-				WARN("Failed to remove interface \"%s\" with "
-				     "index %d: %s",
-				     netdev->name ? netdev->name : "(null)",
-				     netdev->ifindex, strerror(-ret));
-				continue;
-			}
-			INFO("Removed interface \"%s\" with index %d",
-			     netdev->name ? netdev->name : "(null)",
-			     netdev->ifindex);
+		ret = lxc_netdev_delete_by_index(netdev->ifindex);
+		if (-ret == ENODEV) {
+			INFO("Interface \"%s\" with index %d already "
+					"deleted or existing in different network "
+					"namespace",
+					netdev->name ? netdev->name : "(null)",
+					netdev->ifindex);
+		} else if (ret < 0) {
+			deleted_all = false;
+			WARN("Failed to remove interface \"%s\" with "
+					"index %d: %s",
+					netdev->name ? netdev->name : "(null)",
+					netdev->ifindex, strerror(-ret));
+			continue;
 		}
+		INFO("Removed interface \"%s\" with index %d",
+				netdev->name ? netdev->name : "(null)",
+				netdev->ifindex);
 
 		if (netdev->type != LXC_NET_VETH)
 			continue;
 
-		if (am_unpriv()) {
-			if (is_ovs_bridge(netdev->link)) {
-				ret = lxc_delete_network_unpriv(handler->lxcpath,
-								handler->name,
-								netdev, getpid());
-				if (ret < 0)
-					WARN("Failed to remove port \"%s\" "
-					     "from openvswitch bridge \"%s\"",
-					     netdev->priv.veth_attr.pair,
-					     netdev->link);
-			}
-
-			continue;
-		}
-
 		/* Explicitly delete host veth device to prevent lingering
 		 * devices. We had issues in LXD around this.
 		 */
diff --git a/src/lxc/network.h b/src/lxc/network.h
index 8557b2acf..d4c98d2ed 100644
--- a/src/lxc/network.h
+++ b/src/lxc/network.h
@@ -257,7 +257,8 @@ extern const char *lxc_net_type_to_str(int type);
 extern int setup_private_host_hw_addr(char *veth1);
 extern int netdev_get_mtu(int ifindex);
 extern int lxc_create_network_priv(struct lxc_handler *handler);
-extern bool lxc_delete_network(struct lxc_handler *handler);
+extern bool lxc_delete_network_priv(struct lxc_handler *handler);
+extern bool lxc_delete_network_unpriv(struct lxc_handler *handler);
 extern int lxc_find_gateway_addresses(struct lxc_handler *handler);
 extern int lxc_create_network(const char *lxcpath, char *lxcname,
 			      struct lxc_list *network, pid_t pid);
diff --git a/src/lxc/start.c b/src/lxc/start.c
index cf37281c2..cb9a68cf6 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -1377,6 +1377,7 @@ static int lxc_spawn(struct lxc_handler *handler)
 		SYSERROR("Failed to clone a new set of namespaces.");
 		goto out_delete_net;
 	}
+
 	for (i = 0; i < LXC_NS_MAX; i++)
 		if (flags & ns_info[i].clone_flag)
 			INFO("Cloned %s.", ns_info[i].flag_name);
@@ -1428,6 +1429,12 @@ static int lxc_spawn(struct lxc_handler *handler)
 	if (failed_before_rename)
 		goto out_delete_net;
 
+	handler->netnsfd = lxc_preserve_ns(handler->pid, "net");
+	if (handler->netnsfd < 0) {
+		ERROR("Failed to preserve network namespace");
+		goto out_delete_net;
+	}
+
 	/* Create the network configuration. */
 	if (handler->clone_flags & CLONE_NEWNET) {
 		if (lxc_create_network(handler->lxcpath, handler->name,
@@ -1497,15 +1504,22 @@ static int lxc_spawn(struct lxc_handler *handler)
 	}
 
 	lxc_sync_fini(handler);
-	handler->netnsfd = lxc_preserve_ns(handler->pid, "net");
 
 	return 0;
 
 out_delete_net:
 	if (cgroups_connected)
 		cgroup_disconnect();
-	if (handler->clone_flags & CLONE_NEWNET)
-		lxc_delete_network(handler);
+
+	if (handler->clone_flags & CLONE_NEWNET) {
+		DEBUG("Tearing down network devices");
+		if (!lxc_delete_network_priv(handler))
+			DEBUG("Failed tearing down network devices");
+
+		if (!lxc_delete_network_unpriv(handler))
+			DEBUG("Failed tearing down network devices");
+	}
+
 out_abort:
 	lxc_abort(name, handler);
 	lxc_sync_fini(handler);
@@ -1514,6 +1528,11 @@ static int lxc_spawn(struct lxc_handler *handler)
 		handler->pinfd = -1;
 	}
 
+	if (handler->netnsfd >= 0) {
+		close(handler->netnsfd);
+		handler->netnsfd = -1;
+	}
+
 	return -1;
 }
 
@@ -1523,7 +1542,6 @@ int __lxc_start(const char *name, struct lxc_handler *handler,
 {
 	int status;
 	int err = -1;
-	bool removed_all_netdevs = true;
 	struct lxc_conf *conf = handler->conf;
 
 	if (lxc_init(name, handler) < 0) {
@@ -1580,10 +1598,6 @@ int __lxc_start(const char *name, struct lxc_handler *handler,
 	err = lxc_poll(name, handler);
 	if (err) {
 		ERROR("LXC mainloop exited with error: %d.", err);
-		if (handler->netnsfd >= 0) {
-			close(handler->netnsfd);
-			handler->netnsfd = -1;
-		}
 		goto out_abort;
 	}
 
@@ -1615,9 +1629,6 @@ int __lxc_start(const char *name, struct lxc_handler *handler,
 	DEBUG("Pushing physical nics back to host namespace");
 	lxc_restore_phys_nics_to_netns(handler->netnsfd, handler->conf);
 
-	DEBUG("Tearing down virtual network devices used by container \"%s\".", name);
-	removed_all_netdevs = lxc_delete_network(handler);
-
 	if (handler->pinfd >= 0) {
 		close(handler->pinfd);
 		handler->pinfd = -1;
@@ -1625,12 +1636,18 @@ int __lxc_start(const char *name, struct lxc_handler *handler,
 
 	lxc_monitor_send_exit_code(name, status, handler->lxcpath);
 	err =  lxc_error_set_and_log(handler->pid, status);
+
 out_fini:
-	if (!removed_all_netdevs) {
-		DEBUG("Failed tearing down network devices used by container. Trying again!");
-		removed_all_netdevs = lxc_delete_network(handler);
-		if (!removed_all_netdevs)
-			DEBUG("Failed tearing down network devices used by container. Not trying again!");
+	DEBUG("Tearing down network devices");
+	if (!lxc_delete_network_priv(handler))
+		DEBUG("Failed tearing down network devices");
+
+	if (!lxc_delete_network_unpriv(handler))
+		DEBUG("Failed tearing down network devices");
+
+	if (handler->netnsfd >= 0) {
+		close(handler->netnsfd);
+		handler->netnsfd = -1;
 	}
 
 out_detach_blockdev:

From dd208f1a336f824399419bdceafad14c072d710f Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Thu, 31 Aug 2017 13:17:11 +0200
Subject: [PATCH 03/32] network: log veth_attr.pair and veth_attr.veth1

If the user specified the lxc.net.[i].veth.pair attribute to request that the
host side of a veth pair be given a specific name, let's log it at the trace
level. Otherwise, if the user did not specify lxc.net.[i].veth.pair,
veth_attr.veth1 will contain the name of the host side veth device.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/confile_utils.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/lxc/confile_utils.c b/src/lxc/confile_utils.c
index 3e8334a0d..18d8226e0 100644
--- a/src/lxc/confile_utils.c
+++ b/src/lxc/confile_utils.c
@@ -260,6 +260,12 @@ void lxc_log_configured_netdevs(const struct lxc_conf *conf)
 		switch (netdev->type) {
 		case LXC_NET_VETH:
 			TRACE("type: veth");
+			if (netdev->priv.veth_attr.pair)
+				TRACE("veth pair: %s",
+				      netdev->priv.veth_attr.pair);
+			if (netdev->priv.veth_attr.veth1[0] != '\0')
+				TRACE("veth1 : %s",
+				      netdev->priv.veth_attr.veth1);
 			break;
 		case LXC_NET_MACVLAN:
 			TRACE("type: macvlan");

From 1482e8e0f6cd18a9cd2b6bd2031e4f2695d2a682 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Thu, 31 Aug 2017 13:19:33 +0200
Subject: [PATCH 04/32] network: add ifindex field for host veth device

We should not just record the ifindex for the container's veth device but also
for the host's veth device. This is useful when {configuring,deconfiguring}
veth devices and becomes crucial when calling our lxc-user-nic setuid helper
where we rely on the ifindex to make decisions about whether we are licensed to
perform certain operations on the veth device in question.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/network.h | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/lxc/network.h b/src/lxc/network.h
index d4c98d2ed..173596969 100644
--- a/src/lxc/network.h
+++ b/src/lxc/network.h
@@ -79,9 +79,21 @@ struct lxc_route6 {
 	struct in6_addr addr;
 };
 
+/* Contains information about the host side veth device.
+ * @pair    : Name of the host side veth device.
+ *            If the user requested that the host veth device be created with a
+ *            specific names this field will be set. If this field is set @veth1
+ *            is not set.
+ * @veth1   : Name of the host side veth device.
+ *            If the user did not request that the host veth device be created
+ *            with a specific name this field will be set. If this field is set
+ *            @pair is not set.
+ * @ifindex : Ifindex of the network device.
+ */
 struct ifla_veth {
-	char *pair; /* pair name */
-	char veth1[IFNAMSIZ]; /* needed for deconf */
+	char *pair;
+	char veth1[IFNAMSIZ];
+	int ifindex;
 };
 
 struct ifla_vlan {

From 9a5df38f322c7fb6d00ee3fd59c7aaaaf1c6c54b Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Thu, 31 Aug 2017 13:23:18 +0200
Subject: [PATCH 05/32] network: document all fields in struct lxc_netdev

This is menial work but I'll thank myself later... a lot.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/network.h | 44 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 33 insertions(+), 11 deletions(-)

diff --git a/src/lxc/network.h b/src/lxc/network.h
index 173596969..4c359dc6c 100644
--- a/src/lxc/network.h
+++ b/src/lxc/network.h
@@ -115,20 +115,42 @@ union netdev_p {
 
 /*
  * Defines a structure to configure a network device
- * @link       : lxc.net.[i].link, name of bridge or host iface to attach if any
- * @name       : lxc.net.[i].name, name of iface on the container side
- * @flags      : flag of the network device (IFF_UP, ... )
- * @ipv4       : a list of ipv4 addresses to be set on the network device
- * @ipv6       : a list of ipv6 addresses to be set on the network device
- * @upscript   : a script filename to be executed during interface configuration
- * @downscript : a script filename to be executed during interface destruction
- * @idx        : network counter
+ * @idx               : network counter
+ * @ifindex           : ifindex of the network device
+ *                      Note that this is the ifindex of the network device in
+ *                      the container's network namespace. If the network device
+ *                      consists of a pair of network devices (e.g. veth pairs
+ *                      attached to a network bridge) then this index cannot be
+ *                      used to identify or modify the host veth device. See
+ *                      struct ifla_veth for the host side information.
+ * @type              : network type (veth, macvlan, vlan, ...)
+ * @flags             : flag of the network device (IFF_UP, ... )
+ * @link              : lxc.net.[i].link, name of bridge or host iface to attach
+ *                      if any
+ * @name              : lxc.net.[i].name, name of iface on the container side
+ * @hwaddr            : mac address
+ * @mtu               : maximum transmission unit
+ * @priv              : information specific to the specificed network type
+ *                      Note that this is a union so whether accessing a struct
+ *                      is possible is dependent on the network type.
+ * @ipv4              : a list of ipv4 addresses to be set on the network device
+ * @ipv6              : a list of ipv6 addresses to be set on the network device
+ * @ipv4_gateway_auto : whether the ipv4 gateway is to be automatically gathered
+ *                      from the associated @link
+ * @ipv4_gateway      : ipv4 gateway
+ * @ipv6_gateway_auto : whether the ipv6 gateway is to be automatically gathered
+ *                      from the associated @link
+ * @ipv6_gateway      : ipv6 gateway
+ * @upscript          : a script filename to be executed during interface
+ *                      configuration
+ * @downscript        : a script filename to be executed during interface
+ *                      destruction
  */
 struct lxc_netdev {
 	ssize_t idx;
+	int ifindex;
 	int type;
 	int flags;
-	int ifindex;
 	char *link;
 	char *name;
 	char *hwaddr;
@@ -136,10 +158,10 @@ struct lxc_netdev {
 	union netdev_p priv;
 	struct lxc_list ipv4;
 	struct lxc_list ipv6;
-	struct in_addr *ipv4_gateway;
 	bool ipv4_gateway_auto;
-	struct in6_addr *ipv6_gateway;
+	struct in_addr *ipv4_gateway;
 	bool ipv6_gateway_auto;
+	struct in6_addr *ipv6_gateway;
 	char *upscript;
 	char *downscript;
 };

From 434f6022d5d0803e4160d1101a4cf299e3cbfc69 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Thu, 31 Aug 2017 15:25:16 +0200
Subject: [PATCH 06/32] network: log ifindex for host side veth device

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/confile_utils.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/lxc/confile_utils.c b/src/lxc/confile_utils.c
index 18d8226e0..31e705f9d 100644
--- a/src/lxc/confile_utils.c
+++ b/src/lxc/confile_utils.c
@@ -266,6 +266,9 @@ void lxc_log_configured_netdevs(const struct lxc_conf *conf)
 			if (netdev->priv.veth_attr.veth1[0] != '\0')
 				TRACE("veth1 : %s",
 				      netdev->priv.veth_attr.veth1);
+			if (netdev->priv.veth_attr.ifindex > 0)
+				TRACE("host side ifindex for veth device: %d",
+				      netdev->priv.veth_attr.ifindex);
 			break;
 		case LXC_NET_MACVLAN:
 			TRACE("type: macvlan");

From 96416905e79d09e3cd572c1b3ccbb8323d43cffa Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Thu, 31 Aug 2017 15:30:39 +0200
Subject: [PATCH 07/32] network: rework network creation

- On unprivileged veth network creation have lxc-user-nic send the names of the
  veth devices and their respective ifindices. The advantage of retrieving this
  information from lxc-user-nic is that we spare ourselves sending around more
  stuff via the netpipe in start.c. Also, lxc-user-nic operates in both
  namespaces (the container's namespace and the host's namespace) via setns and
  so is guaranteed to retrieve the correct ifindex via if_nametoindex(), which
  is a network-namespace-aware ioctl() call. While I'm pretty sure the
  ifindices for veth devices are identical across network namespaces, I'm wary
  of relying on this. We need the ifindices to guarantee safe deletion of
  unprivileged network devices via lxc-user-nic later on since we use them to
  identify the network devices in their corresponding network namespaces.
- Move the network device logging from the child to the parent. The child does
  not have all of the information about the network devices available, only
  the few bits it actually needs to know. The monitor process is the only
  process that needs all this information.
- The network creation code for privileged and unprivileged networks was
  previously mangled into one single function but at the same time some of the
  privileged code had additional functions that were called in other places in
  start.c. Let's divide and conquer and split out the privileged and
  unprivileged network creation into completely separate functions. This makes
  what's happening way more clear. This will also have no performance impact
  since either you are privileged and only execute the privileged network
  creation functions or you are unprivileged and only execute the unprivileged
  network creation functions.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/lxc_user_nic.c |  17 ++++---
 src/lxc/network.c      | 128 +++++++++++++++++++++++++++++++++----------------
 src/lxc/network.h      |   8 +++-
 src/lxc/start.c        |  26 ++++++++--
 4 files changed, 127 insertions(+), 52 deletions(-)

diff --git a/src/lxc/lxc_user_nic.c b/src/lxc/lxc_user_nic.c
index 4c446f7c3..1853e0412 100644
--- a/src/lxc/lxc_user_nic.c
+++ b/src/lxc/lxc_user_nic.c
@@ -797,7 +797,7 @@ static bool create_db_dir(char *fnam)
 }
 
 static char *lxc_secure_rename_in_ns(int pid, char *oldname, char *newname,
-				     int *ifidx)
+				     int *container_veth_ifidx)
 {
 	int ret;
 	uid_t ruid, suid, euid;
@@ -881,7 +881,7 @@ static char *lxc_secure_rename_in_ns(int pid, char *oldname, char *newname,
 	/* Allocation failure for strdup() is checked below. */
 	name = strdup(ifname);
 	string_ret = name;
-	*ifidx = ifindex;
+	*container_veth_ifidx = ifindex;
 
 do_full_cleanup:
 	ret = setresuid(ruid, euid, suid);
@@ -1053,7 +1053,7 @@ static bool is_privileged_over_netns(int netns_fd)
 
 int main(int argc, char *argv[])
 {
-	int fd, ifindex, n, pid, request, ret;
+	int container_veth_ifidx, fd, host_veth_ifidx, n, pid, request, ret;
 	char *me, *newname;
 	struct user_nic_args args;
 	int netns_fd = -1;
@@ -1204,7 +1204,8 @@ int main(int argc, char *argv[])
 	}
 
 	/* Now rename the link. */
-	newname = lxc_secure_rename_in_ns(pid, cnic, args.veth_name, &ifindex);
+	newname = lxc_secure_rename_in_ns(pid, cnic, args.veth_name,
+					  &container_veth_ifidx);
 	if (!newname) {
 		usernic_error("%s", "Failed to rename the link\n");
 		ret = lxc_netdev_delete_by_name(cnic);
@@ -1213,9 +1214,13 @@ int main(int argc, char *argv[])
 		free(nicname);
 		exit(EXIT_FAILURE);
 	}
+	host_veth_ifidx = if_nametoindex(nicname);
 
-	/* Write the name of the interface pair to the stdout: eth0:veth9MT2L4 */
-	fprintf(stdout, "%s:%s:%d\n", newname, nicname, ifindex);
+	/* Write names of veth pairs and their ifindeces to stout:
+	 * (e.g. eth0:731:veth9MT2L4:730)
+	 */
+	fprintf(stdout, "%s:%d:%s:%d\n", newname, container_veth_ifidx, nicname,
+		host_veth_ifidx);
 	free(newname);
 	free(nicname);
 	exit(EXIT_SUCCESS);
diff --git a/src/lxc/network.c b/src/lxc/network.c
index 6461b03b9..8d565ed53 100644
--- a/src/lxc/network.c
+++ b/src/lxc/network.c
@@ -47,7 +47,6 @@
 
 #include "conf.h"
 #include "config.h"
-#include "confile_utils.h"
 #include "log.h"
 #include "network.h"
 #include "nl.h"
@@ -2005,8 +2004,8 @@ int lxc_find_gateway_addresses(struct lxc_handler *handler)
 }
 
 #define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
-static int lxc_create_network_unpriv(const char *lxcpath, char *lxcname,
-				     struct lxc_netdev *netdev, pid_t pid)
+static int lxc_create_network_unpriv_exec(const char *lxcpath, char *lxcname,
+					  struct lxc_netdev *netdev, pid_t pid)
 {
 	int ret;
 	pid_t child;
@@ -2079,7 +2078,7 @@ static int lxc_create_network_unpriv(const char *lxcpath, char *lxcname,
 
 	bytes = read(pipefd[0], &buffer, MAXPATHLEN);
 	if (bytes < 0) {
-		SYSERROR("Failed to read from pipe file descriptor.");
+		SYSERROR("Failed to read from pipe file descriptor");
 		close(pipefd[0]);
 		return -1;
 	}
@@ -2096,36 +2095,58 @@ static int lxc_create_network_unpriv(const char *lxcpath, char *lxcname,
 
 	/* netdev->name */
 	token = strtok_r(buffer, ":", &saveptr);
-	if (!token)
+	if (!token) {
+		ERROR("Failed to parse lxc-user-nic output");
 		return -1;
+	}
 
 	netdev->name = malloc(IFNAMSIZ + 1);
 	if (!netdev->name) {
-		SYSERROR("Failed to allocate memory.");
+		SYSERROR("Failed to allocate memory");
 		return -1;
 	}
 	memset(netdev->name, 0, IFNAMSIZ + 1);
 	strncpy(netdev->name, token, IFNAMSIZ);
 
-	/* netdev->priv.veth_attr.pair */
+	/* netdev->ifindex */
 	token = strtok_r(NULL, ":", &saveptr);
-	if (!token)
+	if (!token) {
+		ERROR("Failed to parse lxc-user-nic output");
 		return -1;
+	}
 
-	netdev->priv.veth_attr.pair = strdup(token);
-	if (!netdev->priv.veth_attr.pair) {
-		ERROR("Failed to allocate memory.");
+	ret = lxc_safe_int(token, &netdev->ifindex);
+	if (ret < 0) {
+		ERROR("%s - Failed to convert string \"%s\" to integer",
+		      strerror(-ret), token);
 		return -1;
 	}
 
-	/* netdev->ifindex */
+	/* netdev->priv.veth_attr.veth1 */
 	token = strtok_r(NULL, ":", &saveptr);
-	if (!token)
+	if (!token) {
+		ERROR("Failed to parse lxc-user-nic output");
 		return -1;
+	}
 
-	ret = lxc_safe_int(token, &netdev->ifindex);
+	if (strlen(token) >= IFNAMSIZ) {
+		ERROR("Host side veth device name returned by lxc-user-nic is "
+		      "too long");
+		return -E2BIG;
+	}
+	strcpy(netdev->priv.veth_attr.veth1, token);
+
+	/* netdev->priv.veth_attr.ifindex */
+	token = strtok_r(NULL, ":", &saveptr);
+	if (!token) {
+		ERROR("Failed to parse lxc-user-nic output");
+		return -1;
+	}
+
+	ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
 	if (ret < 0) {
-		ERROR("Failed to parse ifindex for network device \"%s\"", netdev->name);
+		ERROR("%s - Failed to convert string \"%s\" to integer",
+		      strerror(-ret), token);
 		return -1;
 	}
 
@@ -2174,15 +2195,22 @@ static int lxc_delete_network_unpriv_exec(const char *lxcpath, char *lxcname,
 			exit(EXIT_FAILURE);
 		}
 
-		if (!netdev->link)
+		if (netdev->priv.veth_attr.veth1[0] == '\0') {
+			SYSERROR("Host side veth device name is missing");
+			exit(EXIT_FAILURE);
+		}
+
+		if (!netdev->link) {
 			SYSERROR("Network link for network device \"%s\" is "
-				 "missing", netdev->priv.veth_attr.pair);
+				 "missing", netdev->priv.veth_attr.veth1);
+			exit(EXIT_FAILURE);
+		}
 
 		INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
-		     lxcname, netns_path, netdev->link, netdev->priv.veth_attr.pair);
+		     lxcname, netns_path, netdev->link, netdev->priv.veth_attr.veth1);
 		execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
 		       lxcname, netns_path, "veth", netdev->link,
-		       netdev->priv.veth_attr.pair, (char *)NULL);
+		       netdev->priv.veth_attr.veth1, (char *)NULL);
 		SYSERROR("Failed to exec lxc-user-nic.");
 		exit(EXIT_FAILURE);
 	}
@@ -2284,7 +2312,7 @@ bool lxc_delete_network_unpriv(struct lxc_handler *handler)
 			deleted_all = false;
 			WARN("Failed to remove port \"%s\" from openvswitch "
 			     "bridge \"%s\"",
-			     netdev->priv.veth_attr.pair, netdev->link);
+			     netdev->priv.veth_attr.veth1, netdev->link);
 			continue;
 		}
 		INFO("Removed interface \"%s\" from \"%s\"", hostveth,
@@ -2323,33 +2351,19 @@ int lxc_create_network_priv(struct lxc_handler *handler)
 	return 0;
 }
 
-int lxc_create_network(const char *lxcpath, char *lxcname,
-		       struct lxc_list *network, pid_t pid)
+int lxc_network_move_created_netdev_priv(const char *lxcpath, char *lxcname,
+					 struct lxc_list *network, pid_t pid)
 {
 	int err;
-	bool am_root;
 	char ifname[IFNAMSIZ];
 	struct lxc_list *iterator;
 
-	am_root = (getuid() == 0);
+	if (am_unpriv())
+		return 0;
 
 	lxc_list_for_each(iterator, network) {
 		struct lxc_netdev *netdev = iterator->elem;
 
-		if (netdev->type == LXC_NET_VETH && !am_root) {
-			if (netdev->mtu)
-				INFO("mtu ignored due to insufficient privilege");
-			if (lxc_create_network_unpriv(lxcpath, lxcname, netdev, pid))
-				return -1;
-			/* lxc-user-nic has moved the nic to the new ns.
-			 * unpriv_assign_nic() fills in netdev->name.
-			 * netdev->ifindex will be filled in at
-			 * lxc_setup_netdev_in_child_namespaces().
-			 */
-			continue;
-		}
-
-		/* empty network namespace, nothing to move */
 		if (!netdev->ifindex)
 			continue;
 
@@ -2376,6 +2390,40 @@ int lxc_create_network(const char *lxcpath, char *lxcname,
 	return 0;
 }
 
+int lxc_create_network_unpriv(const char *lxcpath, char *lxcname,
+			      struct lxc_list *network, pid_t pid)
+{
+	struct lxc_list *iterator;
+
+	if (!am_unpriv())
+		return 0;
+
+	lxc_list_for_each(iterator, network) {
+		struct lxc_netdev *netdev = iterator->elem;
+
+		if (netdev->type == LXC_NET_EMPTY)
+			continue;
+
+		if (netdev->type == LXC_NET_NONE)
+			continue;
+
+		if (netdev->type != LXC_NET_VETH) {
+			ERROR("Networks of type %s are not supported by "
+			      "unprivileged containers",
+			      lxc_net_type_to_str(netdev->type));
+			return -1;
+		}
+
+		if (netdev->mtu)
+			INFO("mtu ignored due to insufficient privilege");
+
+		if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev, pid))
+			return -1;
+	}
+
+	return 0;
+}
+
 bool lxc_delete_network_priv(struct lxc_handler *handler)
 {
 	int ret;
@@ -2826,7 +2874,7 @@ static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev)
 		}
 	}
 
-	DEBUG("Network devie \"%s\" has been setup", current_ifname);
+	DEBUG("Network device \"%s\" has been setup", current_ifname);
 
 	return 0;
 }
@@ -2837,8 +2885,6 @@ int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
 	struct lxc_list *iterator;
 	struct lxc_netdev *netdev;
 
-	lxc_log_configured_netdevs(conf);
-
 	lxc_list_for_each(iterator, network) {
 		netdev = iterator->elem;
 
diff --git a/src/lxc/network.h b/src/lxc/network.h
index 4c359dc6c..3ef016642 100644
--- a/src/lxc/network.h
+++ b/src/lxc/network.h
@@ -291,11 +291,15 @@ extern const char *lxc_net_type_to_str(int type);
 extern int setup_private_host_hw_addr(char *veth1);
 extern int netdev_get_mtu(int ifindex);
 extern int lxc_create_network_priv(struct lxc_handler *handler);
+extern int lxc_network_move_created_netdev_priv(const char *lxcpath,
+						char *lxcname,
+						struct lxc_list *network,
+						pid_t pid);
 extern bool lxc_delete_network_priv(struct lxc_handler *handler);
 extern bool lxc_delete_network_unpriv(struct lxc_handler *handler);
 extern int lxc_find_gateway_addresses(struct lxc_handler *handler);
-extern int lxc_create_network(const char *lxcpath, char *lxcname,
-			      struct lxc_list *network, pid_t pid);
+extern int lxc_create_network_unpriv(const char *lxcpath, char *lxcname,
+				     struct lxc_list *network, pid_t pid);
 extern int lxc_requests_empty_network(struct lxc_handler *handler);
 extern void lxc_restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf);
 extern int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
diff --git a/src/lxc/start.c b/src/lxc/start.c
index cb9a68cf6..0ced4e918 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -60,6 +60,7 @@
 #include "commands.h"
 #include "commands_utils.h"
 #include "conf.h"
+#include "confile_utils.h"
 #include "console.h"
 #include "error.h"
 #include "log.h"
@@ -875,15 +876,19 @@ static int read_unpriv_netifindex(struct lxc_list *network)
 
 	if (netpipe == -1)
 		return 0;
+
 	lxc_list_for_each(iterator, network) {
 		netdev = iterator->elem;
 		if (netdev->type != LXC_NET_VETH)
 			continue;
-		if (!(netdev->name = malloc(IFNAMSIZ))) {
+
+		netdev->name = malloc(IFNAMSIZ);
+		if (!netdev->name) {
 			ERROR("Out of memory.");
 			close(netpipe);
 			return -1;
 		}
+
 		if (read(netpipe, netdev->name, IFNAMSIZ) != IFNAMSIZ) {
 			close(netpipe);
 			return -1;
@@ -1437,11 +1442,26 @@ static int lxc_spawn(struct lxc_handler *handler)
 
 	/* Create the network configuration. */
 	if (handler->clone_flags & CLONE_NEWNET) {
-		if (lxc_create_network(handler->lxcpath, handler->name,
-				       &handler->conf->network, handler->pid)) {
+		if (lxc_network_move_created_netdev_priv(handler->lxcpath,
+							 handler->name,
+							 &handler->conf->network,
+							 handler->pid)) {
 			ERROR("Failed to create the configured network.");
 			goto out_delete_net;
 		}
+
+		if (lxc_create_network_unpriv(handler->lxcpath, handler->name,
+					      &handler->conf->network,
+					      handler->pid)) {
+			ERROR("Failed to create the configured network.");
+			goto out_delete_net;
+		}
+
+		/* Now all networks are created and moved into place. The
+		 * corresponding structs have now all been filled. So log them
+		 * for debugging purposes.
+		 */
+		lxc_log_configured_netdevs(handler->conf);
 	}
 
 	if (netpipe != -1) {

From 59574a77e34c5646cc0fe74f4996a3e11a3d1f94 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Thu, 31 Aug 2017 22:58:30 +0200
Subject: [PATCH 08/32] network: retrieve the host's veth device ifindex

- Retrieve the host's veth device ifindex in the host's network namespace.
- Add a note why we retrieve the container's veth device ifindex in the host's
  network namespace.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/network.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/lxc/network.c b/src/lxc/network.c
index 8d565ed53..6a3c36dcc 100644
--- a/src/lxc/network.c
+++ b/src/lxc/network.c
@@ -140,6 +140,18 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd
 		goto out_delete;
 	}
 
+	/* Retrieve ifindex of the host's veth device. */
+	netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
+	if (!netdev->priv.veth_attr.ifindex) {
+		ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
+		goto out_delete;
+	}
+
+	/* Note that we're retrieving the container's ifindex in the host's
+	 * network namespace because we need it to move the device from the
+	 * host's network namespace to the container's network namespace later
+	 * on.
+	 */
 	netdev->ifindex = if_nametoindex(veth2);
 	if (!netdev->ifindex) {
 		ERROR("Failed to retrieve ifindex for \"%s\"", veth2);

From e1bbc5cef109cfb264d23618b7d24dfc35512b38 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Thu, 31 Aug 2017 23:01:46 +0200
Subject: [PATCH 09/32] start: non-functional changes

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/start.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/lxc/start.c b/src/lxc/start.c
index 0ced4e918..6232a7859 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -1273,14 +1273,13 @@ void resolve_clone_flags(struct lxc_handler *handler)
  */
 static int lxc_spawn(struct lxc_handler *handler)
 {
-	int failed_before_rename = 0;
+	int i, flags, nveths;
 	const char *name = handler->name;
-	bool cgroups_connected = false;
-	int saved_ns_fd[LXC_NS_MAX];
-	int preserve_mask = 0, i, flags;
-	int netpipepair[2], nveths;
 	bool wants_to_map_ids;
+	int netpipepair[2], saved_ns_fd[LXC_NS_MAX];
 	struct lxc_list *id_map;
+	int failed_before_rename = 0, preserve_mask = 0;
+	bool cgroups_connected = false;
 
 	netpipe = -1;
 	id_map = &handler->conf->id_map;

From e1008bcdc96c67d23b67323b33867c6c04954406 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Thu, 31 Aug 2017 23:08:28 +0200
Subject: [PATCH 10/32] lxc-user-nic: free memory and check for error

- check for error on ifindex retrieval
- free allocated memory

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/lxc_user_nic.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/lxc/lxc_user_nic.c b/src/lxc/lxc_user_nic.c
index 1853e0412..7541e4b8a 100644
--- a/src/lxc/lxc_user_nic.c
+++ b/src/lxc/lxc_user_nic.c
@@ -453,6 +453,8 @@ static int get_mtu(char *name)
 	int idx;
 
 	idx = if_nametoindex(name);
+	if (idx < 0)
+		return -1;
 	return netdev_get_mtu(idx);
 }
 
@@ -1215,6 +1217,12 @@ int main(int argc, char *argv[])
 		exit(EXIT_FAILURE);
 	}
 	host_veth_ifidx = if_nametoindex(nicname);
+	if (!host_veth_ifidx) {
+		free(newname);
+		free(nicname);
+		usernic_error("Failed to get netdev index: %s\n", strerror(errno));
+		exit(EXIT_FAILURE);
+	}
 
 	/* Write names of veth pairs and their ifindeces to stout:
 	 * (e.g. eth0:731:veth9MT2L4:730)

From 9d9677174c8a945c033d1426593adee42ba96e7f Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Thu, 31 Aug 2017 23:13:44 +0200
Subject: [PATCH 11/32] lxc-user-nic: initialize vars to silence gcc-7

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/lxc_user_nic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/lxc/lxc_user_nic.c b/src/lxc/lxc_user_nic.c
index 7541e4b8a..9164e44dc 100644
--- a/src/lxc/lxc_user_nic.c
+++ b/src/lxc/lxc_user_nic.c
@@ -1055,10 +1055,10 @@ static bool is_privileged_over_netns(int netns_fd)
 
 int main(int argc, char *argv[])
 {
-	int container_veth_ifidx, fd, host_veth_ifidx, n, pid, request, ret;
+	int fd, n, pid, request, ret;
 	char *me, *newname;
 	struct user_nic_args args;
-	int netns_fd = -1;
+	int container_veth_ifidx = -1, host_veth_ifidx = -1, netns_fd = -1;
 	char *cnic = NULL, *nicname = NULL;
 	struct alloted_s *alloted = NULL;
 

From c740c4d44cae8e362c647eb2c2d1f1390d527c97 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Fri, 1 Sep 2017 00:23:30 +0200
Subject: [PATCH 12/32] network: use static memory for net device names

All network device names must be shorter than IFNAMSIZ. So let's spare the
useless heap allocations and use static memory.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/confile.c       | 11 +++-------
 src/lxc/confile_utils.c | 11 ++++++++++
 src/lxc/confile_utils.h |  1 +
 src/lxc/criu.c          | 18 +++++++++++------
 src/lxc/network.c       | 54 ++++++++++++++++++++++++-------------------------
 src/lxc/network.h       |  6 +++---
 src/lxc/start.c         |  7 -------
 7 files changed, 56 insertions(+), 52 deletions(-)

diff --git a/src/lxc/confile.c b/src/lxc/confile.c
index 57d1d5607..d61e7b8f2 100644
--- a/src/lxc/confile.c
+++ b/src/lxc/confile.c
@@ -764,11 +764,6 @@ static struct lxc_netdev *network_netdev(const char *key, const char *value,
 	return netdev;
 }
 
-static int network_ifname(char **valuep, const char *value)
-{
-	return set_config_string_item_max(valuep, value, IFNAMSIZ);
-}
-
 #ifndef MACVLAN_MODE_PRIVATE
 #define MACVLAN_MODE_PRIVATE 1
 #endif
@@ -870,7 +865,7 @@ static int set_config_network_link(const char *key, const char *value,
 	if (!netdev)
 		return -1;
 
-	return network_ifname(&netdev->link, value);
+	return network_ifname(netdev->link, value);
 }
 
 static int set_config_network_name(const char *key, const char *value,
@@ -882,7 +877,7 @@ static int set_config_network_name(const char *key, const char *value,
 	if (!netdev)
 		return -1;
 
-	return network_ifname(&netdev->name, value);
+	return network_ifname(netdev->name, value);
 }
 
 static int set_config_network_veth_pair(const char *key, const char *value,
@@ -899,7 +894,7 @@ static int set_config_network_veth_pair(const char *key, const char *value,
 		return -1;
 	}
 
-	return network_ifname(&netdev->priv.veth_attr.pair, value);
+	return network_ifname(netdev->priv.veth_attr.pair, value);
 }
 
 static int set_config_network_macvlan_mode(const char *key, const char *value,
diff --git a/src/lxc/confile_utils.c b/src/lxc/confile_utils.c
index 31e705f9d..8b7b3a5b4 100644
--- a/src/lxc/confile_utils.c
+++ b/src/lxc/confile_utils.c
@@ -305,3 +305,14 @@ void lxc_log_configured_netdevs(const struct lxc_conf *conf)
 			TRACE("downscript: %s", netdev->downscript);
 	}
 }
+
+int network_ifname(char *valuep, const char *value)
+{
+	if (strlen(value) >= IFNAMSIZ) {
+		ERROR("Network devie name \"%s\" is too long (>= %zu)", value,
+		      (size_t)IFNAMSIZ);
+	}
+
+	strcpy(valuep, value);
+	return 0;
+}
diff --git a/src/lxc/confile_utils.h b/src/lxc/confile_utils.h
index 01cd0510b..53665b914 100644
--- a/src/lxc/confile_utils.h
+++ b/src/lxc/confile_utils.h
@@ -33,5 +33,6 @@ extern struct lxc_netdev *lxc_find_netdev_by_idx(struct lxc_conf *conf,
 extern struct lxc_netdev *lxc_get_netdev_by_idx(struct lxc_conf *conf,
 						unsigned int idx);
 extern void lxc_log_configured_netdevs(const struct lxc_conf *conf);
+extern int network_ifname(char *valuep, const char *value);
 
 #endif /* __LXC_CONFILE_UTILS_H */
diff --git a/src/lxc/criu.c b/src/lxc/criu.c
index fc7d086c8..b7354a33b 100644
--- a/src/lxc/criu.c
+++ b/src/lxc/criu.c
@@ -523,7 +523,7 @@ static void exec_criu(struct criu_opts *opts)
 			case LXC_NET_VETH:
 				veth = n->priv.veth_attr.pair;
 
-				if (n->link) {
+				if (n->link[0] != '\0') {
 					if (external_not_veth)
 						fmt = "veth[%s]:%s@%s";
 					else
@@ -542,7 +542,7 @@ static void exec_criu(struct criu_opts *opts)
 					goto err;
 				break;
 			case LXC_NET_MACVLAN:
-				if (!n->link) {
+				if (n->link[0] == '\0') {
 					ERROR("no host interface for macvlan %s", n->name);
 					goto err;
 				}
@@ -764,11 +764,17 @@ static bool restore_net_info(struct lxc_container *c)
 
 		snprintf(template, sizeof(template), "vethXXXXXX");
 
-		if (!netdev->priv.veth_attr.pair)
-			netdev->priv.veth_attr.pair = lxc_mkifname(template);
+		if (netdev->priv.veth_attr.pair[0] == '\0' &&
+		    netdev->priv.veth_attr.veth1[0] == '\0') {
+			char *tmp;
 
-		if (!netdev->priv.veth_attr.pair)
-			goto out_unlock;
+			tmp = lxc_mkifname(template);
+			if (!tmp)
+				goto out_unlock;
+
+			strcpy(netdev->priv.veth_attr.veth1, tmp);
+			free(tmp);
+		}
 	}
 
 	has_error = false;
diff --git a/src/lxc/network.c b/src/lxc/network.c
index 6a3c36dcc..a4476ccb2 100644
--- a/src/lxc/network.c
+++ b/src/lxc/network.c
@@ -101,7 +101,7 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd
 	char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
 	unsigned int mtu = 0;
 
-	if (netdev->priv.veth_attr.pair) {
+	if (netdev->priv.veth_attr.pair[0] != '\0') {
 		veth1 = netdev->priv.veth_attr.pair;
 		if (handler->conf->reboot)
 			lxc_netdev_delete_by_name(veth1);
@@ -163,7 +163,7 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd
 			WARN("Failed to parse mtu");
 		else
 			INFO("Retrieved mtu %d", mtu);
-	} else if (netdev->link) {
+	} else if (netdev->link[0] != '\0') {
 		bridge_index = if_nametoindex(netdev->link);
 		if (bridge_index) {
 			mtu = netdev_get_mtu(bridge_index);
@@ -186,7 +186,7 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd
 		}
 	}
 
-	if (netdev->link) {
+	if (netdev->link[0] != '\0') {
 		err = lxc_bridge_attach(netdev->link, veth1);
 		if (err) {
 			ERROR("Failed to attach \"%s\" to bridge \"%s\": %s",
@@ -217,7 +217,7 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd
 out_delete:
 	if (netdev->ifindex != 0)
 		lxc_netdev_delete_by_name(veth1);
-	if (!netdev->priv.veth_attr.pair)
+	if (netdev->priv.veth_attr.pair != veth1)
 		free(veth1);
 	free(veth2);
 	return -1;
@@ -228,7 +228,7 @@ static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *n
 	char peerbuf[IFNAMSIZ], *peer;
 	int err;
 
-	if (!netdev->link) {
+	if (netdev->link[0] == '\0') {
 		ERROR("No link for macvlan network device specified");
 		return -1;
 	}
@@ -279,7 +279,7 @@ static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netd
 	static uint16_t vlan_cntr = 0;
 	unsigned int mtu = 0;
 
-	if (!netdev->link) {
+	if (netdev->link[0] == '\0') {
 		ERROR("No link for vlan network device specified");
 		return -1;
 	}
@@ -308,7 +308,7 @@ static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netd
 		if (lxc_safe_uint(netdev->mtu, &mtu) < 0) {
 			ERROR("Failed to retrieve mtu from \"%d\"/\"%s\".",
 			      netdev->ifindex,
-			      netdev->name ? netdev->name : "(null)");
+			      netdev->name[0] != '\0' ? netdev->name : "(null)");
 			return -1;
 		}
 		err = lxc_netdev_set_mtu(peer, mtu);
@@ -325,7 +325,7 @@ static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netd
 
 static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
 {
-	if (!netdev->link) {
+	if (netdev->link[0] == '\0') {
 		ERROR("No link for physical interface specified");
 		return -1;
 	}
@@ -380,7 +380,7 @@ static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
 	char *veth1;
 	int err;
 
-	if (netdev->priv.veth_attr.pair)
+	if (netdev->priv.veth_attr.pair[0] != '\0')
 		veth1 = netdev->priv.veth_attr.pair;
 	else
 		veth1 = netdev->priv.veth_attr.veth1;
@@ -1986,7 +1986,7 @@ int lxc_find_gateway_addresses(struct lxc_handler *handler)
 			return -1;
 		}
 
-		if (!netdev->link) {
+		if (netdev->link[0] == '\0') {
 			ERROR("Automatic gateway detection needs a link interface");
 			return -1;
 		}
@@ -2060,7 +2060,7 @@ static int lxc_create_network_unpriv_exec(const char *lxcpath, char *lxcname,
 			exit(EXIT_FAILURE);
 		}
 
-		if (netdev->link)
+		if (netdev->link[0] != '\0')
 			strncpy(netdev_link, netdev->link, IFNAMSIZ);
 		else
 			strncpy(netdev_link, "none", IFNAMSIZ);
@@ -2072,8 +2072,8 @@ static int lxc_create_network_unpriv_exec(const char *lxcpath, char *lxcname,
 
 		INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
 		     lxcname, pidstr, netdev_link,
-		     netdev->name ? netdev->name : "(null)");
-		if (netdev->name)
+		     netdev->name[0] != '\0' ? netdev->name : "(null)");
+		if (netdev->name[0] != '\0')
 			execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
 			       lxcpath, lxcname, pidstr, "veth", netdev_link,
 			       netdev->name, (char *)NULL);
@@ -2112,11 +2112,6 @@ static int lxc_create_network_unpriv_exec(const char *lxcpath, char *lxcname,
 		return -1;
 	}
 
-	netdev->name = malloc(IFNAMSIZ + 1);
-	if (!netdev->name) {
-		SYSERROR("Failed to allocate memory");
-		return -1;
-	}
 	memset(netdev->name, 0, IFNAMSIZ + 1);
 	strncpy(netdev->name, token, IFNAMSIZ);
 
@@ -2212,7 +2207,7 @@ static int lxc_delete_network_unpriv_exec(const char *lxcpath, char *lxcname,
 			exit(EXIT_FAILURE);
 		}
 
-		if (!netdev->link) {
+		if (netdev->link[0] == '\0') {
 			SYSERROR("Network link for network device \"%s\" is "
 				 "missing", netdev->priv.veth_attr.veth1);
 			exit(EXIT_FAILURE);
@@ -2395,7 +2390,7 @@ int lxc_network_move_created_netdev_priv(const char *lxcpath, char *lxcname,
 		}
 
 		DEBUG("Moved network device \"%s\"/\"%s\" to network namespace "
-		      "of %d:", ifname, netdev->name ? netdev->name : "(null)",
+		      "of %d:", ifname, netdev->name[0] != '\0' ? netdev->name : "(null)",
 		      pid);
 	}
 
@@ -2482,18 +2477,18 @@ bool lxc_delete_network_priv(struct lxc_handler *handler)
 			INFO("Interface \"%s\" with index %d already "
 					"deleted or existing in different network "
 					"namespace",
-					netdev->name ? netdev->name : "(null)",
+					netdev->name[0] != '\0' ? netdev->name : "(null)",
 					netdev->ifindex);
 		} else if (ret < 0) {
 			deleted_all = false;
 			WARN("Failed to remove interface \"%s\" with "
 					"index %d: %s",
-					netdev->name ? netdev->name : "(null)",
+					netdev->name[0] != '\0' ? netdev->name : "(null)",
 					netdev->ifindex, strerror(-ret));
 			continue;
 		}
 		INFO("Removed interface \"%s\" with index %d",
-				netdev->name ? netdev->name : "(null)",
+				netdev->name[0] != '\0' ? netdev->name : "(null)",
 				netdev->ifindex);
 
 		if (netdev->type != LXC_NET_VETH)
@@ -2502,11 +2497,11 @@ bool lxc_delete_network_priv(struct lxc_handler *handler)
 		/* Explicitly delete host veth device to prevent lingering
 		 * devices. We had issues in LXD around this.
 		 */
-		if (netdev->priv.veth_attr.pair)
+		if (netdev->priv.veth_attr.pair[0] != '\0')
 			hostveth = netdev->priv.veth_attr.pair;
 		else
 			hostveth = netdev->priv.veth_attr.veth1;
-		if (*hostveth == '\0')
+		if (hostveth[0] == '\0')
 			continue;
 
 		ret = lxc_netdev_delete_by_name(hostveth);
@@ -2739,9 +2734,12 @@ static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev)
 	 * When the IFLA_IFNAME attribute is passed something like "<prefix>%d"
 	 * netlink will replace the format specifier with an appropriate index.
 	 */
-	if (!netdev->name)
-		netdev->name = netdev->type == LXC_NET_PHYS ?
-			netdev->link : "eth%d";
+	if (netdev->name[0] == '\0') {
+		if (netdev->type == LXC_NET_PHYS)
+			strcpy(netdev->name, netdev->link);
+		else
+			strcpy(netdev->name, "eth%d");
+	}
 
 	/* rename the interface name */
 	if (strcmp(ifname, netdev->name) != 0) {
diff --git a/src/lxc/network.h b/src/lxc/network.h
index 3ef016642..331d98208 100644
--- a/src/lxc/network.h
+++ b/src/lxc/network.h
@@ -91,7 +91,7 @@ struct lxc_route6 {
  * @ifindex : Ifindex of the network device.
  */
 struct ifla_veth {
-	char *pair;
+	char pair[IFNAMSIZ];
 	char veth1[IFNAMSIZ];
 	int ifindex;
 };
@@ -151,8 +151,8 @@ struct lxc_netdev {
 	int ifindex;
 	int type;
 	int flags;
-	char *link;
-	char *name;
+	char link[IFNAMSIZ];
+	char name[IFNAMSIZ];
 	char *hwaddr;
 	char *mtu;
 	union netdev_p priv;
diff --git a/src/lxc/start.c b/src/lxc/start.c
index 6232a7859..327c7cb95 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -882,13 +882,6 @@ static int read_unpriv_netifindex(struct lxc_list *network)
 		if (netdev->type != LXC_NET_VETH)
 			continue;
 
-		netdev->name = malloc(IFNAMSIZ);
-		if (!netdev->name) {
-			ERROR("Out of memory.");
-			close(netpipe);
-			return -1;
-		}
-
 		if (read(netpipe, netdev->name, IFNAMSIZ) != IFNAMSIZ) {
 			close(netpipe);
 			return -1;

From a840c5a4f8f12405cfe6a868ee8257db8fb6a461 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Fri, 1 Sep 2017 12:54:43 +0200
Subject: [PATCH 13/32] network: non-functional changes

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/network.c | 19 +++++++++++--------
 src/lxc/network.h | 13 +++++--------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/lxc/network.c b/src/lxc/network.c
index a4476ccb2..242e1d39d 100644
--- a/src/lxc/network.c
+++ b/src/lxc/network.c
@@ -1889,7 +1889,10 @@ char *lxc_mkifname(const char *template)
 	struct ifaddrs *ifaddr, *ifa;
 	int ifexists = 0;
 
-	/* Get all the network interfaces */
+	if (strlen(template) >= IFNAMSIZ)
+		return NULL;
+
+	/* Get all the network interfaces. */
 	getifaddrs(&ifaddr);
 
 	/* Initialize the random number generator */
@@ -2361,7 +2364,7 @@ int lxc_create_network_priv(struct lxc_handler *handler)
 int lxc_network_move_created_netdev_priv(const char *lxcpath, char *lxcname,
 					 struct lxc_list *network, pid_t pid)
 {
-	int err;
+	int ret;
 	char ifname[IFNAMSIZ];
 	struct lxc_list *iterator;
 
@@ -2381,16 +2384,17 @@ int lxc_network_move_created_netdev_priv(const char *lxcpath, char *lxcname,
 			return -1;
 		}
 
-		err = lxc_netdev_move_by_name(ifname, pid, NULL);
-		if (err) {
+		ret = lxc_netdev_move_by_name(ifname, pid, NULL);
+		if (ret) {
 			ERROR("Failed to move network device \"%s\" to "
 			      "network namespace %d: %s", ifname, pid,
-			      strerror(-err));
+			      strerror(-ret));
 			return -1;
 		}
 
 		DEBUG("Moved network device \"%s\"/\"%s\" to network namespace "
-		      "of %d:", ifname, netdev->name[0] != '\0' ? netdev->name : "(null)",
+		      "of %d:",
+		      ifname, netdev->name[0] != '\0' ? netdev->name : "(null)",
 		      pid);
 	}
 
@@ -2705,8 +2709,7 @@ static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev)
 		if (netdev->type != LXC_NET_VETH) {
 			net_type_name = lxc_net_type_to_str(netdev->type);
 			ERROR("%s networks are not supported for containers "
-			      "not setup up by privileged users",
-			      net_type_name);
+			      "not setup up by privileged users", net_type_name);
 			return -1;
 		}
 
diff --git a/src/lxc/network.h b/src/lxc/network.h
index 331d98208..3f49a13d8 100644
--- a/src/lxc/network.h
+++ b/src/lxc/network.h
@@ -174,15 +174,12 @@ struct saved_nic {
 /* Convert a string mac address to a socket structure. */
 extern int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr);
 
-/*
- * Move a device between namespaces
- */
-extern int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char* ifname);
-extern int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname);
+/* Move a device between namespaces. */
+extern int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname);
+extern int lxc_netdev_move_by_name(const char *ifname, pid_t pid,
+				   const char *newname);
 
-/*
- * Delete a network device
- */
+/* Delete a network device. */
 extern int lxc_netdev_delete_by_name(const char *name);
 extern int lxc_netdev_delete_by_index(int ifindex);
 

From dce32a4a7403775c62a5dbbd4ce733c4b5a565c9 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Fri, 1 Sep 2017 13:04:00 +0200
Subject: [PATCH 14/32] start: non-functional changes

This renames the socketpair() variable "ttysock" to "data_sock" since we will
use it to send arbitrary data around, not just ttys anymore.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/conf.c  |  6 +++---
 src/lxc/start.c | 15 ++++++++-------
 src/lxc/start.h | 14 +++++++++-----
 3 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index d92937ce7..52ada2ffa 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -3010,7 +3010,7 @@ static int lxc_send_ttys_to_parent(struct lxc_handler *handler)
 	struct lxc_pty_info *pty_info;
 	struct lxc_conf *conf = handler->conf;
 	const struct lxc_tty_info *tty_info = &conf->tty_info;
-	int sock = handler->ttysock[0];
+	int sock = handler->data_sock[0];
 	int ret = -1;
 	size_t num_ttyfds = (2 * conf->tty);
 
@@ -3034,8 +3034,8 @@ static int lxc_send_ttys_to_parent(struct lxc_handler *handler)
 	else
 		TRACE("sent %d ttys to parent", conf->tty);
 
-	close(handler->ttysock[0]);
-	close(handler->ttysock[1]);
+	close(handler->data_sock[0]);
+	close(handler->data_sock[1]);
 
 	for (i = 0; i < num_ttyfds; i++)
 		close(ttyfds[i]);
diff --git a/src/lxc/start.c b/src/lxc/start.c
index 327c7cb95..529f67c27 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -532,7 +532,7 @@ struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf,
 
 	memset(handler, 0, sizeof(*handler));
 
-	handler->ttysock[0] = handler->ttysock[1] = -1;
+	handler->data_sock[0] = handler->data_sock[1] = -1;
 	handler->conf = conf;
 	handler->lxcpath = lxcpath;
 	handler->pinfd = -1;
@@ -756,9 +756,9 @@ void lxc_fini(const char *name, struct lxc_handler *handler)
 		free(cur);
 	}
 
-	if (handler->ttysock[0] != -1) {
-		close(handler->ttysock[0]);
-		close(handler->ttysock[1]);
+	if (handler->data_sock[0] != -1) {
+		close(handler->data_sock[0]);
+		close(handler->data_sock[1]);
 	}
 
 	if (handler->conf->ephemeral == 1 && handler->conf->reboot != 1)
@@ -1193,7 +1193,7 @@ static int lxc_recv_ttys_from_child(struct lxc_handler *handler)
 	int *ttyfds;
 	struct lxc_pty_info *pty_info;
 	int ret = -1;
-	int sock = handler->ttysock[1];
+	int sock = handler->data_sock[1];
 	struct lxc_conf *conf = handler->conf;
 	struct lxc_tty_info *tty_info = &conf->tty_info;
 	size_t num_ttyfds = (2 * conf->tty);
@@ -1266,7 +1266,7 @@ void resolve_clone_flags(struct lxc_handler *handler)
  */
 static int lxc_spawn(struct lxc_handler *handler)
 {
-	int i, flags, nveths;
+	int i, flags, nveths, ret;
 	const char *name = handler->name;
 	bool wants_to_map_ids;
 	int netpipepair[2], saved_ns_fd[LXC_NS_MAX];
@@ -1285,7 +1285,8 @@ static int lxc_spawn(struct lxc_handler *handler)
 	if (lxc_sync_init(handler))
 		return -1;
 
-	if (socketpair(AF_UNIX, SOCK_DGRAM, 0, handler->ttysock) < 0) {
+	ret = socketpair(AF_UNIX, SOCK_DGRAM, 0, handler->data_sock);
+	if (ret < 0) {
 		lxc_sync_fini(handler);
 		return -1;
 	}
diff --git a/src/lxc/start.h b/src/lxc/start.h
index d8d06cfbf..272abb076 100644
--- a/src/lxc/start.h
+++ b/src/lxc/start.h
@@ -48,13 +48,17 @@ struct lxc_handler {
 	int pinfd;
 	const char *lxcpath;
 	void *cgroup_data;
-	int ttysock[2]; // socketpair for child->parent tty fd passing
-	bool backgrounded; // indicates whether should we close std{in,out,err} on start
+
+	/* Abstract unix domain SOCK_DGRAM socketpair to pass arbitrary data
+	 * between child and parent.
+	 */
+	int data_sock[2];
+
+	/* indicates whether should we close std{in,out,err} on start */
+	bool backgrounded;
 	int nsfd[LXC_NS_MAX];
 	int netnsfd;
-	/* The socketpair() fds used to wait on successful daemonized
-	 * startup.
-	 */
+	/* The socketpair() fds used to wait on successful daemonized startup. */
 	int state_socket_pair[2];
 	struct lxc_list state_clients;
 };

From 3c8aec6383be2c28d0f883351bdf522139226c9e Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Fri, 1 Sep 2017 15:30:28 +0200
Subject: [PATCH 15/32] network: retrieve correct names and ifindices

On privileged network creation we only retrieved the names and ifindices of
network devices in the host's network namespace. This meant that the monitor
process was acting on possibly incorrect information. With this commit we have
the child send back the correct device names and ifindices in the container's
network namespace.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/conf.c    | 42 ++++++++++++++++++++++++++++
 src/lxc/network.c | 20 +++++++++++++-
 src/lxc/network.h | 11 +++++++-
 src/lxc/start.c   | 82 ++++++++++++++++++++++++++++++++++++++++++++-----------
 src/lxc/start.h   |  1 +
 5 files changed, 138 insertions(+), 18 deletions(-)

diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index 52ada2ffa..268c73e4f 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -3045,6 +3045,43 @@ static int lxc_send_ttys_to_parent(struct lxc_handler *handler)
 	return ret;
 }
 
+static int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
+{
+	struct lxc_list *iterator, *network;
+	int data_sock = handler->data_sock[0];
+
+	if (!handler->root)
+		return 0;
+
+	network = &handler->conf->network;
+	lxc_list_for_each(iterator, network) {
+		int ret;
+		struct lxc_netdev *netdev = iterator->elem;
+
+		/* Send network device name in the child's namespace to parent. */
+		ret = lxc_abstract_unix_send_credential(data_sock, netdev->name,
+							IFNAMSIZ);
+		if (ret < 0)
+			goto on_error;
+
+		/* Send network device ifindex in the child's namespace to
+		 * parent.
+		 */
+		ret = lxc_abstract_unix_send_credential(data_sock, &netdev->ifindex,
+							sizeof(netdev->ifindex));
+		if (ret < 0)
+			goto on_error;
+	}
+
+	TRACE("Sent network device names and ifindeces to parent");
+	return 0;
+
+on_error:
+	close(handler->data_sock[0]);
+	close(handler->data_sock[1]);
+	return -1;
+}
+
 int lxc_setup(struct lxc_handler *handler)
 {
 	const char *name = handler->name;
@@ -3068,6 +3105,11 @@ int lxc_setup(struct lxc_handler *handler)
 		return -1;
 	}
 
+	if (lxc_network_send_name_and_ifindex_to_parent(handler) < 0) {
+		ERROR("Failed to network device names and ifindices to parent");
+		return -1;
+	}
+
 	if (lxc_conf->autodev > 0) {
 		if (mount_autodev(name, &lxc_conf->rootfs, lxcpath)) {
 			ERROR("failed to mount /dev in the container");
diff --git a/src/lxc/network.c b/src/lxc/network.c
index 242e1d39d..c965653df 100644
--- a/src/lxc/network.c
+++ b/src/lxc/network.c
@@ -330,12 +330,24 @@ static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netd
 		return -1;
 	}
 
+	/* Note that we're retrieving the container's ifindex in the host's
+	 * network namespace because we need it to move the device from the
+	 * host's network namespace to the container's network namespace later
+	 * on.
+	 * Note that netdev->link will contain the name of the physical network
+	 * device in the host's namespace.
+	 */
 	netdev->ifindex = if_nametoindex(netdev->link);
 	if (!netdev->ifindex) {
 		ERROR("Failed to retrieve ifindex for \"%s\"", netdev->link);
 		return -1;
 	}
 
+	/* Store the ifindex of the host's network device in the host's
+	 * namespace.
+	 */
+	netdev->priv.phys_attr.ifindex = netdev->ifindex;
+
 	if (netdev->upscript) {
 		int err;
 		err = run_script(handler->name, "net", netdev->upscript,
@@ -2393,7 +2405,7 @@ int lxc_network_move_created_netdev_priv(const char *lxcpath, char *lxcname,
 		}
 
 		DEBUG("Moved network device \"%s\"/\"%s\" to network namespace "
-		      "of %d:",
+		      "of %d",
 		      ifname, netdev->name[0] != '\0' ? netdev->name : "(null)",
 		      pid);
 	}
@@ -2763,6 +2775,12 @@ static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev)
 		return -1;
 	}
 
+	/* Now update the recorded name of the network device to reflect the
+	 * name of the network device in the child's network namespace. We will
+	 * later on send this information back to the parent.
+	 */
+	strcpy(netdev->name, current_ifname);
+
 	/* set a mac address */
 	if (netdev->hwaddr) {
 		if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
diff --git a/src/lxc/network.h b/src/lxc/network.h
index 3f49a13d8..f80ea15cb 100644
--- a/src/lxc/network.h
+++ b/src/lxc/network.h
@@ -107,10 +107,19 @@ struct ifla_macvlan {
 	int mode; /* private, vepa, bridge, passthru */
 };
 
+/* Contains information about the physical network device as seen from the host.
+ * @ifindex : The ifindex of the physical network device in the host's network
+ *            namespace.
+ */
+struct ifla_phys {
+	int ifindex;
+};
+
 union netdev_p {
+	struct ifla_macvlan macvlan_attr;
+	struct ifla_phys phys_attr;
 	struct ifla_veth veth_attr;
 	struct ifla_vlan vlan_attr;
-	struct ifla_macvlan macvlan_attr;
 };
 
 /*
diff --git a/src/lxc/start.c b/src/lxc/start.c
index 529f67c27..616651187 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -82,7 +82,7 @@
 lxc_log_define(lxc_start, lxc);
 
 extern void mod_all_rdeps(struct lxc_container *c, bool inc);
-static bool do_destroy_container(struct lxc_conf *conf);
+static bool do_destroy_container(struct lxc_handler *handler);
 static int lxc_rmdir_onedev_wrapper(void *data);
 static void lxc_destroy_container_on_signal(struct lxc_handler *handler,
 					    const char *name);
@@ -532,6 +532,11 @@ struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf,
 
 	memset(handler, 0, sizeof(*handler));
 
+	/* Note that am_unpriv() checks the effective uid. We probably don't
+	 * care if we are real root only if we are running as root so this
+	 * should be fine.
+	 */
+	handler->root = !am_unpriv();
 	handler->data_sock[0] = handler->data_sock[1] = -1;
 	handler->conf = conf;
 	handler->lxcpath = lxcpath;
@@ -1187,6 +1192,43 @@ static int save_phys_nics(struct lxc_conf *conf)
 	return 0;
 }
 
+static int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
+{
+	struct lxc_list *iterator, *network;
+	int data_sock = handler->data_sock[1];
+
+	if (!handler->root)
+		return 0;
+
+	network = &handler->conf->network;
+	lxc_list_for_each(iterator, network) {
+		int ret;
+		struct lxc_netdev *netdev = iterator->elem;
+
+		/* Receive network device name in the child's namespace to
+		 * parent.
+		 */
+		ret = lxc_abstract_unix_rcv_credential(data_sock, netdev->name, IFNAMSIZ);
+		if (ret < 0)
+			goto on_error;
+
+		/* Receive network device ifindex in the child's namespace to
+		 * parent.
+		 */
+		ret = lxc_abstract_unix_rcv_credential(data_sock, &netdev->ifindex,
+						       sizeof(netdev->ifindex));
+		if (ret < 0)
+			goto on_error;
+	}
+
+	return 0;
+
+on_error:
+	close(handler->data_sock[0]);
+	close(handler->data_sock[1]);
+	return -1;
+}
+
 static int lxc_recv_ttys_from_child(struct lxc_handler *handler)
 {
 	int i;
@@ -1285,7 +1327,8 @@ static int lxc_spawn(struct lxc_handler *handler)
 	if (lxc_sync_init(handler))
 		return -1;
 
-	ret = socketpair(AF_UNIX, SOCK_DGRAM, 0, handler->data_sock);
+	ret = socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0,
+			 handler->data_sock);
 	if (ret < 0) {
 		lxc_sync_fini(handler);
 		return -1;
@@ -1351,7 +1394,7 @@ static int lxc_spawn(struct lxc_handler *handler)
 	if (attach_ns(handler->conf->inherit_ns_fd) < 0)
 		goto out_delete_net;
 
-	if (am_unpriv() && (nveths = count_veths(&handler->conf->network))) {
+	if (!handler->root && (nveths = count_veths(&handler->conf->network))) {
 		if (pipe(netpipepair) < 0) {
 			SYSERROR("Failed to create pipe.");
 			goto out_delete_net;
@@ -1449,12 +1492,6 @@ static int lxc_spawn(struct lxc_handler *handler)
 			ERROR("Failed to create the configured network.");
 			goto out_delete_net;
 		}
-
-		/* Now all networks are created and moved into place. The
-		 * corresponding structs have now all been filled. So log them
-		 * for debugging purposes.
-		 */
-		lxc_log_configured_netdevs(handler->conf);
 	}
 
 	if (netpipe != -1) {
@@ -1501,6 +1538,19 @@ static int lxc_spawn(struct lxc_handler *handler)
 	if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CGROUP))
 		return -1;
 
+	if (lxc_network_recv_name_and_ifindex_from_child(handler) < 0) {
+		ERROR("Failed to receive names and ifindices for network "
+		      "devices from child");
+		goto out_delete_net;
+	}
+
+	/* Now all networks are created, network devices are moved into place,
+	 * and the correct names and ifindeces in the respective namespaces have
+	 * been recorded. The corresponding structs have now all been filled. So
+	 * log them for debugging purposes.
+	 */
+	lxc_log_configured_netdevs(handler->conf);
+
 	/* Read tty fds allocated by child. */
 	if (lxc_recv_ttys_from_child(handler) < 0) {
 		ERROR("Failed to receive tty info from child process.");
@@ -1722,7 +1772,7 @@ static void lxc_destroy_container_on_signal(struct lxc_handler *handler,
 	int ret = 0;
 	struct lxc_container *c;
 	if (handler->conf->rootfs.path && handler->conf->rootfs.mount) {
-		bret = do_destroy_container(handler->conf);
+		bret = do_destroy_container(handler);
 		if (!bret) {
 			ERROR("Error destroying rootfs for container \"%s\".", name);
 			return;
@@ -1748,7 +1798,7 @@ static void lxc_destroy_container_on_signal(struct lxc_handler *handler,
 		}
 	}
 
-	if (am_unpriv())
+	if (!handler->root)
 		ret = userns_exec_1(handler->conf, lxc_rmdir_onedev_wrapper,
 				    destroy, "lxc_rmdir_onedev_wrapper");
 	else
@@ -1767,14 +1817,14 @@ static int lxc_rmdir_onedev_wrapper(void *data)
 	return lxc_rmdir_onedev(arg, NULL);
 }
 
-static bool do_destroy_container(struct lxc_conf *conf) {
-	if (am_unpriv()) {
-		if (userns_exec_1(conf, storage_destroy_wrapper, conf,
-				  "storage_destroy_wrapper") < 0)
+static bool do_destroy_container(struct lxc_handler *handler) {
+	if (!handler->root) {
+		if (userns_exec_1(handler->conf, storage_destroy_wrapper,
+				  handler->conf, "storage_destroy_wrapper") < 0)
 			return false;
 
 		return true;
 	}
 
-	return storage_destroy(conf);
+	return storage_destroy(handler->conf);
 }
diff --git a/src/lxc/start.h b/src/lxc/start.h
index 272abb076..99cadd851 100644
--- a/src/lxc/start.h
+++ b/src/lxc/start.h
@@ -35,6 +35,7 @@
 #include "namespace.h"
 
 struct lxc_handler {
+	bool root;
 	pid_t pid;
 	char *name;
 	lxc_state_t state;

From d3c66b20cae5bcd13e0afcbabdb4b9385e8699ff Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Fri, 1 Sep 2017 16:44:46 +0200
Subject: [PATCH 16/32] network: stop recording saved physical net devices

liblxc will now correctly log any network device names and ifindices in their
respective network namespaces. So there's no need to record physical network
devices any more. This spares us heap allocations and memory we need to have
lying around until the container is shut down.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/conf.c          | 12 --------
 src/lxc/conf.h          |  2 --
 src/lxc/confile_utils.c |  4 +++
 src/lxc/network.c       | 75 +++++++++++++++++++++++++++++--------------------
 src/lxc/network.h       |  7 +----
 src/lxc/start.c         | 41 +++------------------------
 6 files changed, 54 insertions(+), 87 deletions(-)

diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index 268c73e4f..b1d35e98a 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -3511,17 +3511,6 @@ int lxc_clear_hooks(struct lxc_conf *c, const char *key)
 	return 0;
 }
 
-static void lxc_clear_saved_nics(struct lxc_conf *conf)
-{
-	int i;
-
-	if (!conf->saved_nics)
-		return;
-	for (i=0; i < conf->num_savednics; i++)
-		free(conf->saved_nics[i].orig_name);
-	free(conf->saved_nics);
-}
-
 static inline void lxc_clear_aliens(struct lxc_conf *conf)
 {
 	struct lxc_list *it,*next;
@@ -3575,7 +3564,6 @@ void lxc_conf_free(struct lxc_conf *conf)
 	lxc_clear_cgroups(conf, "lxc.cgroup");
 	lxc_clear_hooks(conf, "lxc.hook");
 	lxc_clear_mount_entries(conf);
-	lxc_clear_saved_nics(conf);
 	lxc_clear_idmaps(conf);
 	lxc_clear_groups(conf);
 	lxc_clear_includes(conf);
diff --git a/src/lxc/conf.h b/src/lxc/conf.h
index 43e836fa6..3aaf51440 100644
--- a/src/lxc/conf.h
+++ b/src/lxc/conf.h
@@ -211,8 +211,6 @@ struct lxc_conf {
 	struct lxc_list cgroup;
 	struct lxc_list id_map;
 	struct lxc_list network;
-	struct saved_nic *saved_nics;
-	int num_savednics;
 	int auto_mounts;
 	struct lxc_list mount_list;
 	struct lxc_list caps;
diff --git a/src/lxc/confile_utils.c b/src/lxc/confile_utils.c
index 8b7b3a5b4..dd3548e5b 100644
--- a/src/lxc/confile_utils.c
+++ b/src/lxc/confile_utils.c
@@ -278,6 +278,10 @@ void lxc_log_configured_netdevs(const struct lxc_conf *conf)
 			break;
 		case LXC_NET_PHYS:
 			TRACE("type: phys");
+			if (netdev->priv.phys_attr.ifindex > 0) {
+				TRACE("host side ifindex for phys device: %d",
+				      netdev->priv.phys_attr.ifindex);
+			}
 			break;
 		case LXC_NET_EMPTY:
 			TRACE("type: empty");
diff --git a/src/lxc/network.c b/src/lxc/network.c
index c965653df..f52a139b4 100644
--- a/src/lxc/network.c
+++ b/src/lxc/network.c
@@ -2277,7 +2277,7 @@ bool lxc_delete_network_unpriv(struct lxc_handler *handler)
 	char netns_path[6 + LXC_NUMSTRLEN64 + 4 + LXC_NUMSTRLEN64 + 1];
 	bool deleted_all = true;
 
-	if (!am_unpriv())
+	if (handler->root)
 		return true;
 
 	*netns_path = '\0';
@@ -2346,13 +2346,10 @@ bool lxc_delete_network_unpriv(struct lxc_handler *handler)
 
 int lxc_create_network_priv(struct lxc_handler *handler)
 {
-	bool am_root;
 	struct lxc_list *iterator;
 	struct lxc_list *network = &handler->conf->network;
 
-	/* We need to be root. */
-	am_root = (getuid() == 0);
-	if (!am_root)
+	if (!handler->root)
 		return 0;
 
 	lxc_list_for_each(iterator, network) {
@@ -2454,7 +2451,7 @@ bool lxc_delete_network_priv(struct lxc_handler *handler)
 	struct lxc_list *network = &handler->conf->network;
 	bool deleted_all = true;
 
-	if (am_unpriv())
+	if (!handler->root)
 		return true;
 
 	lxc_list_for_each(iterator, network) {
@@ -2471,12 +2468,12 @@ bool lxc_delete_network_priv(struct lxc_handler *handler)
 			ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
 			if (ret < 0)
 				WARN("Failed to rename interface with index %d "
-				     "to its initial name \"%s\"",
-				     netdev->ifindex, netdev->link);
+				     "from \"%s\" to its initial name \"%s\"",
+				     netdev->ifindex, netdev->name, netdev->link);
 			else
 				TRACE("Renamed interface with index %d to its "
-				      "initial name \"%s\"",
-				      netdev->ifindex, netdev->link);
+				      "from \"%s\" to its initial name \"%s\"",
+				      netdev->ifindex, netdev->name, netdev->link);
 			continue;
 		}
 
@@ -2572,51 +2569,69 @@ int lxc_requests_empty_network(struct lxc_handler *handler)
 }
 
 /* try to move physical nics to the init netns */
-void lxc_restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf)
+int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
 {
 	int ret;
-	int i, oldfd;
+	int oldfd;
 	char ifname[IFNAMSIZ];
+	struct lxc_list *iterator;
+	int netnsfd = handler->netnsfd;
+	struct lxc_conf *conf = handler->conf;
 
-	if (netnsfd < 0 || conf->num_savednics == 0)
-		return;
+	/* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
+	 * the parent network namespace. We won't have this capability if we are
+	 * unprivileged.
+	 */
+	if (!handler->root)
+		return 0;
 
-	INFO("Trying to restore network device names in original namespace for "
-	     "%d network devices", conf->num_savednics);
+	TRACE("Moving physical network devices back to parent network namespace");
 
 	oldfd = lxc_preserve_ns(getpid(), "net");
 	if (oldfd < 0) {
 		SYSERROR("Failed to preserve network namespace");
-		return;
+		return -1;
 	}
 
-	ret = setns(netnsfd, 0);
+	ret = setns(netnsfd, CLONE_NEWNET);
 	if (ret < 0) {
 		SYSERROR("Failed to enter network namespace");
 		close(oldfd);
-		return;
+		return -1;
 	}
 
-	for (i = 0; i < conf->num_savednics; i++) {
-		struct saved_nic *s = &conf->saved_nics[i];
+	lxc_list_for_each(iterator, &conf->network) {
+		struct lxc_netdev *netdev = iterator->elem;
 
-		/* retrieve the name of the interface */
-		if (!if_indextoname(s->ifindex, ifname)) {
+		if (netdev->type != LXC_NET_PHYS)
+			continue;
+
+		/* Retrieve the name of the interface in the container's network
+		 * namespace.
+		 */
+		if (!if_indextoname(netdev->ifindex, ifname)) {
 			WARN("No interface corresponding to ifindex %d",
-			     s->ifindex);
+			     netdev->ifindex);
 			continue;
 		}
-		if (lxc_netdev_move_by_name(ifname, 1, s->orig_name))
+
+		ret = lxc_netdev_move_by_name(ifname, 1, netdev->link);
+		if (ret < 0)
 			WARN("Error moving network device \"%s\" back to "
 			     "network namespace", ifname);
-		free(s->orig_name);
+		else
+			TRACE("Moved network device \"%s\" back to network "
+			      "namespace", ifname);
 	}
-	conf->num_savednics = 0;
 
-	ret = setns(oldfd, 0);
-	if (ret < 0)
-		SYSERROR("Failed to enter network namespace");
+	ret = setns(oldfd, CLONE_NEWNET);
 	close(oldfd);
+	if (ret < 0) {
+		SYSERROR("Failed to enter network namespace");
+		return -1;
+	}
+
+	return 0;
 }
 
 static int setup_hw_addr(char *hwaddr, const char *ifname)
diff --git a/src/lxc/network.h b/src/lxc/network.h
index f80ea15cb..9badf14b8 100644
--- a/src/lxc/network.h
+++ b/src/lxc/network.h
@@ -175,11 +175,6 @@ struct lxc_netdev {
 	char *downscript;
 };
 
-struct saved_nic {
-	int ifindex;
-	char *orig_name;
-};
-
 /* Convert a string mac address to a socket structure. */
 extern int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr);
 
@@ -307,7 +302,7 @@ extern int lxc_find_gateway_addresses(struct lxc_handler *handler);
 extern int lxc_create_network_unpriv(const char *lxcpath, char *lxcname,
 				     struct lxc_list *network, pid_t pid);
 extern int lxc_requests_empty_network(struct lxc_handler *handler);
-extern void lxc_restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf);
+extern int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler);
 extern int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
 						 struct lxc_list *network);
 
diff --git a/src/lxc/start.c b/src/lxc/start.c
index 616651187..116b1ebd8 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -1162,36 +1162,6 @@ static int do_start(void *data)
 	return -1;
 }
 
-static int save_phys_nics(struct lxc_conf *conf)
-{
-	struct lxc_list *iterator;
-	int am_root = (getuid() == 0);
-
-	if (!am_root)
-		return 0;
-
-	lxc_list_for_each(iterator, &conf->network) {
-		struct lxc_netdev *netdev = iterator->elem;
-
-		if (netdev->type != LXC_NET_PHYS)
-			continue;
-		conf->saved_nics = realloc(conf->saved_nics,
-				(conf->num_savednics+1)*sizeof(struct saved_nic));
-		if (!conf->saved_nics)
-			return -1;
-		conf->saved_nics[conf->num_savednics].ifindex = netdev->ifindex;
-		conf->saved_nics[conf->num_savednics].orig_name = strdup(netdev->link);
-		if (!conf->saved_nics[conf->num_savednics].orig_name)
-			return -1;
-		INFO("Stored saved_nic #%d idx %d name %s.", conf->num_savednics,
-			conf->saved_nics[conf->num_savednics].ifindex,
-			conf->saved_nics[conf->num_savednics].orig_name);
-		conf->num_savednics++;
-	}
-
-	return 0;
-}
-
 static int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
 {
 	struct lxc_list *iterator, *network;
@@ -1359,11 +1329,6 @@ static int lxc_spawn(struct lxc_handler *handler)
 				return -1;
 			}
 		}
-
-		if (save_phys_nics(handler->conf)) {
-			ERROR("Failed to save physical nic info.");
-			goto out_abort;
-		}
 	}
 
 	if (!cgroup_init(handler)) {
@@ -1689,8 +1654,10 @@ int __lxc_start(const char *name, struct lxc_handler *handler,
 		}
 	}
 
-	DEBUG("Pushing physical nics back to host namespace");
-	lxc_restore_phys_nics_to_netns(handler->netnsfd, handler->conf);
+	err = lxc_restore_phys_nics_to_netns(handler);
+	if (err < 0)
+		ERROR("Failed to move physical network devices back to parent "
+		      "network namespace");
 
 	if (handler->pinfd >= 0) {
 		close(handler->pinfd);

From 604c11cfcc7417eb72a9da42b63c1f18439c65e9 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Fri, 1 Sep 2017 19:34:43 +0200
Subject: [PATCH 17/32] network: use correct network device name

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/network.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/src/lxc/network.c b/src/lxc/network.c
index f52a139b4..680588508 100644
--- a/src/lxc/network.c
+++ b/src/lxc/network.c
@@ -2204,6 +2204,7 @@ static int lxc_delete_network_unpriv_exec(const char *lxcpath, char *lxcname,
 	}
 
 	if (child == 0) {
+		char *hostveth;
 		int ret;
 
 		close(pipefd[0]);
@@ -2217,7 +2218,11 @@ static int lxc_delete_network_unpriv_exec(const char *lxcpath, char *lxcname,
 			exit(EXIT_FAILURE);
 		}
 
-		if (netdev->priv.veth_attr.veth1[0] == '\0') {
+		if (netdev->priv.veth_attr.pair[0] != '\0')
+			hostveth = netdev->priv.veth_attr.pair;
+		else
+			hostveth = netdev->priv.veth_attr.veth1;
+		if (hostveth[0] == '\0') {
 			SYSERROR("Host side veth device name is missing");
 			exit(EXIT_FAILURE);
 		}
@@ -2229,10 +2234,10 @@ static int lxc_delete_network_unpriv_exec(const char *lxcpath, char *lxcname,
 		}
 
 		INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
-		     lxcname, netns_path, netdev->link, netdev->priv.veth_attr.veth1);
+		     lxcname, netns_path, netdev->link, hostveth);
 		execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
-		       lxcname, netns_path, "veth", netdev->link,
-		       netdev->priv.veth_attr.veth1, (char *)NULL);
+		       lxcname, netns_path, "veth", netdev->link, hostveth,
+		       (char *)NULL);
 		SYSERROR("Failed to exec lxc-user-nic.");
 		exit(EXIT_FAILURE);
 	}
@@ -2327,14 +2332,20 @@ bool lxc_delete_network_unpriv(struct lxc_handler *handler)
 		if (!is_ovs_bridge(netdev->link))
 			continue;
 
+		if (netdev->priv.veth_attr.pair[0] != '\0')
+			hostveth = netdev->priv.veth_attr.pair;
+		else
+			hostveth = netdev->priv.veth_attr.veth1;
+		if (hostveth[0] == '\0')
+			continue;
+
 		ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
 						     handler->name, netdev,
 						     netns_path);
 		if (ret < 0) {
 			deleted_all = false;
 			WARN("Failed to remove port \"%s\" from openvswitch "
-			     "bridge \"%s\"",
-			     netdev->priv.veth_attr.veth1, netdev->link);
+			     "bridge \"%s\"", hostveth, netdev->link);
 			continue;
 		}
 		INFO("Removed interface \"%s\" from \"%s\"", hostveth,

From a02ea680da28f8fe6c4f3bf93e461d7a8dcbffe6 Mon Sep 17 00:00:00 2001
From: Dimitri John Ledkov <xnox at ubuntu.com>
Date: Wed, 30 Aug 2017 13:45:27 +0100
Subject: [PATCH 18/32] templates/ubuntu: support netplan in newer releases by
 default
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If netplan is present in the container, configure default networking
with netplan instead of ifupdown. Also, do not install ifupdown when
bootstrapping the minbase variant, unless using currently supported
non-netplan releases (trusty, xenial, zesty).

Signed-off-by: Dimitri John Ledkov <xnox at ubuntu.com>
Acked-by: Christian Brauner <christian.brauner at ubuntu.com>
Acked-by: Stéphane Graber <stgraber at ubuntu.com>
---
 templates/lxc-ubuntu.in | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/templates/lxc-ubuntu.in b/templates/lxc-ubuntu.in
index 226427d9e..dff6836b4 100644
--- a/templates/lxc-ubuntu.in
+++ b/templates/lxc-ubuntu.in
@@ -92,7 +92,15 @@ configure_ubuntu()
     password=$5
 
     # configure the network using the dhcp
-    cat <<EOF > $rootfs/etc/network/interfaces
+    if [ -d $rootfs/etc/netplan ]; then
+        cat <<EOF > $rootfs/etc/netplan/10-lxc.yaml
+network:
+  ethernets:
+    eth0: {dhcp4: true}
+  version: 2
+EOF
+    else
+        cat <<EOF > $rootfs/etc/network/interfaces
 # This file describes the network interfaces available on your system
 # and how to activate them. For more information, see interfaces(5).
 
@@ -103,6 +111,7 @@ iface lo inet loopback
 auto eth0
 iface eth0 inet dhcp
 EOF
+    fi
 
     # set the hostname
     cat <<EOF > $rootfs/etc/hostname
@@ -366,7 +375,13 @@ download_ubuntu()
         debootstrap_parameters="$debootstrap_parameters --variant=$variant"
     fi
     if [ "$variant" = 'minbase' ]; then
-        packages_template="${packages_template},sudo,ifupdown,isc-dhcp-client"
+        packages_template="${packages_template},sudo"
+        # Newer releases use netplan, EOL releases not supported
+        case $release in
+          trusty|xenial|zesty)
+                packages_template="${packages_template},ifupdown,isc-dhcp-client"
+                ;;
+        esac
     fi
 
     echo "Installing packages in template: ${packages_template}"

From 0f4fc8edd3e4e4bd3a4d15ea9e93bc5750522031 Mon Sep 17 00:00:00 2001
From: Dimitri John Ledkov <xnox at ubuntu.com>
Date: Thu, 31 Aug 2017 12:40:58 +0100
Subject: [PATCH 19/32] Check that there is a netplan binary, rather than
 just a config directory.

Signed-off-by: Dimitri John Ledkov <xnox at ubuntu.com>
Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 templates/lxc-ubuntu.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/templates/lxc-ubuntu.in b/templates/lxc-ubuntu.in
index dff6836b4..0e146dea3 100644
--- a/templates/lxc-ubuntu.in
+++ b/templates/lxc-ubuntu.in
@@ -92,7 +92,7 @@ configure_ubuntu()
     password=$5
 
     # configure the network using the dhcp
-    if [ -d $rootfs/etc/netplan ]; then
+    if chroot $rootfs which netplan >/dev/null 2>&1; then
         cat <<EOF > $rootfs/etc/netplan/10-lxc.yaml
 network:
   ethernets:

From 2e67a1c92c39f67e920493d79dba5ec0b9a4b2d5 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Mon, 31 Jul 2017 23:04:54 +0200
Subject: [PATCH 20/32] start: remove utmp watch

Closes #1616.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/Makefile.am |   1 -
 src/lxc/conf.h      |   1 -
 src/lxc/lxcutmp.c   | 489 ----------------------------------------------------
 src/lxc/lxcutmp.h   |  34 ----
 src/lxc/start.c     | 102 -----------
 5 files changed, 627 deletions(-)
 delete mode 100644 src/lxc/lxcutmp.c
 delete mode 100644 src/lxc/lxcutmp.h

diff --git a/src/lxc/Makefile.am b/src/lxc/Makefile.am
index 0a4f34676..af9b21528 100644
--- a/src/lxc/Makefile.am
+++ b/src/lxc/Makefile.am
@@ -122,7 +122,6 @@ liblxc_la_SOURCES = \
 	mainloop.c mainloop.h \
 	af_unix.c af_unix.h \
 	\
-	lxcutmp.c lxcutmp.h \
 	lxclock.h lxclock.c \
 	lxccontainer.c lxccontainer.h \
 	version.h \
diff --git a/src/lxc/conf.h b/src/lxc/conf.h
index 3aaf51440..34bb8af99 100644
--- a/src/lxc/conf.h
+++ b/src/lxc/conf.h
@@ -205,7 +205,6 @@ struct lxc_conf {
 	unsigned int tty;
 	unsigned int pts;
 	int reboot;
-	int need_utmp_watch;
 	signed long personality;
 	struct utsname *utsname;
 	struct lxc_list cgroup;
diff --git a/src/lxc/lxcutmp.c b/src/lxc/lxcutmp.c
deleted file mode 100644
index ba65654ad..000000000
--- a/src/lxc/lxcutmp.c
+++ /dev/null
@@ -1,489 +0,0 @@
-/*
- * lxc: linux Container library
- *
- * (C) Copyright IBM Corp. 2007, 2008
- *
- * Authors:
- * Daniel Lezcano <daniel.lezcano at free.fr>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "config.h"
-
-#include <stdio.h>
-#include <unistd.h>
-#include <errno.h>
-#include <signal.h>
-#include <stdlib.h>
-#include <fcntl.h>
-#include <sys/inotify.h>
-#include <sys/ioctl.h>
-#ifdef HAVE_SYS_TIMERFD_H
-#include <sys/timerfd.h>
-#else
-#include <sys/syscall.h>
-#ifndef TFD_NONBLOCK
-#define TFD_NONBLOCK O_NONBLOCK
-#endif
-
-#ifndef TFD_CLOEXEC
-#define TFD_CLOEXEC O_CLOEXEC
-#endif
-static int timerfd_create (clockid_t __clock_id, int __flags) {
-	return syscall(__NR_timerfd_create, __clock_id, __flags);
-}
-
-static int timerfd_settime (int __ufd, int __flags,
-			    const struct itimerspec *__utmr,
-			    struct itimerspec *__otmr) {
-
-	return syscall(__NR_timerfd_settime, __ufd, __flags,
-			    __utmr, __otmr);
-}
-
-#endif
-
-#include "conf.h"
-#include "cgroup.h"
-#include "start.h"
-#include "mainloop.h"
-#include "lxc.h"
-#include "log.h"
-
-#ifndef __USE_GNU
-#define __USE_GNU
-#endif
-#ifdef HAVE_UTMPX_H
-#include <utmpx.h>
-#ifndef HAVE_UTMPXNAME
-#include <utmp.h>
-#endif
-
-#else
-#include <utmp.h>
-
-#ifndef RUN_LVL
-#define RUN_LVL 1
-#endif
-
-static void setutxent(void) {
-	return setutent();
-}
-
-static struct utmp * getutxent (void) {
-	return (struct utmp *) getutent();
-}
-
-static void endutxent (void) {
-#ifdef IS_BIONIC
-	/* bionic isn't exporting endutend */
-	return;
-#else
-	return endutent();
-#endif
-}
-#endif
-
-#ifndef HAVE_UTMPXNAME
-static int utmpxname(const char *file) {
-	int result;
-	result = utmpname(file);
-
-#ifdef IS_BIONIC
-	/* Yeah bionic is that weird */
-	result = result - 1;
-#endif
-
-	return result;
-}
-#endif
-
-#undef __USE_GNU
-
-/* This file watches the /var/run/utmp file in the container
- * (that should probably be configurable)
- * We use inotify to put a watch on the /var/run directory for
- * create and modify events. These can trigger a read of the
- * utmp file looking for runlevel changes. If a runlevel change
- * to reboot or halt states is detected, we set up an itimer to
- * regularly check for the container shutdown, and reboot or halt
- * as appropriate when we get down to 1 task remaining.
- */
-
-lxc_log_define(lxc_utmp, lxc);
-
-struct lxc_utmp {
-	struct lxc_handler *handler;
-#define CONTAINER_STARTING  0
-#define CONTAINER_REBOOTING 1
-#define CONTAINER_HALTING   2
-#define CONTAINER_RUNNING   4
-	char container_state;
-	int timer_fd;
-	int prev_runlevel, curr_runlevel;
-};
-
-typedef void (*lxc_mainloop_timer_t) (void *data);
-
-static int utmp_get_runlevel(struct lxc_utmp *utmp_data);
-static int utmp_get_ntasks(struct lxc_handler *handler);
-static int utmp_shutdown_handler(int fd, uint32_t events, void *data,
-				 struct lxc_epoll_descr *descr);
-static int lxc_utmp_add_timer(struct lxc_epoll_descr *descr,
-			      lxc_mainloop_callback_t callback, void *data);
-static int lxc_utmp_del_timer(struct lxc_epoll_descr *descr,
-			      struct lxc_utmp *utmp_data);
-
-static int utmp_handler(int fd, uint32_t events, void *data,
-			struct lxc_epoll_descr *descr)
-{
-	struct inotify_event *ie;
-	int size, ret, length;
-
-	struct lxc_utmp *utmp_data = (struct lxc_utmp *)data;
-
-	/*
-	 * we're monitoring a directory. ie->name is not included in
-	 * sizeof(struct inotify_event) if we don't read it all at once,
-	 * read gives us EINVAL, so we read and cast to struct ie
-	 */
-	char buffer[MAXPATHLEN];
-
-	if (ioctl(fd, FIONREAD, &size) < 0) {
-		SYSERROR("cannot determine the size of this notification");
-		return -1;
-	}
-
-	if (read(fd, buffer, size) < size) {
-		SYSERROR("failed to read notification");
-		return -1;
-	}
-
-	ie = (struct inotify_event *)buffer;
-
-	if (ie->len <= 0) {
-
-		if (ie->mask & IN_UNMOUNT) {
-			DEBUG("watched directory removed");
-			goto out;
-		}
-
-		SYSERROR("inotify event with no name (mask %d)", ie->mask);
-		return -1;
-	}
-
-	ret = 0;
-
-	DEBUG("got inotify event %d for %s", ie->mask, ie->name);
-
-	length = (4 < ie->len) ? 4 : ie->len;
-
-	/* only care about utmp */
-
-	if (strncmp(ie->name, "utmp", length))
-		return 0;
-
-	if (ie->mask & (IN_MODIFY | IN_CREATE))
-		ret = utmp_get_runlevel(utmp_data);
-
-	if (ret < 0)
-		goto out;
-
-	/* container halting, from running or starting state */
-	if (utmp_data->curr_runlevel == '0'
-	    && ((utmp_data->container_state == CONTAINER_RUNNING)
-		|| (utmp_data->container_state == CONTAINER_STARTING))) {
-		utmp_data->container_state = CONTAINER_HALTING;
-		if (utmp_data->timer_fd == -1)
-			lxc_utmp_add_timer(descr, utmp_shutdown_handler, data);
-		DEBUG("Container halting");
-		goto out;
-	}
-
-	/* container rebooting, from running or starting state */
-	if (utmp_data->curr_runlevel == '6'
-	    && ((utmp_data->container_state == CONTAINER_RUNNING)
-		|| (utmp_data->container_state == CONTAINER_STARTING))) {
-		utmp_data->container_state = CONTAINER_REBOOTING;
-		if (utmp_data->timer_fd == -1)
-			lxc_utmp_add_timer(descr, utmp_shutdown_handler, data);
-		DEBUG("Container rebooting");
-		goto out;
-	}
-
-	/* normal operation, running, from starting state. */
-	if (utmp_data->curr_runlevel > '0' && utmp_data->curr_runlevel < '6') {
-		utmp_data->container_state = CONTAINER_RUNNING;
-		if (utmp_data->timer_fd > 0)
-			lxc_utmp_del_timer(descr, utmp_data);
-		DEBUG("Container running");
-		goto out;
-	}
-
-out:
-	return 0;
-}
-
-static int utmp_get_runlevel(struct lxc_utmp *utmp_data)
-{
-	#if HAVE_UTMPX_H
-	struct utmpx *utmpx;
-	#else
-	struct utmp *utmpx;
-	#endif
-	char path[MAXPATHLEN];
-	struct lxc_handler *handler = utmp_data->handler;
-
-	if (snprintf(path, MAXPATHLEN, "/proc/%d/root/run/utmp",
-		     handler->pid) > MAXPATHLEN) {
-		ERROR("path is too long");
-		return -1;
-	}
-
-	if (!access(path, F_OK) && !utmpxname(path))
-		goto utmp_ok;
-
-	if (snprintf(path, MAXPATHLEN, "/proc/%d/root/var/run/utmp",
-		     handler->pid) > MAXPATHLEN) {
-		ERROR("path is too long");
-		return -1;
-	}
-
-	if (utmpxname(path)) {
-		SYSERROR("failed to 'utmpxname'");
-		return -1;
-	}
-
-utmp_ok:
-
-	setutxent();
-
-	while ((utmpx = getutxent())) {
-
-		if (utmpx->ut_type == RUN_LVL) {
-			utmp_data->prev_runlevel = utmpx->ut_pid / 256;
-			utmp_data->curr_runlevel = utmpx->ut_pid % 256;
-			DEBUG("utmp handler - run level is %c/%c",
-			      utmp_data->prev_runlevel,
-			      utmp_data->curr_runlevel);
-		}
-	}
-
-	endutxent();
-
-	return 0;
-}
-
-static int utmp_get_ntasks(struct lxc_handler *handler)
-{
-	int ntasks;
-
-	ntasks = cgroup_nrtasks(handler);
-
-	if (ntasks < 0) {
-		ERROR("failed to get the number of tasks");
-		return -1;
-	}
-
-	DEBUG("there are %d tasks running", ntasks);
-
-	return ntasks;
-}
-
-int lxc_utmp_mainloop_add(struct lxc_epoll_descr *descr,
-			  struct lxc_handler *handler)
-{
-	char path[MAXPATHLEN];
-	char path2[MAXPATHLEN];
-	int fd, wd;
-	struct lxc_utmp *utmp_data;
-
-	/* We set up a watch for the /var/run directory. We're only interested
-	 * in utmp at the moment, but want to watch for delete and create
-	 * events as well.
-	 */
-	if (snprintf(path, MAXPATHLEN, "/proc/%d/root/run",
-		     handler->pid) > MAXPATHLEN) {
-		ERROR("path is too long");
-		return -1;
-	}
-	if (snprintf(path2, MAXPATHLEN, "/proc/%d/root/run/utmp",
-		     handler->pid) > MAXPATHLEN) {
-		ERROR("path is too long");
-		return -1;
-	}
-	if (!access(path2, F_OK))
-		goto run_ok;
-
-	if (snprintf(path, MAXPATHLEN, "/proc/%d/root/var/run",
-		     handler->pid) > MAXPATHLEN) {
-		ERROR("path is too long");
-		return -1;
-	}
-
-	if (access(path, F_OK)) {
-		WARN("'%s' not found", path);
-		return 0;
-	}
-
-run_ok:
-
-	utmp_data = (struct lxc_utmp *)malloc(sizeof(struct lxc_utmp));
-
-	if (NULL == utmp_data) {
-		SYSERROR("failed to malloc handler utmp_data");
-		return -1;
-	}
-
-	memset(utmp_data, 0, sizeof(struct lxc_utmp));
-
-	fd = inotify_init();
-	if (fd < 0) {
-		SYSERROR("failed to inotify_init");
-		goto out;
-	}
-
-	if (fcntl(fd, F_SETFD, FD_CLOEXEC)) {
-		SYSERROR("failed to set inotify fd to close-on-exec");
-		goto out_close;
-
-	}
-
-	wd = inotify_add_watch(fd, path, IN_MODIFY | IN_CREATE);
-	if (wd < 0) {
-		SYSERROR("failed to add watch for '%s'", path);
-		goto out_close;
-	}
-
-	utmp_data->handler = handler;
-	utmp_data->container_state = CONTAINER_STARTING;
-	utmp_data->timer_fd = -1;
-	utmp_data->prev_runlevel = 'N';
-	utmp_data->curr_runlevel = 'N';
-
-	if (lxc_mainloop_add_handler
-	    (descr, fd, utmp_handler, (void *)utmp_data)) {
-		SYSERROR("failed to add mainloop");
-		goto out_close;
-	}
-
-	DEBUG("Added '%s' to inotifywatch", path);
-
-	return 0;
-out_close:
-	close(fd);
-out:
-	free(utmp_data);
-	return -1;
-}
-
-static int utmp_shutdown_handler(int fd, uint32_t events, void *data,
-				 struct lxc_epoll_descr *descr)
-{
-	int ntasks;
-	ssize_t nread;
-	struct lxc_utmp *utmp_data = (struct lxc_utmp *)data;
-	struct lxc_handler *handler = utmp_data->handler;
-	struct lxc_conf *conf = handler->conf;
-	uint64_t expirations;
-
-	/* read and clear notifications */
-	nread = read(fd, &expirations, sizeof(expirations));
-	if (nread < 0)
-		SYSERROR("Failed to read timer notification");
-
-	ntasks = utmp_get_ntasks(handler);
-
-	if (ntasks == 1 && (utmp_data->container_state == CONTAINER_HALTING)) {
-		INFO("container has shutdown");
-		/* shutdown timer */
-		lxc_utmp_del_timer(descr, utmp_data);
-
-		kill(handler->pid, SIGKILL);
-	}
-
-	if (ntasks == 1 && (utmp_data->container_state == CONTAINER_REBOOTING)) {
-		INFO("container has rebooted");
-		conf->reboot = 1;
-		/* shutdown timer */
-		lxc_utmp_del_timer(descr, utmp_data);
-		/* this seems a bit rough. */
-		kill(handler->pid, SIGKILL);
-	}
-	return 0;
-
-}
-
-int lxc_utmp_add_timer(struct lxc_epoll_descr *descr,
-		       lxc_mainloop_callback_t callback, void *data)
-{
-	int fd, result;
-	struct itimerspec timeout;
-	struct lxc_utmp *utmp_data = (struct lxc_utmp *)data;
-
-	fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
-	if (fd < 0) {
-		SYSERROR("failed to create timer");
-		return -1;
-	}
-
-	DEBUG("Setting up utmp shutdown timer");
-
-	/* set a one second timeout. Repeated. */
-	timeout.it_value.tv_sec = 1;
-	timeout.it_value.tv_nsec = 0;
-
-	timeout.it_interval.tv_sec = 1;
-	timeout.it_interval.tv_nsec = 0;
-
-	result = timerfd_settime(fd, 0, &timeout, NULL);
-
-	if (result < 0) {
-		SYSERROR("timerfd_settime:");
-		return -1;
-	}
-
-	if (lxc_mainloop_add_handler(descr, fd, callback, utmp_data)) {
-		SYSERROR("failed to add utmp timer to mainloop");
-		close(fd);
-		return -1;
-	}
-
-	utmp_data->timer_fd = fd;
-
-	return 0;
-}
-
-int lxc_utmp_del_timer(struct lxc_epoll_descr *descr,
-		       struct lxc_utmp *utmp_data)
-{
-	int result;
-
-	DEBUG("Clearing utmp shutdown timer");
-
-	result = lxc_mainloop_del_handler(descr, utmp_data->timer_fd);
-	if (result < 0)
-		SYSERROR("failed to del utmp timer from mainloop");
-
-	/* shutdown timer_fd */
-	close(utmp_data->timer_fd);
-	utmp_data->timer_fd = -1;
-
-	if (result < 0)
-		return -1;
-	else
-		return 0;
-}
diff --git a/src/lxc/lxcutmp.h b/src/lxc/lxcutmp.h
deleted file mode 100644
index 062ecdf38..000000000
--- a/src/lxc/lxcutmp.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * lxc: linux Container library
- *
- * (C) Copyright IBM Corp. 2007, 2008
- *
- * Authors:
- * Daniel Lezcano <daniel.lezcano at free.fr>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef __LXC_LXCUTMP_H
-#define __LXC_LXCUTMP_H
-
-#include "config.h"
-
-struct lxc_handler;
-struct lxc_epoll_descr;
-
-int lxc_utmp_mainloop_add(struct lxc_epoll_descr *descr,
-			  struct lxc_handler *handler);
-#endif
diff --git a/src/lxc/start.c b/src/lxc/start.c
index 116b1ebd8..de7a37c87 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -67,7 +67,6 @@
 #include "lxccontainer.h"
 #include "lxclock.h"
 #include "lxcseccomp.h"
-#include "lxcutmp.h"
 #include "mainloop.h"
 #include "monitor.h"
 #include "namespace.h"
@@ -477,16 +476,6 @@ int lxc_poll(const char *name, struct lxc_handler *handler)
 		goto out_mainloop_open;
 	}
 
-	if (handler->conf->need_utmp_watch) {
-		#if HAVE_LIBCAP
-		if (lxc_utmp_mainloop_add(&descr, handler)) {
-			ERROR("Failed to add utmp handler to LXC mainloop.");
-			goto out_mainloop_open;
-		}
-		#else
-			DEBUG("Not starting utmp handler as CAP_SYS_BOOT cannot be dropped without capabilities support.");
-		#endif
-	}
 	TRACE("lxc mainloop is ready");
 
 	return lxc_mainloop(&descr, -1);
@@ -785,75 +774,6 @@ void lxc_abort(const char *name, struct lxc_handler *handler)
 	}
 }
 
-#include <sys/reboot.h>
-#include <linux/reboot.h>
-
-/* reboot(LINUX_REBOOT_CMD_CAD_ON) will return -EINVAL in a child pid namespace
- * if container reboot support exists.  Otherwise, it will either succeed or
- * return -EPERM.
- */
-static int container_reboot_supported(void *arg)
-{
-	int *cmd = arg;
-	int ret;
-
-	ret = reboot(*cmd);
-	if (ret == -1 && errno == EINVAL)
-		return 1;
-	return 0;
-}
-
-static int must_drop_cap_sys_boot(struct lxc_conf *conf)
-{
-	FILE *f;
-	int ret, cmd, v, flags;
-	long stack_size = 4096;
-	void *stack = alloca(stack_size);
-	int status;
-	pid_t pid;
-
-	f = fopen("/proc/sys/kernel/ctrl-alt-del", "r");
-	if (!f) {
-		DEBUG("failed to open /proc/sys/kernel/ctrl-alt-del");
-		return 1;
-	}
-
-	ret = fscanf(f, "%d", &v);
-	fclose(f);
-	if (ret != 1) {
-		DEBUG("Failed to read /proc/sys/kernel/ctrl-alt-del.");
-		return 1;
-	}
-	cmd = v ? LINUX_REBOOT_CMD_CAD_ON : LINUX_REBOOT_CMD_CAD_OFF;
-
-	flags = CLONE_NEWPID | SIGCHLD;
-	if (!lxc_list_empty(&conf->id_map))
-		flags |= CLONE_NEWUSER;
-
-#ifdef __ia64__
-	pid = __clone2(container_reboot_supported, stack, stack_size, flags,  &cmd);
-#else
-	stack += stack_size;
-	pid = clone(container_reboot_supported, stack, flags, &cmd);
-#endif
-	if (pid < 0) {
-		if (flags & CLONE_NEWUSER)
-			ERROR("Failed to clone (%#x): %s (includes CLONE_NEWUSER).", flags, strerror(errno));
-		else
-			ERROR("Failed to clone (%#x): %s.", flags, strerror(errno));
-		return -1;
-	}
-	if (wait(&status) < 0) {
-		SYSERROR("Unexpected wait error: %s.", strerror(errno));
-		return -1;
-	}
-
-	if (WEXITSTATUS(status) != 1)
-		return 1;
-
-	return 0;
-}
-
 /* netpipe is used in the unprivileged case to transfer the ifindexes from
  * parent to child
  */
@@ -970,16 +890,6 @@ static int do_start(void *data)
 		goto out_warn_father;
 	}
 
-	#if HAVE_LIBCAP
-	if (handler->conf->need_utmp_watch) {
-		if (prctl(PR_CAPBSET_DROP, CAP_SYS_BOOT, 0, 0, 0)) {
-			SYSERROR("Failed to remove the CAP_SYS_BOOT capability.");
-			goto out_warn_father;
-		}
-		DEBUG("Dropped the CAP_SYS_BOOT capability.");
-	}
-	#endif
-
 	ret = snprintf(path, sizeof(path), "%s/dev/null", handler->conf->rootfs.mount);
 	if (ret < 0 || ret >= sizeof(path))
 		goto out_warn_father;
@@ -1581,17 +1491,6 @@ int __lxc_start(const char *name, struct lxc_handler *handler,
 	handler->backgrounded = backgrounded;
 	handler->netnsfd = -1;
 
-	if (must_drop_cap_sys_boot(handler->conf)) {
-		#if HAVE_LIBCAP
-		DEBUG("Dropping CAP_SYS_BOOT capability.");
-		#else
-		DEBUG("Not dropping CAP_SYS_BOOT capability as capabilities aren't supported.");
-		#endif
-	} else {
-		DEBUG("Not dropping CAP_SYS_BOOT or watching utmp.");
-		handler->conf->need_utmp_watch = 0;
-	}
-
 	if (!attach_block_device(handler->conf)) {
 		ERROR("Failed to attach block device.");
 		goto out_fini_nonet;
@@ -1727,7 +1626,6 @@ int lxc_start(const char *name, char *const argv[], struct lxc_handler *handler,
 		.argv = argv,
 	};
 
-	handler->conf->need_utmp_watch = 1;
 	return __lxc_start(name, handler, &start_ops, &start_arg, lxcpath, backgrounded);
 }
 

From 79ab61eae306710ff53e20fe9074e2b4d4bb52c4 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Fri, 1 Sep 2017 22:33:21 +0200
Subject: [PATCH 21/32] network: remove netpipe

We use data_sock for all things we need to send around between parent and child
now. It doesn't make sense to have so many different pipes and sockets if one
will do just fine.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/network.c | 63 ++++++++++++++++++++++++++++++++++++++++++++
 src/lxc/network.h |  2 ++
 src/lxc/start.c   | 78 ++++++-------------------------------------------------
 3 files changed, 73 insertions(+), 70 deletions(-)

diff --git a/src/lxc/network.c b/src/lxc/network.c
index 680588508..78fe83c38 100644
--- a/src/lxc/network.c
+++ b/src/lxc/network.c
@@ -45,6 +45,7 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 
+#include "af_unix.h"
 #include "conf.h"
 #include "config.h"
 #include "log.h"
@@ -2963,3 +2964,65 @@ int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
 
 	return 0;
 }
+
+int lxc_network_send_veth_names_to_child(struct lxc_handler *handler)
+{
+	struct lxc_list *iterator;
+	struct lxc_list *network = &handler->conf->network;
+	int data_sock = handler->data_sock[0];
+
+	if (handler->root)
+		return 0;
+
+	lxc_list_for_each(iterator, network) {
+		int ret;
+		struct lxc_netdev *netdev = iterator->elem;
+
+		if (netdev->type != LXC_NET_VETH)
+			continue;
+
+		ret = lxc_abstract_unix_send_credential(data_sock, netdev->name,
+							IFNAMSIZ);
+		if (ret < 0) {
+			close(handler->data_sock[0]);
+			close(handler->data_sock[1]);
+			return -1;
+		} else {
+			TRACE("Sent network device name \"%s\" to child",
+			      netdev->name);
+		}
+	}
+
+	return 0;
+}
+
+int lxc_network_recv_veth_names_from_parent(struct lxc_handler *handler)
+{
+	struct lxc_list *iterator;
+	struct lxc_list *network = &handler->conf->network;
+	int data_sock = handler->data_sock[1];
+
+	if (handler->root)
+		return 0;
+
+	lxc_list_for_each(iterator, network) {
+		int ret;
+		struct lxc_netdev *netdev = iterator->elem;
+
+		if (netdev->type != LXC_NET_VETH)
+			continue;
+
+		ret = lxc_abstract_unix_rcv_credential(data_sock, netdev->name,
+						       IFNAMSIZ);
+		if (ret < 0) {
+			close(handler->data_sock[0]);
+			close(handler->data_sock[1]);
+			return -1;
+		} else {
+			TRACE("Received network device name \"%s\" from parent",
+			      netdev->name);
+		}
+	}
+
+	return 0;
+}
diff --git a/src/lxc/network.h b/src/lxc/network.h
index 9badf14b8..5dd48fb2a 100644
--- a/src/lxc/network.h
+++ b/src/lxc/network.h
@@ -305,5 +305,7 @@ extern int lxc_requests_empty_network(struct lxc_handler *handler);
 extern int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler);
 extern int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
 						 struct lxc_list *network);
+extern int lxc_network_send_veth_names_to_child(struct lxc_handler *handler);
+extern int lxc_network_recv_veth_names_from_parent(struct lxc_handler *handler);
 
 #endif /* __LXC_NETWORK_H */
diff --git a/src/lxc/start.c b/src/lxc/start.c
index de7a37c87..fdb1d16ab 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -774,48 +774,6 @@ void lxc_abort(const char *name, struct lxc_handler *handler)
 	}
 }
 
-/* netpipe is used in the unprivileged case to transfer the ifindexes from
- * parent to child
- */
-static int netpipe = -1;
-
-static inline int count_veths(struct lxc_list *network)
-{
-	struct lxc_list *iterator;
-	struct lxc_netdev *netdev;
-	int count = 0;
-
-	lxc_list_for_each(iterator, network) {
-		netdev = iterator->elem;
-		if (netdev->type != LXC_NET_VETH)
-			continue;
-		count++;
-	}
-	return count;
-}
-
-static int read_unpriv_netifindex(struct lxc_list *network)
-{
-	struct lxc_list *iterator;
-	struct lxc_netdev *netdev;
-
-	if (netpipe == -1)
-		return 0;
-
-	lxc_list_for_each(iterator, network) {
-		netdev = iterator->elem;
-		if (netdev->type != LXC_NET_VETH)
-			continue;
-
-		if (read(netpipe, netdev->name, IFNAMSIZ) != IFNAMSIZ) {
-			close(netpipe);
-			return -1;
-		}
-	}
-	close(netpipe);
-	return 0;
-}
-
 static int do_start(void *data)
 {
 	struct lxc_list *iterator;
@@ -868,8 +826,10 @@ static int do_start(void *data)
 	if (lxc_sync_barrier_parent(handler, LXC_SYNC_CONFIGURE))
 		return -1;
 
-	if (read_unpriv_netifindex(&handler->conf->network) < 0)
+	if (lxc_network_recv_veth_names_from_parent(handler) < 0) {
+		ERROR("Failed to receive veth names from parent");
 		goto out_warn_father;
+	}
 
 	/* If we are in a new user namespace, become root there to have
 	 * privilege over our namespace.
@@ -1188,15 +1148,14 @@ void resolve_clone_flags(struct lxc_handler *handler)
  */
 static int lxc_spawn(struct lxc_handler *handler)
 {
-	int i, flags, nveths, ret;
+	int i, flags, ret;
 	const char *name = handler->name;
 	bool wants_to_map_ids;
-	int netpipepair[2], saved_ns_fd[LXC_NS_MAX];
+	int saved_ns_fd[LXC_NS_MAX];
 	struct lxc_list *id_map;
 	int failed_before_rename = 0, preserve_mask = 0;
 	bool cgroups_connected = false;
 
-	netpipe = -1;
 	id_map = &handler->conf->id_map;
 	wants_to_map_ids = !lxc_list_empty(id_map);
 
@@ -1269,15 +1228,6 @@ static int lxc_spawn(struct lxc_handler *handler)
 	if (attach_ns(handler->conf->inherit_ns_fd) < 0)
 		goto out_delete_net;
 
-	if (!handler->root && (nveths = count_veths(&handler->conf->network))) {
-		if (pipe(netpipepair) < 0) {
-			SYSERROR("Failed to create pipe.");
-			goto out_delete_net;
-		}
-		/* Store netpipe in the global var for do_start's use. */
-		netpipe = netpipepair[0];
-	}
-
 	/* Create a process in a new set of namespaces. */
 	flags = handler->clone_flags;
 	if (handler->clone_flags & CLONE_NEWUSER) {
@@ -1369,21 +1319,9 @@ static int lxc_spawn(struct lxc_handler *handler)
 		}
 	}
 
-	if (netpipe != -1) {
-		struct lxc_list *iterator;
-		struct lxc_netdev *netdev;
-
-		close(netpipe);
-		lxc_list_for_each(iterator, &handler->conf->network) {
-			netdev = iterator->elem;
-			if (netdev->type != LXC_NET_VETH)
-				continue;
-			if (write(netpipepair[1], netdev->name, IFNAMSIZ) != IFNAMSIZ) {
-				ERROR("Error writing veth name to container.");
-				goto out_delete_net;
-			}
-		}
-		close(netpipepair[1]);
+	if (lxc_network_send_veth_names_to_child(handler) < 0) {
+		ERROR("Failed to send veth names to child");
+		goto out_delete_net;
 	}
 
 	/* Tell the child to continue its initialization. We'll get

From 85343613829e3ed9500872154ea62a1a9aea11ce Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Sat, 2 Sep 2017 02:26:28 +0200
Subject: [PATCH 22/32] lxc-user-nic: fix adding database entries

The previous code inserted a \0-byte after every new line, which made the db
basically unusable.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/lxc_user_nic.c | 73 ++++++++++++++++++++++++++++++++++----------------
 1 file changed, 50 insertions(+), 23 deletions(-)

diff --git a/src/lxc/lxc_user_nic.c b/src/lxc/lxc_user_nic.c
index 9164e44dc..39c7daabf 100644
--- a/src/lxc/lxc_user_nic.c
+++ b/src/lxc/lxc_user_nic.c
@@ -678,7 +678,7 @@ static char *get_nic_if_avail(int fd, struct alloted_s *names, int pid,
 			      char *intype, char *br, int allowed, char **cnic)
 {
 	int ret;
-	off_t len, slen;
+	size_t slen;
 	char *newline, *nicname, *owner;
 	struct stat sb;
 	struct alloted_s *n;
@@ -693,31 +693,32 @@ static char *get_nic_if_avail(int fd, struct alloted_s *names, int pid,
 
 	owner = names->name;
 
-	if (fstat(fd, &sb) < 0) {
+	ret = fstat(fd, &sb);
+	if (ret < 0) {
 		usernic_error("Failed to fstat: %s\n", strerror(errno));
 		return NULL;
 	}
 
-	len = sb.st_size;
-	if (len > 0) {
-		buf =
-		    mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	if (sb.st_size > 0) {
+		buf = lxc_strmmap(NULL, sb.st_size, PROT_READ | PROT_WRITE,
+				  MAP_SHARED, fd, 0);
 		if (buf == MAP_FAILED) {
-			usernic_error("Failed to establish shared memory mapping: %s\n",
-				      strerror(errno));
+			usernic_error("Failed to establish shared memory "
+				      "mapping: %s\n", strerror(errno));
 			return NULL;
 		}
 
 		owner = NULL;
 		for (n = names; n != NULL; n = n->next) {
-			count = count_entries(buf, len, n->name, intype, br);
-
+			count = count_entries(buf, sb.st_size, n->name, intype, br);
 			if (count >= n->allowed)
 				continue;
 
 			owner = n->name;
 			break;
 		}
+
+		lxc_strmunmap(buf, sb.st_size);
 	}
 
 	if (owner == NULL)
@@ -729,41 +730,67 @@ static char *get_nic_if_avail(int fd, struct alloted_s *names, int pid,
 		return NULL;
 	}
 
-	/* owner  ' ' intype ' ' br ' ' *nicname + '\n' + '\0' */
-	slen = strlen(owner) + strlen(intype) + strlen(br) + strlen(nicname) + 5;
-	newline = alloca(slen);
+	/* strlen(owner)
+	 * +
+	 * " "
+	 * +
+	 * strlen(intype)
+	 * +
+	 * " "
+	 * +
+	 * strlen(br)
+	 * +
+	 * " "
+	 * +
+	 * strlen(nicname)
+	 * +
+	 * \n
+	 * +
+	 * \0
+	 */
+	slen = strlen(owner) + strlen(intype) + strlen(br) + strlen(nicname) + 4;
+	newline = malloc(slen + 1);
 	if (!newline) {
 		free(nicname);
+		free(newline);
 		usernic_error("Failed allocate memory: %s\n", strerror(errno));
 		return NULL;
 	}
 
-	ret = snprintf(newline, slen, "%s %s %s %s\n", owner, intype, br, nicname);
-	if (ret < 0 || ret >= slen) {
+	ret = snprintf(newline, slen + 1, "%s %s %s %s\n", owner, intype, br, nicname);
+	if (ret < 0 || (size_t)ret >= (slen + 1)) {
 		if (lxc_netdev_delete_by_name(nicname) != 0)
 			usernic_error("Error unlinking %s\n", nicname);
 		free(nicname);
+		free(newline);
 		return NULL;
 	}
-	if (len)
-		munmap(buf, len);
 
-	if (ftruncate(fd, len + slen))
-		usernic_error("Failed to set new file size: %s\n",
-			      strerror(errno));
+	/* Note that the file needs to be truncated to the size **without** the
+	 * \0 byte! Files are not \0-terminated!
+	 */
+	ret = ftruncate(fd, sb.st_size + slen);
+	if (ret < 0)
+		usernic_error("Failed to truncate file: %s\n", strerror(errno));
 
-	buf = mmap(NULL, len + slen, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	buf = lxc_strmmap(NULL, sb.st_size + slen, PROT_READ | PROT_WRITE,
+			  MAP_SHARED, fd, 0);
 	if (buf == MAP_FAILED) {
 		usernic_error("Failed to establish shared memory mapping: %s\n",
 			      strerror(errno));
 		if (lxc_netdev_delete_by_name(nicname) != 0)
 			usernic_error("Error unlinking %s\n", nicname);
 		free(nicname);
+		free(newline);
 		return NULL;
 	}
 
-	strcpy(buf + len, newline);
-	munmap(buf, len + slen);
+	/* Note that the memory needs to be moved in the buffer **without** the
+	 * \0 byte! Files are not \0-terminated!
+	 */
+	memmove(buf + sb.st_size, newline, slen);
+	free(newline);
+	lxc_strmunmap(buf, sb.st_size + slen);
 
 	return nicname;
 }

From 7391103f26d1dee4be6784b7ca03ae16331e7fca Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Sat, 2 Sep 2017 19:44:10 +0200
Subject: [PATCH 23/32] lxc-user-nic: keep lines from other {users,links}

Assume the db contained the following entries:

    chb veth lxcbr0 veth1
    chb veth lxcbr0 veth2
    chb veth lxdbr0 veth3
    chb veth lxdbr0 veth2
    didi veth lxcbr0 veth4

And you request

    cull_entries("chb", "veth", "lxdbr0", "veth3");

lxc-user-nic would wipe any entries that did not match irrespective of whether
they existed or not. Let's fix that.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/lxc_user_nic.c | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/src/lxc/lxc_user_nic.c b/src/lxc/lxc_user_nic.c
index 39c7daabf..c4388a741 100644
--- a/src/lxc/lxc_user_nic.c
+++ b/src/lxc/lxc_user_nic.c
@@ -370,27 +370,24 @@ static char *find_line(char *p, char *e, char *u, char *t, char *l)
 			p++;
 
 		p2 = get_eow(p, e);
-		if (!p2 || ((size_t)(p2 - p)) != strlen(u) ||
-		    strncmp(p, u, strlen(u)))
-			goto next;
+		if (!p2 || ((size_t)(p2 - p)) != strlen(u) || strncmp(p, u, strlen(u)))
+			return ret;
 
 		p = p2 + 1;
 		while ((p < e) && isblank(*p))
 			p++;
 
 		p2 = get_eow(p, e);
-		if (!p2 || ((size_t)(p2 - p)) != strlen(t) ||
-		    strncmp(p, t, strlen(t)))
-			goto next;
+		if (!p2 || ((size_t)(p2 - p)) != strlen(t) || strncmp(p, t, strlen(t)))
+			return ret;
 
 		p = p2 + 1;
 		while ((p < e) && isblank(*p))
 			p++;
 
 		p2 = get_eow(p, e);
-		if (!p2 || ((size_t)(p2 - p)) != strlen(l) ||
-		    strncmp(p, l, strlen(l)))
-			goto next;
+		if (!p2 || ((size_t)(p2 - p)) != strlen(l) || strncmp(p, l, strlen(l)))
+			return ret;
 
 		return ret;
 	next:
@@ -410,7 +407,7 @@ static bool nic_exists(char *nic)
 		return true;
 
 	ret = snprintf(path, MAXPATHLEN, "/sys/class/net/%s", nic);
-	if (ret < 0 || ret >= MAXPATHLEN)
+	if (ret < 0 || (size_t)ret >= MAXPATHLEN)
 		return false;
 
 	ret = stat(path, &sb);
@@ -562,8 +559,10 @@ static bool get_nic_from_line(char *p, char **nic)
 
 	ret = sscanf(p, "%99[^ \t\n] %99[^ \t\n] %99[^ \t\n] %99[^ \t\n]", user,
 		     type, br, *nic);
-	if (ret != 4)
+	if (ret != 4) {
+		*nic[0] = '\0';
 		return false;
+	}
 
 	return true;
 }
@@ -609,6 +608,7 @@ static bool cull_entries(int fd, char *me, char *t, char *br, char *nicname,
 	e = buf + len;
 	while ((p = find_line(p, e, me, t, br))) {
 		struct entry_line *newe;
+		bool exists = false;
 
 		newe = realloc(entry_lines, sizeof(*entry_lines) * (n + 1));
 		if (!newe) {
@@ -624,10 +624,13 @@ static bool cull_entries(int fd, char *me, char *t, char *br, char *nicname,
 		if (!get_nic_from_line(p, &nic))
 			continue;
 
-		if (nic && !nic_exists(nic))
+		if (nic[0] != '\0')
+			exists = nic_exists(nic);
+
+		if (!exists)
 			entry_lines[n - 1].keep = false;
 
-		if (nicname)
+		if (exists && nicname)
 			if (!strcmp(nic, nicname))
 				*found_nicname = true;
 
@@ -1206,8 +1209,8 @@ int main(int argc, char *argv[])
 		free_alloted(&alloted);
 
 		if (!found_nicname) {
-			usernic_error("%s", "Caller is not allowed to delete "
-				      "network device\n");
+			usernic_error("Caller is not allowed to delete network "
+				      "device \"%s\"\n", args.veth_name);
 			exit(EXIT_FAILURE);
 		}
 

From eb30b69bf77c4a5146c5b6b7e4e62a8f9a48c692 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Sun, 3 Sep 2017 16:35:48 +0200
Subject: [PATCH 24/32] utils: add lxc_nic_exists()

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/lxc_user_nic.c | 22 +---------------------
 src/lxc/utils.c        | 21 +++++++++++++++++++++
 src/lxc/utils.h        |  5 +++--
 3 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/src/lxc/lxc_user_nic.c b/src/lxc/lxc_user_nic.c
index c4388a741..a2ad03080 100644
--- a/src/lxc/lxc_user_nic.c
+++ b/src/lxc/lxc_user_nic.c
@@ -397,26 +397,6 @@ static char *find_line(char *p, char *e, char *u, char *t, char *l)
 	return NULL;
 }
 
-static bool nic_exists(char *nic)
-{
-	char path[MAXPATHLEN];
-	int ret;
-	struct stat sb;
-
-	if (!strcmp(nic, "none"))
-		return true;
-
-	ret = snprintf(path, MAXPATHLEN, "/sys/class/net/%s", nic);
-	if (ret < 0 || (size_t)ret >= MAXPATHLEN)
-		return false;
-
-	ret = stat(path, &sb);
-	if (ret < 0)
-		return false;
-
-	return true;
-}
-
 static int instantiate_veth(char *n1, char **n2)
 {
 	int err;
@@ -625,7 +605,7 @@ static bool cull_entries(int fd, char *me, char *t, char *br, char *nicname,
 			continue;
 
 		if (nic[0] != '\0')
-			exists = nic_exists(nic);
+			exists = lxc_nic_exists(nic);
 
 		if (!exists)
 			entry_lines[n - 1].keep = false;
diff --git a/src/lxc/utils.c b/src/lxc/utils.c
index d36107020..959481ee2 100644
--- a/src/lxc/utils.c
+++ b/src/lxc/utils.c
@@ -2406,3 +2406,24 @@ bool has_fs_type(const char *path, fs_type_magic magic_val)
 
 	return has_type;
 }
+
+bool lxc_nic_exists(char *nic)
+{
+#define __LXC_SYS_CLASS_NET_LEN 15 + IFNAMSIZ + 1
+	char path[__LXC_SYS_CLASS_NET_LEN];
+	int ret;
+	struct stat sb;
+
+	if (!strcmp(nic, "none"))
+		return true;
+
+	ret = snprintf(path, __LXC_SYS_CLASS_NET_LEN, "/sys/class/net/%s", nic);
+	if (ret < 0 || (size_t)ret >= __LXC_SYS_CLASS_NET_LEN)
+		return false;
+
+	ret = stat(path, &sb);
+	if (ret < 0)
+		return false;
+
+	return true;
+}
diff --git a/src/lxc/utils.h b/src/lxc/utils.h
index 4408c6d69..3f4d90479 100644
--- a/src/lxc/utils.h
+++ b/src/lxc/utils.h
@@ -390,7 +390,8 @@ void *must_realloc(void *orig, size_t sz);
 
 /* __typeof__ should be safe to use with all compilers. */
 typedef __typeof__(((struct statfs *)NULL)->f_type) fs_type_magic;
-bool has_fs_type(const char *path, fs_type_magic magic_val);
-bool is_fs_type(const struct statfs *fs, fs_type_magic magic_val);
+extern bool has_fs_type(const char *path, fs_type_magic magic_val);
+extern bool is_fs_type(const struct statfs *fs, fs_type_magic magic_val);
+extern bool lxc_nic_exists(char *nic);
 
 #endif /* __LXC_UTILS_H */

From 237b59af71ace9e9299240f9e9d4e14abc773850 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Sun, 3 Sep 2017 16:40:11 +0200
Subject: [PATCH 25/32] lxc-user-nic: bugfixes

Since find_line() was changed earlier, count_entries() started counting lines
wrong. It would report that the maximum had been reached before you actually
reached your allotted maximum.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/lxc_user_nic.c | 199 ++++++++++++++++++++++++++++---------------------
 1 file changed, 114 insertions(+), 85 deletions(-)

diff --git a/src/lxc/lxc_user_nic.c b/src/lxc/lxc_user_nic.c
index a2ad03080..6920c5462 100644
--- a/src/lxc/lxc_user_nic.c
+++ b/src/lxc/lxc_user_nic.c
@@ -357,41 +357,94 @@ static char *get_eow(char *s, char *e)
 	return s;
 }
 
-static char *find_line(char *p, char *e, char *u, char *t, char *l)
+static char *find_line(char *buf_start, char *buf_end, char *name,
+		       char *net_type, char *net_link, char *net_dev,
+		       bool *owner, bool *found, bool *keep)
 {
-	char *p1, *p2, *ret;
+	char *end_of_line, *end_of_word, *line;
 
-	while ((p < e) && (p1 = get_eol(p, e)) < e) {
-		ret = p;
-		if (*p == '#')
+	while (buf_start < buf_end) {
+		size_t len;
+		char netdev_name[IFNAMSIZ];
+
+		*found = false;
+		*keep = true;
+		*owner = false;
+
+		end_of_line = get_eol(buf_start, buf_end);
+		if (end_of_line >= buf_end)
+			return NULL;
+
+		line = buf_start;
+		if (*buf_start == '#')
 			goto next;
 
-		while ((p < e) && isblank(*p))
-			p++;
+		while ((buf_start < buf_end) && isblank(*buf_start))
+			buf_start++;
+
+		/* Check whether the line contains the caller's name. */
+		end_of_word = get_eow(buf_start, buf_end);
+		/* corrupt db */
+		if (!end_of_word)
+			return NULL;
+
+		if (strncmp(buf_start, name, strlen(name)))
+			*found = false;
+
+		*owner = true;
+
+		buf_start = end_of_word + 1;
+		while ((buf_start < buf_end) && isblank(*buf_start))
+			buf_start++;
+
+		/* Check whether line is of the right network type. */
+		end_of_word = get_eow(buf_start, buf_end);
+		/* corrupt db */
+		if (!end_of_word)
+			return NULL;
+
+		if (strncmp(buf_start, net_type, strlen(net_type)))
+			*found = false;
+
+		buf_start = end_of_word + 1;
+		while ((buf_start < buf_end) && isblank(*buf_start))
+			buf_start++;
+
+		/* Check whether line contains the right link. */
+		end_of_word = get_eow(buf_start, buf_end);
+		/* corrupt db */
+		if (!end_of_word)
+			return NULL;
 
-		p2 = get_eow(p, e);
-		if (!p2 || ((size_t)(p2 - p)) != strlen(u) || strncmp(p, u, strlen(u)))
-			return ret;
+		if (strncmp(buf_start, net_link, strlen(net_link)))
+			*found = false;
 
-		p = p2 + 1;
-		while ((p < e) && isblank(*p))
-			p++;
+		buf_start = end_of_word + 1;
+		while ((buf_start < buf_end) && isblank(*buf_start))
+			buf_start++;
 
-		p2 = get_eow(p, e);
-		if (!p2 || ((size_t)(p2 - p)) != strlen(t) || strncmp(p, t, strlen(t)))
-			return ret;
+		/* Check whether line contains the right network device. */
+		end_of_word = get_eow(buf_start, buf_end);
+		/* corrupt db */
+		if (!end_of_word)
+			return NULL;
+
+		len = end_of_word - buf_start;
+		/* corrupt db */
+		if (len >= IFNAMSIZ)
+			return NULL;
 
-		p = p2 + 1;
-		while ((p < e) && isblank(*p))
-			p++;
+		memcpy(netdev_name, buf_start, len);
+		netdev_name[len] = '\0';
+		*keep = lxc_nic_exists(netdev_name);
 
-		p2 = get_eow(p, e);
-		if (!p2 || ((size_t)(p2 - p)) != strlen(l) || strncmp(p, l, strlen(l)))
-			return ret;
+		if (net_dev && !strcmp(netdev_name, net_dev))
+			*found = true;
+
+		return line;
 
-		return ret;
 	next:
-		p = p1 + 1;
+		buf_start = end_of_line + 1;
 	}
 
 	return NULL;
@@ -532,50 +585,30 @@ static char *get_new_nicname(char *br, int pid, char **cnic)
 	return nicname;
 }
 
-static bool get_nic_from_line(char *p, char **nic)
-{
-	int ret;
-	char user[100], type[100], br[100];
-
-	ret = sscanf(p, "%99[^ \t\n] %99[^ \t\n] %99[^ \t\n] %99[^ \t\n]", user,
-		     type, br, *nic);
-	if (ret != 4) {
-		*nic[0] = '\0';
-		return false;
-	}
-
-	return true;
-}
-
 struct entry_line {
 	char *start;
 	int len;
 	bool keep;
 };
 
-static bool cull_entries(int fd, char *me, char *t, char *br, char *nicname,
-			 bool *found_nicname)
+static bool cull_entries(int fd, char *name, char *net_type, char *net_link,
+			 char *net_dev, bool *found_nicname)
 {
 	int i, ret;
-	off_t len;
-	char *buf, *e, *nic, *p;
+	char *buf, *buf_end, *buf_start;
 	struct stat sb;
 	int n = 0;
+	bool found, keep;
 	struct entry_line *entry_lines = NULL;
 
-	nic = alloca(100);
-	if (!nic)
-		return false;
-
 	ret = fstat(fd, &sb);
 	if (ret < 0) {
 		usernic_error("Failed to fstat: %s\n", strerror(errno));
 		return false;
 	}
 
-	len = sb.st_size;
-	if (len == 0)
-		return true;
+	if (!sb.st_size)
+		return false;
 
 	buf = lxc_strmmap(NULL, sb.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 	if (buf == MAP_FAILED) {
@@ -584,55 +617,48 @@ static bool cull_entries(int fd, char *me, char *t, char *br, char *nicname,
 		return false;
 	}
 
-	p = buf;
-	e = buf + len;
-	while ((p = find_line(p, e, me, t, br))) {
+	buf_start = buf;
+	buf_end = buf + sb.st_size;
+	while ((buf_start = find_line(buf_start, buf_end, name, net_type,
+				      net_link, net_dev, &(bool){true}, &found,
+				      &keep))) {
 		struct entry_line *newe;
-		bool exists = false;
 
 		newe = realloc(entry_lines, sizeof(*entry_lines) * (n + 1));
 		if (!newe) {
 			free(entry_lines);
+			lxc_strmunmap(buf, sb.st_size);
 			return false;
 		}
 
+		if (found)
+			*found_nicname = true;
+
 		entry_lines = newe;
-		entry_lines[n].start = p;
-		entry_lines[n].len = get_eol(p, e) - entry_lines[n].start;
-		entry_lines[n].keep = true;
+		entry_lines[n].start = buf_start;
+		entry_lines[n].len = get_eol(buf_start, buf_end) - entry_lines[n].start;
+		entry_lines[n].keep = keep;
 		n++;
-		if (!get_nic_from_line(p, &nic))
-			continue;
-
-		if (nic[0] != '\0')
-			exists = lxc_nic_exists(nic);
 
-		if (!exists)
-			entry_lines[n - 1].keep = false;
-
-		if (exists && nicname)
-			if (!strcmp(nic, nicname))
-				*found_nicname = true;
-
-		p += entry_lines[n - 1].len + 1;
-		if (p >= e)
+		buf_start += entry_lines[n - 1].len + 1;
+		if (buf_start >= buf_end)
 			break;
 	}
 
-	p = buf;
+	buf_start = buf;
 	for (i = 0; i < n; i++) {
 		if (!entry_lines[i].keep)
 			continue;
 
-		memcpy(p, entry_lines[i].start, entry_lines[i].len);
-		p += entry_lines[i].len;
-		*p = '\n';
-		p++;
+		memcpy(buf_start, entry_lines[i].start, entry_lines[i].len);
+		buf_start += entry_lines[i].len;
+		*buf_start = '\n';
+		buf_start++;
 	}
 	free(entry_lines);
 
 	lxc_strmunmap(buf, sb.st_size);
-	ret = ftruncate(fd, p - buf);
+	ret = ftruncate(fd, buf_start - buf);
 	if (ret < 0)
 		usernic_error("Failed to set new file size: %s\n",
 			      strerror(errno));
@@ -640,16 +666,19 @@ static bool cull_entries(int fd, char *me, char *t, char *br, char *nicname,
 	return true;
 }
 
-static int count_entries(char *buf, off_t len, char *me, char *t, char *br)
+static int count_entries(char *buf, off_t len, char *name, char *net_type, char *net_link)
 {
-	char *e;
 	int count = 0;
-
-	e = &buf[len];
-	while ((buf = find_line(buf, e, me, t, br))) {
-		count++;
-		buf = get_eol(buf, e) + 1;
-		if (buf >= e)
+	bool owner = false;;
+	char *buf_end = &buf[len];
+
+	buf_end = &buf[len];
+	while ((buf = find_line(buf, buf_end, name, net_type, net_link, NULL,
+				&owner, &(bool){true}, &(bool){true}))) {
+		if (owner)
+			count++;
+		buf = get_eol(buf, buf_end) + 1;
+		if (buf >= buf_end)
 			break;
 	}
 

From b5fe01dff5b5853d15eaa2dceeec571d86cc5702 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Sun, 3 Sep 2017 16:44:41 +0200
Subject: [PATCH 26/32] handler: root -> am_root

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/conf.c    |  2 +-
 src/lxc/network.c | 12 ++++++------
 src/lxc/start.c   |  8 ++++----
 src/lxc/start.h   |  2 +-
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index b1d35e98a..136d2b807 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -3050,7 +3050,7 @@ static int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handl
 	struct lxc_list *iterator, *network;
 	int data_sock = handler->data_sock[0];
 
-	if (!handler->root)
+	if (!handler->am_root)
 		return 0;
 
 	network = &handler->conf->network;
diff --git a/src/lxc/network.c b/src/lxc/network.c
index 78fe83c38..9e9c4191b 100644
--- a/src/lxc/network.c
+++ b/src/lxc/network.c
@@ -2283,7 +2283,7 @@ bool lxc_delete_network_unpriv(struct lxc_handler *handler)
 	char netns_path[6 + LXC_NUMSTRLEN64 + 4 + LXC_NUMSTRLEN64 + 1];
 	bool deleted_all = true;
 
-	if (handler->root)
+	if (handler->am_root)
 		return true;
 
 	*netns_path = '\0';
@@ -2361,7 +2361,7 @@ int lxc_create_network_priv(struct lxc_handler *handler)
 	struct lxc_list *iterator;
 	struct lxc_list *network = &handler->conf->network;
 
-	if (!handler->root)
+	if (!handler->am_root)
 		return 0;
 
 	lxc_list_for_each(iterator, network) {
@@ -2463,7 +2463,7 @@ bool lxc_delete_network_priv(struct lxc_handler *handler)
 	struct lxc_list *network = &handler->conf->network;
 	bool deleted_all = true;
 
-	if (!handler->root)
+	if (!handler->am_root)
 		return true;
 
 	lxc_list_for_each(iterator, network) {
@@ -2594,7 +2594,7 @@ int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
 	 * the parent network namespace. We won't have this capability if we are
 	 * unprivileged.
 	 */
-	if (!handler->root)
+	if (!handler->am_root)
 		return 0;
 
 	TRACE("Moving physical network devices back to parent network namespace");
@@ -2971,7 +2971,7 @@ int lxc_network_send_veth_names_to_child(struct lxc_handler *handler)
 	struct lxc_list *network = &handler->conf->network;
 	int data_sock = handler->data_sock[0];
 
-	if (handler->root)
+	if (handler->am_root)
 		return 0;
 
 	lxc_list_for_each(iterator, network) {
@@ -3002,7 +3002,7 @@ int lxc_network_recv_veth_names_from_parent(struct lxc_handler *handler)
 	struct lxc_list *network = &handler->conf->network;
 	int data_sock = handler->data_sock[1];
 
-	if (handler->root)
+	if (handler->am_root)
 		return 0;
 
 	lxc_list_for_each(iterator, network) {
diff --git a/src/lxc/start.c b/src/lxc/start.c
index fdb1d16ab..ac4c74924 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -525,7 +525,7 @@ struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf,
 	 * care if we are real root only if we are running as root so this
 	 * should be fine.
 	 */
-	handler->root = !am_unpriv();
+	handler->am_root = !am_unpriv();
 	handler->data_sock[0] = handler->data_sock[1] = -1;
 	handler->conf = conf;
 	handler->lxcpath = lxcpath;
@@ -1037,7 +1037,7 @@ static int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *hand
 	struct lxc_list *iterator, *network;
 	int data_sock = handler->data_sock[1];
 
-	if (!handler->root)
+	if (!handler->am_root)
 		return 0;
 
 	network = &handler->conf->network;
@@ -1601,7 +1601,7 @@ static void lxc_destroy_container_on_signal(struct lxc_handler *handler,
 		}
 	}
 
-	if (!handler->root)
+	if (!handler->am_root)
 		ret = userns_exec_1(handler->conf, lxc_rmdir_onedev_wrapper,
 				    destroy, "lxc_rmdir_onedev_wrapper");
 	else
@@ -1621,7 +1621,7 @@ static int lxc_rmdir_onedev_wrapper(void *data)
 }
 
 static bool do_destroy_container(struct lxc_handler *handler) {
-	if (!handler->root) {
+	if (!handler->am_root) {
 		if (userns_exec_1(handler->conf, storage_destroy_wrapper,
 				  handler->conf, "storage_destroy_wrapper") < 0)
 			return false;
diff --git a/src/lxc/start.h b/src/lxc/start.h
index 99cadd851..1524c554b 100644
--- a/src/lxc/start.h
+++ b/src/lxc/start.h
@@ -35,7 +35,7 @@
 #include "namespace.h"
 
 struct lxc_handler {
-	bool root;
+	bool am_root;
 	pid_t pid;
 	char *name;
 	lxc_state_t state;

From 6935ca89f122e1996735f8734964174b3f40eb83 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Sun, 3 Sep 2017 16:51:54 +0200
Subject: [PATCH 27/32] network: use send()/recv()

Also move all functions to network.{c,h}.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/conf.c    | 37 --------------------------
 src/lxc/network.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
 src/lxc/network.h |  2 ++
 src/lxc/start.c   | 37 --------------------------
 4 files changed, 75 insertions(+), 78 deletions(-)

diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index 136d2b807..f9c61e1e4 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -3045,43 +3045,6 @@ static int lxc_send_ttys_to_parent(struct lxc_handler *handler)
 	return ret;
 }
 
-static int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
-{
-	struct lxc_list *iterator, *network;
-	int data_sock = handler->data_sock[0];
-
-	if (!handler->am_root)
-		return 0;
-
-	network = &handler->conf->network;
-	lxc_list_for_each(iterator, network) {
-		int ret;
-		struct lxc_netdev *netdev = iterator->elem;
-
-		/* Send network device name in the child's namespace to parent. */
-		ret = lxc_abstract_unix_send_credential(data_sock, netdev->name,
-							IFNAMSIZ);
-		if (ret < 0)
-			goto on_error;
-
-		/* Send network device ifindex in the child's namespace to
-		 * parent.
-		 */
-		ret = lxc_abstract_unix_send_credential(data_sock, &netdev->ifindex,
-							sizeof(netdev->ifindex));
-		if (ret < 0)
-			goto on_error;
-	}
-
-	TRACE("Sent network device names and ifindeces to parent");
-	return 0;
-
-on_error:
-	close(handler->data_sock[0]);
-	close(handler->data_sock[1]);
-	return -1;
-}
-
 int lxc_setup(struct lxc_handler *handler)
 {
 	const char *name = handler->name;
diff --git a/src/lxc/network.c b/src/lxc/network.c
index 9e9c4191b..caedcbbc1 100644
--- a/src/lxc/network.c
+++ b/src/lxc/network.c
@@ -2981,8 +2981,7 @@ int lxc_network_send_veth_names_to_child(struct lxc_handler *handler)
 		if (netdev->type != LXC_NET_VETH)
 			continue;
 
-		ret = lxc_abstract_unix_send_credential(data_sock, netdev->name,
-							IFNAMSIZ);
+		ret = send(data_sock, netdev->name, IFNAMSIZ, 0);
 		if (ret < 0) {
 			close(handler->data_sock[0]);
 			close(handler->data_sock[1]);
@@ -3012,8 +3011,7 @@ int lxc_network_recv_veth_names_from_parent(struct lxc_handler *handler)
 		if (netdev->type != LXC_NET_VETH)
 			continue;
 
-		ret = lxc_abstract_unix_rcv_credential(data_sock, netdev->name,
-						       IFNAMSIZ);
+		ret = recv(data_sock, netdev->name, IFNAMSIZ, 0);
 		if (ret < 0) {
 			close(handler->data_sock[0]);
 			close(handler->data_sock[1]);
@@ -3026,3 +3024,74 @@ int lxc_network_recv_veth_names_from_parent(struct lxc_handler *handler)
 
 	return 0;
 }
+
+int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
+{
+	struct lxc_list *iterator, *network;
+	int data_sock = handler->data_sock[0];
+
+	if (!handler->am_root)
+		return 0;
+
+	network = &handler->conf->network;
+	lxc_list_for_each(iterator, network) {
+		int ret;
+		struct lxc_netdev *netdev = iterator->elem;
+
+		/* Send network device name in the child's namespace to parent. */
+		ret = send(data_sock, netdev->name, IFNAMSIZ, 0);
+		if (ret < 0)
+			goto on_error;
+
+		/* Send network device ifindex in the child's namespace to
+		 * parent.
+		 */
+		ret = send(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
+		if (ret < 0)
+			goto on_error;
+	}
+
+	TRACE("Sent network device names and ifindeces to parent");
+	return 0;
+
+on_error:
+	close(handler->data_sock[0]);
+	close(handler->data_sock[1]);
+	return -1;
+}
+
+int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
+{
+	struct lxc_list *iterator, *network;
+	int data_sock = handler->data_sock[1];
+
+	if (!handler->am_root)
+		return 0;
+
+	network = &handler->conf->network;
+	lxc_list_for_each(iterator, network) {
+		int ret;
+		struct lxc_netdev *netdev = iterator->elem;
+
+		/* Receive network device name in the child's namespace to
+		 * parent.
+		 */
+		ret = recv(data_sock, netdev->name, IFNAMSIZ, 0);
+		if (ret < 0)
+			goto on_error;
+
+		/* Receive network device ifindex in the child's namespace to
+		 * parent.
+		 */
+		ret = recv(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
+		if (ret < 0)
+			goto on_error;
+	}
+
+	return 0;
+
+on_error:
+	close(handler->data_sock[0]);
+	close(handler->data_sock[1]);
+	return -1;
+}
diff --git a/src/lxc/network.h b/src/lxc/network.h
index 5dd48fb2a..ffe318125 100644
--- a/src/lxc/network.h
+++ b/src/lxc/network.h
@@ -307,5 +307,7 @@ extern int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
 						 struct lxc_list *network);
 extern int lxc_network_send_veth_names_to_child(struct lxc_handler *handler);
 extern int lxc_network_recv_veth_names_from_parent(struct lxc_handler *handler);
+extern int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler);
+extern int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler);
 
 #endif /* __LXC_NETWORK_H */
diff --git a/src/lxc/start.c b/src/lxc/start.c
index ac4c74924..6aba02022 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -1032,43 +1032,6 @@ static int do_start(void *data)
 	return -1;
 }
 
-static int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
-{
-	struct lxc_list *iterator, *network;
-	int data_sock = handler->data_sock[1];
-
-	if (!handler->am_root)
-		return 0;
-
-	network = &handler->conf->network;
-	lxc_list_for_each(iterator, network) {
-		int ret;
-		struct lxc_netdev *netdev = iterator->elem;
-
-		/* Receive network device name in the child's namespace to
-		 * parent.
-		 */
-		ret = lxc_abstract_unix_rcv_credential(data_sock, netdev->name, IFNAMSIZ);
-		if (ret < 0)
-			goto on_error;
-
-		/* Receive network device ifindex in the child's namespace to
-		 * parent.
-		 */
-		ret = lxc_abstract_unix_rcv_credential(data_sock, &netdev->ifindex,
-						       sizeof(netdev->ifindex));
-		if (ret < 0)
-			goto on_error;
-	}
-
-	return 0;
-
-on_error:
-	close(handler->data_sock[0]);
-	close(handler->data_sock[1]);
-	return -1;
-}
-
 static int lxc_recv_ttys_from_child(struct lxc_handler *handler)
 {
 	int i;

From 25ed919d06738e17c8997379c41a07fb52dbec4f Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Sun, 3 Sep 2017 17:08:23 +0200
Subject: [PATCH 28/32] network: fix grammar

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/network.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/lxc/network.c b/src/lxc/network.c
index caedcbbc1..871502cf7 100644
--- a/src/lxc/network.c
+++ b/src/lxc/network.c
@@ -2483,9 +2483,10 @@ bool lxc_delete_network_priv(struct lxc_handler *handler)
 				     "from \"%s\" to its initial name \"%s\"",
 				     netdev->ifindex, netdev->name, netdev->link);
 			else
-				TRACE("Renamed interface with index %d to its "
-				      "from \"%s\" to its initial name \"%s\"",
-				      netdev->ifindex, netdev->name, netdev->link);
+				TRACE("Renamed interface with index %d from "
+				      "\"%s\" to its initial name \"%s\"",
+				      netdev->ifindex, netdev->name,
+				      netdev->link);
 			continue;
 		}
 

From 2d821b827dec3d60913b1a5fbe297be3ebd71d3e Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Sun, 3 Sep 2017 20:37:21 +0200
Subject: [PATCH 29/32] network: remove allocation from lxc_mkifname()

lxc_mkifname() really doesn't need to allocate any memory.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/criu.c         |  8 ++-----
 src/lxc/lxc_user_nic.c | 13 +++++------
 src/lxc/network.c      | 60 ++++++++++++++++++++++----------------------------
 src/lxc/network.h      |  2 +-
 4 files changed, 34 insertions(+), 49 deletions(-)

diff --git a/src/lxc/criu.c b/src/lxc/criu.c
index b7354a33b..f229369ca 100644
--- a/src/lxc/criu.c
+++ b/src/lxc/criu.c
@@ -766,14 +766,10 @@ static bool restore_net_info(struct lxc_container *c)
 
 		if (netdev->priv.veth_attr.pair[0] == '\0' &&
 		    netdev->priv.veth_attr.veth1[0] == '\0') {
-			char *tmp;
-
-			tmp = lxc_mkifname(template);
-			if (!tmp)
+			if (!lxc_mkifname(template))
 				goto out_unlock;
 
-			strcpy(netdev->priv.veth_attr.veth1, tmp);
-			free(tmp);
+			strcpy(netdev->priv.veth_attr.veth1, template);
 		}
 	}
 
diff --git a/src/lxc/lxc_user_nic.c b/src/lxc/lxc_user_nic.c
index 6920c5462..9026bc425 100644
--- a/src/lxc/lxc_user_nic.c
+++ b/src/lxc/lxc_user_nic.c
@@ -566,23 +566,20 @@ static bool create_nic(char *nic, char *br, int pid, char **cnic)
 static char *get_new_nicname(char *br, int pid, char **cnic)
 {
 	int ret;
-	char *nicname;
-	char template[IFNAMSIZ];
+	char nicname[IFNAMSIZ];
 
-	ret = snprintf(template, sizeof(template), "vethXXXXXX");
-	if (ret < 0 || (size_t)ret >= sizeof(template))
+	ret = snprintf(nicname, sizeof(nicname), "vethXXXXXX");
+	if (ret < 0 || (size_t)ret >= sizeof(nicname))
 		return NULL;
 
-	nicname = lxc_mkifname(template);
-	if (!nicname)
+	if (!lxc_mkifname(nicname))
 		return NULL;
 
 	if (!create_nic(nicname, br, pid, cnic)) {
-		free(nicname);
 		return NULL;
 	}
 
-	return nicname;
+	return strdup(nicname);
 }
 
 struct entry_line {
diff --git a/src/lxc/network.c b/src/lxc/network.c
index 871502cf7..6f87d07a3 100644
--- a/src/lxc/network.c
+++ b/src/lxc/network.c
@@ -218,9 +218,6 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd
 out_delete:
 	if (netdev->ifindex != 0)
 		lxc_netdev_delete_by_name(veth1);
-	if (netdev->priv.veth_attr.pair != veth1)
-		free(veth1);
-	free(veth2);
 	return -1;
 }
 
@@ -247,29 +244,29 @@ static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *n
 	if (err) {
 		ERROR("Failed to create macvlan interface \"%s\" on \"%s\": %s",
 		      peer, netdev->link, strerror(-err));
-		goto out;
+		goto on_error;
 	}
 
 	netdev->ifindex = if_nametoindex(peer);
 	if (!netdev->ifindex) {
 		ERROR("Failed to retrieve ifindex for \"%s\"", peer);
-		goto out;
+		goto on_error;
 	}
 
 	if (netdev->upscript) {
 		err = run_script(handler->name, "net", netdev->upscript, "up",
 				 "macvlan", netdev->link, (char*) NULL);
 		if (err)
-			goto out;
+			goto on_error;
 	}
 
 	DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
 	      peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
 
 	return 0;
-out:
+
+on_error:
 	lxc_netdev_delete_by_name(peer);
-	free(peer);
 	return -1;
 }
 
@@ -1890,17 +1887,16 @@ const char *lxc_net_type_to_str(int type)
 	return lxc_network_types[type];
 }
 
-static const char padchar[] =
-"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
 
-char *lxc_mkifname(const char *template)
+char *lxc_mkifname(char *template)
 {
-	char *name = NULL;
-	size_t i = 0;
-	FILE *urandom;
 	unsigned int seed;
-	struct ifaddrs *ifaddr, *ifa;
-	int ifexists = 0;
+	FILE *urandom;
+	struct ifaddrs *ifa, *ifaddr;
+	char name[IFNAMSIZ];
+	bool exists = false;
+	size_t i = 0;
 
 	if (strlen(template) >= IFNAMSIZ)
 		return NULL;
@@ -1908,28 +1904,26 @@ char *lxc_mkifname(const char *template)
 	/* Get all the network interfaces. */
 	getifaddrs(&ifaddr);
 
-	/* Initialize the random number generator */
-	urandom = fopen ("/dev/urandom", "r");
+	/* Initialize the random number generator. */
+	urandom = fopen("/dev/urandom", "r");
 	if (urandom != NULL) {
-		if (fread (&seed, sizeof(seed), 1, urandom) <= 0)
+		if (fread(&seed, sizeof(seed), 1, urandom) <= 0)
 			seed = time(0);
 		fclose(urandom);
-	}
-	else
+	} else {
 		seed = time(0);
+	}
 
 #ifndef HAVE_RAND_R
 	srand(seed);
 #endif
 
-	/* Generate random names until we find one that doesn't exist */
-	while(1) {
-		ifexists = 0;
-		name = strdup(template);
-
-		if (name == NULL)
-			return NULL;
+	/* Generate random names until we find one that doesn't exist. */
+	while (true) {
+		name[0] = '\0';
+		strcpy(name, template);
 
+		exists = false;
 		for (i = 0; i < strlen(name); i++) {
 			if (name[i] == 'X') {
 #ifdef HAVE_RAND_R
@@ -1941,20 +1935,18 @@ char *lxc_mkifname(const char *template)
 		}
 
 		for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
-			if (strcmp(ifa->ifa_name, name) == 0) {
-				ifexists = 1;
+			if (!strcmp(ifa->ifa_name, name)) {
+				exists = true;
 				break;
 			}
 		}
 
-		if (ifexists == 0)
+		if (!exists)
 			break;
-
-		free(name);
 	}
 
 	freeifaddrs(ifaddr);
-	return name;
+	return strcpy(template, name);
 }
 
 int setup_private_host_hw_addr(char *veth1)
diff --git a/src/lxc/network.h b/src/lxc/network.h
index ffe318125..c745bfd73 100644
--- a/src/lxc/network.h
+++ b/src/lxc/network.h
@@ -286,7 +286,7 @@ extern int lxc_neigh_proxy_off(const char *name, int family);
 /* Generate a new unique network interface name.
  * Allocated memory must be freed by caller.
  */
-extern char *lxc_mkifname(const char *template);
+extern char *lxc_mkifname(char *template);
 
 extern const char *lxc_net_type_to_str(int type);
 extern int setup_private_host_hw_addr(char *veth1);

From fcbd0111f5c026d60cc718fd8665bfb84b3c1b82 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Sun, 3 Sep 2017 20:49:54 +0200
Subject: [PATCH 30/32] lxc-user-nic: simplify

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/lxc_user_nic.c | 103 +++++++++++++++++++------------------------------
 1 file changed, 40 insertions(+), 63 deletions(-)

diff --git a/src/lxc/lxc_user_nic.c b/src/lxc/lxc_user_nic.c
index 9026bc425..0f79451d1 100644
--- a/src/lxc/lxc_user_nic.c
+++ b/src/lxc/lxc_user_nic.c
@@ -61,8 +61,8 @@ static void usage(char *me, bool fail)
 {
 	fprintf(stderr, "Usage: %s create {lxcpath} {name} {pid} {type} "
 			"{bridge} {nicname}\n", me);
-	fprintf(stderr, "Usage: %s delete {lxcpath} {name} {pid} {type} "
-			"{bridge} {nicname}\n", me);
+	fprintf(stderr, "Usage: %s delete {lxcpath} {name} "
+			"{/proc/<pid>/ns/net} {type} {bridge} {nicname}\n", me);
 	fprintf(stderr, "{nicname} is the name to use inside the container\n");
 
 	if (fail)
@@ -450,32 +450,26 @@ static char *find_line(char *buf_start, char *buf_end, char *name,
 	return NULL;
 }
 
-static int instantiate_veth(char *n1, char **n2)
+static int instantiate_veth(char *veth1, char *veth2)
 {
-	int err;
-
-	err = snprintf(*n2, IFNAMSIZ, "%sp", n1);
-	if (err < 0 || err >= IFNAMSIZ) {
-		usernic_error("%s\n", "Could not create nic name");
-		return -1;
-	}
+	int ret;
 
-	err = lxc_veth_create(n1, *n2);
-	if (err) {
-		usernic_error("Failed to create %s-%s : %s.\n", n1, *n2,
-			      strerror(-err));
+	ret = lxc_veth_create(veth1, veth2);
+	if (ret < 0) {
+		usernic_error("Failed to create %s-%s : %s.\n", veth1, veth2,
+			      strerror(-ret));
 		return -1;
 	}
 
 	/* Changing the high byte of the mac address to 0xfe, the bridge
 	 * interface will always keep the host's mac address and not take the
 	 * mac address of a container. */
-	err = setup_private_host_hw_addr(n1);
-	if (err)
+	ret = setup_private_host_hw_addr(veth1);
+	if (ret < 0)
 		usernic_error("Failed to change mac address of host interface "
-			      "%s : %s\n", n1, strerror(-err));
+			      "%s : %s\n", veth1, strerror(-ret));
 
-	return netdev_set_flag(n1, IFF_UP);
+	return netdev_set_flag(veth1, IFF_UP);
 }
 
 static int get_mtu(char *name)
@@ -488,28 +482,27 @@ static int get_mtu(char *name)
 	return netdev_get_mtu(idx);
 }
 
-static bool create_nic(char *nic, char *br, int pid, char **cnic)
+static int create_nic(char *nic, char *br, int pid, char **cnic)
 {
-	char *veth1buf, *veth2buf;
+	char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
 	int mtu, ret;
 
-	veth1buf = alloca(IFNAMSIZ);
-	veth2buf = alloca(IFNAMSIZ);
-	if (!veth1buf || !veth2buf) {
-		usernic_error("Failed allocate memory: %s\n", strerror(errno));
-		return false;
-	}
-
 	ret = snprintf(veth1buf, IFNAMSIZ, "%s", nic);
 	if (ret < 0 || ret >= IFNAMSIZ) {
 		usernic_error("%s", "Could not create nic name\n");
-		return false;
+		return -1;
 	}
 
+	ret = snprintf(veth2buf, IFNAMSIZ, "%sp", veth1buf);
+	if (ret < 0 || ret >= IFNAMSIZ) {
+		usernic_error("%s\n", "Could not create nic name");
+		return -1;
+	}
 	/* create the nics */
-	if (instantiate_veth(veth1buf, &veth2buf) < 0) {
+	ret = instantiate_veth(veth1buf, veth2buf);
+	if (ret < 0) {
 		usernic_error("%s", "Error creating veth tunnel\n");
-		return false;
+		return -1;
 	}
 
 	if (strcmp(br, "none")) {
@@ -550,36 +543,14 @@ static bool create_nic(char *nic, char *br, int pid, char **cnic)
 	*cnic = strdup(veth2buf);
 	if (!*cnic) {
 		usernic_error("Failed to copy string \"%s\"\n", veth2buf);
-		return false;
+		return -1;
 	}
 
-	return true;
+	return 0;
 
 out_del:
 	lxc_netdev_delete_by_name(veth1buf);
-	return false;
-}
-
-/* get_new_nicname() will return the name (vethXXXXXX) which is attached on the
- * host to the lxc bridge. The returned string must be freed by caller.
- */
-static char *get_new_nicname(char *br, int pid, char **cnic)
-{
-	int ret;
-	char nicname[IFNAMSIZ];
-
-	ret = snprintf(nicname, sizeof(nicname), "vethXXXXXX");
-	if (ret < 0 || (size_t)ret >= sizeof(nicname))
-		return NULL;
-
-	if (!lxc_mkifname(nicname))
-		return NULL;
-
-	if (!create_nic(nicname, br, pid, cnic)) {
-		return NULL;
-	}
-
-	return strdup(nicname);
+	return -1;
 }
 
 struct entry_line {
@@ -688,7 +659,8 @@ static char *get_nic_if_avail(int fd, struct alloted_s *names, int pid,
 {
 	int ret;
 	size_t slen;
-	char *newline, *nicname, *owner;
+	char *newline, *owner;
+	char nicname[IFNAMSIZ];
 	struct stat sb;
 	struct alloted_s *n;
 	int count = 0;
@@ -733,9 +705,16 @@ static char *get_nic_if_avail(int fd, struct alloted_s *names, int pid,
 	if (owner == NULL)
 		return NULL;
 
-	nicname = get_new_nicname(br, pid, cnic);
-	if (!nicname) {
-		usernic_error("%s", "Failed to get new nic name\n");
+	ret = snprintf(nicname, sizeof(nicname), "vethXXXXXX");
+	if (ret < 0 || (size_t)ret >= sizeof(nicname))
+		return NULL;
+
+	if (!lxc_mkifname(nicname))
+		return NULL;
+
+	ret = create_nic(nicname, br, pid, cnic);
+	if (ret < 0) {
+		usernic_error("%s", "Failed to create new nic\n");
 		return NULL;
 	}
 
@@ -760,7 +739,6 @@ static char *get_nic_if_avail(int fd, struct alloted_s *names, int pid,
 	slen = strlen(owner) + strlen(intype) + strlen(br) + strlen(nicname) + 4;
 	newline = malloc(slen + 1);
 	if (!newline) {
-		free(nicname);
 		free(newline);
 		usernic_error("Failed allocate memory: %s\n", strerror(errno));
 		return NULL;
@@ -770,7 +748,6 @@ static char *get_nic_if_avail(int fd, struct alloted_s *names, int pid,
 	if (ret < 0 || (size_t)ret >= (slen + 1)) {
 		if (lxc_netdev_delete_by_name(nicname) != 0)
 			usernic_error("Error unlinking %s\n", nicname);
-		free(nicname);
 		free(newline);
 		return NULL;
 	}
@@ -789,7 +766,6 @@ static char *get_nic_if_avail(int fd, struct alloted_s *names, int pid,
 			      strerror(errno));
 		if (lxc_netdev_delete_by_name(nicname) != 0)
 			usernic_error("Error unlinking %s\n", nicname);
-		free(nicname);
 		free(newline);
 		return NULL;
 	}
@@ -801,7 +777,7 @@ static char *get_nic_if_avail(int fd, struct alloted_s *names, int pid,
 	free(newline);
 	lxc_strmunmap(buf, sb.st_size + slen);
 
-	return nicname;
+	return strdup(nicname);
 }
 
 static bool create_db_dir(char *fnam)
@@ -1252,6 +1228,7 @@ int main(int argc, char *argv[])
 		free(nicname);
 		exit(EXIT_FAILURE);
 	}
+
 	host_veth_ifidx = if_nametoindex(nicname);
 	if (!host_veth_ifidx) {
 		free(newname);

From f440dacd116d10ee6ed0443ff2030638ff273b30 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Mon, 4 Sep 2017 01:27:04 +0200
Subject: [PATCH 31/32] conf: send ttys in batches of 2

I thought we could send all ttys at once but this limits the number of ttys
users can use because of iovec_len restrictions. So let's send them in batches
of 2.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/conf.c  | 38 ++++++++++++++++----------------------
 src/lxc/start.c | 32 ++++++++++++++------------------
 2 files changed, 30 insertions(+), 40 deletions(-)

diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index f9c61e1e4..0328d1759 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -3006,41 +3006,35 @@ static bool verify_start_hooks(struct lxc_conf *conf)
 static int lxc_send_ttys_to_parent(struct lxc_handler *handler)
 {
 	int i;
-	int *ttyfds;
-	struct lxc_pty_info *pty_info;
 	struct lxc_conf *conf = handler->conf;
-	const struct lxc_tty_info *tty_info = &conf->tty_info;
+	struct lxc_tty_info *tty_info = &conf->tty_info;
 	int sock = handler->data_sock[0];
 	int ret = -1;
-	size_t num_ttyfds = (2 * conf->tty);
 
-	ttyfds = malloc(num_ttyfds * sizeof(int));
-	if (!ttyfds)
-		return -1;
+	for (i = 0; i < conf->tty; i++) {
+		int ttyfds[2];
+		struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
 
-	for (i = 0; i < num_ttyfds; i++) {
-		pty_info = &tty_info->pty_info[i / 2];
-		ttyfds[i++] = pty_info->slave;
-		ttyfds[i] = pty_info->master;
-		TRACE("send pty \"%s\" with master fd %d and slave fd %d to "
-		      "parent",
-		      pty_info->name, pty_info->master, pty_info->slave);
+		ttyfds[0] = pty_info->master;
+		ttyfds[1] = pty_info->slave;
+
+		ret = lxc_abstract_unix_send_fds(sock, ttyfds, 2, NULL, 0);
+		if (ret < 0)
+			break;
+
+		TRACE("Send pty \"%s\" with master fd %d and slave fd %d to "
+		      "parent", pty_info->name, pty_info->master, pty_info->slave);
 	}
 
-	ret = lxc_abstract_unix_send_fds(sock, ttyfds, num_ttyfds, NULL, 0);
 	if (ret < 0)
-		ERROR("failed to send %d ttys to parent: %s", conf->tty,
+		ERROR("Failed to send %d ttys to parent: %s", conf->tty,
 		      strerror(errno));
 	else
-		TRACE("sent %d ttys to parent", conf->tty);
+		TRACE("Sent %d ttys to parent", conf->tty);
 
 	close(handler->data_sock[0]);
 	close(handler->data_sock[1]);
-
-	for (i = 0; i < num_ttyfds; i++)
-		close(ttyfds[i]);
-
-	free(ttyfds);
+	lxc_delete_tty(tty_info);
 
 	return ret;
 }
diff --git a/src/lxc/start.c b/src/lxc/start.c
index 6aba02022..ec2f779cc 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -1035,13 +1035,11 @@ static int do_start(void *data)
 static int lxc_recv_ttys_from_child(struct lxc_handler *handler)
 {
 	int i;
-	int *ttyfds;
 	struct lxc_pty_info *pty_info;
 	int ret = -1;
 	int sock = handler->data_sock[1];
 	struct lxc_conf *conf = handler->conf;
 	struct lxc_tty_info *tty_info = &conf->tty_info;
-	size_t num_ttyfds = (2 * conf->tty);
 
 	if (!conf->tty)
 		return 0;
@@ -1050,29 +1048,27 @@ static int lxc_recv_ttys_from_child(struct lxc_handler *handler)
 	if (!tty_info->pty_info)
 		return -1;
 
-	ttyfds = malloc(num_ttyfds * sizeof(int));
-	if (!ttyfds)
-		return -1;
+	for (i = 0; i < conf->tty; i++) {
+		int ttyfds[2];
+
+		ret = lxc_abstract_unix_recv_fds(sock, ttyfds, 2, NULL, 0);
+		if (ret < 0)
+			break;
 
-	ret = lxc_abstract_unix_recv_fds(sock, ttyfds, num_ttyfds, NULL, 0);
-	for (i = 0; (ret >= 0 && *ttyfds != -1) && (i < num_ttyfds); i++) {
-		pty_info = &tty_info->pty_info[i / 2];
+		pty_info = &tty_info->pty_info[i];
 		pty_info->busy = 0;
-		pty_info->slave = ttyfds[i++];
-		pty_info->master = ttyfds[i];
-		TRACE("received pty with master fd %d and slave fd %d from "
+		pty_info->master = ttyfds[0];
+		pty_info->slave = ttyfds[1];
+		TRACE("Received pty with master fd %d and slave fd %d from "
 		      "parent", pty_info->master, pty_info->slave);
 	}
-
-	tty_info->nbtty = conf->tty;
-
-	free(ttyfds);
-
 	if (ret < 0)
-		ERROR("failed to receive %d ttys from child: %s", conf->tty,
+		ERROR("Failed to receive %d ttys from child: %s", conf->tty,
 		      strerror(errno));
 	else
-		TRACE("received %d ttys from child", conf->tty);
+		TRACE("Received %d ttys from child", conf->tty);
+
+	tty_info->nbtty = conf->tty;
 
 	return ret;
 }

From 0fdd51c41293915c8ac6c8a51bd30f0939357838 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Mon, 4 Sep 2017 01:27:30 +0200
Subject: [PATCH 32/32] start: switch from SOCK_DGRAM to SOCK_STREAM

Writes < PIPE_BUF will be atomic. PIPE_BUF is guaranteed to be 512 by POSIX and
Linux guarantees 4096. Nothing we send around goes over this limit.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/start.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lxc/start.c b/src/lxc/start.c
index ec2f779cc..a6a638c41 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -1125,7 +1125,7 @@ static int lxc_spawn(struct lxc_handler *handler)
 	if (lxc_sync_init(handler))
 		return -1;
 
-	ret = socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0,
+	ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0,
 			 handler->data_sock);
 	if (ret < 0) {
 		lxc_sync_fini(handler);


More information about the lxc-devel mailing list