[lxc-devel] [PATCH 1/1] Fix unprivileged networking

Stéphane Graber stgraber at ubuntu.com
Tue Feb 18 21:35:41 UTC 2014


On Tue, Feb 18, 2014 at 03:12:52PM -0600, Serge Hallyn wrote:
> If we are unprivileged and have asked for a veth device, then create
> a pipe over which to pass the veth names.
> 
> Network-related todos:
> 1. set mtu on the container side of veth device

> 2. set mtu in lxc-user-nic.  Note that this probably requires an
>    update to the /etc/lxc/lxc-usernet file :(

Hmm, that's an interesting problem and even without that change, we
actually have a bug at the moment which may or may not qualify as a
security issue.

The bridge will set its own MTU to the lowest of all devices inside it
(or so it looks like anyway), so say that a bridge has an MTU of 9000
(jumbo) and a user can join a container to it, that'll decrease the MTU
to 1500 possibly breaking the other containers in the bridge.

To fix that it looks like we indeed want an extra column in lxc-usernet
which would specify the min and max MTU, a value of 0 (same as no value)
would tell lxc-user-nic to copy that of the bridge, an value of
1500:4000 would mean that the mtu may not be set below 1500 or above
4000.

Unfortunately as this would result in a rather user visible change as
well as documentation changes, if we are going to do this, we really
should do it before 1.0.


Alternatively we could state that unprivileged containers may not use a
custom MTU and that they will always default to the bridge's MTU value
for both sides of the veth device.

In which case we still need to change both lxc and lxc-user-nic to get
the current MTU from the bridge and set it on both side of the veth
device.

> Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>

Acked-by: Stéphane Graber <stgraber at ubuntu.com>

> ---
>  src/lxc/conf.c  | 16 ++++++++++--
>  src/lxc/start.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 90 insertions(+), 2 deletions(-)
> 
> diff --git a/src/lxc/conf.c b/src/lxc/conf.c
> index d482d22..c9dd7ac 100644
> --- a/src/lxc/conf.c
> +++ b/src/lxc/conf.c
> @@ -3114,13 +3114,23 @@ static int unpriv_assign_nic(struct lxc_netdev *netdev, pid_t pid)
>  	token = strtok_r(buffer, ":", &saveptr);
>  	if (!token)
>  		return -1;
> -	netdev->name = strdup(token);
> +	netdev->name = malloc(IFNAMSIZ+1);
> +	if (!netdev->name) {
> +		ERROR("Out of memory");
> +		return -1;
> +	}
> +	memset(netdev->name, 0, IFNAMSIZ+1);
> +	strncpy(netdev->name, token, IFNAMSIZ);
>  
>  	/* fill netdev->veth_attr.pair field */
>  	token = strtok_r(NULL, ":", &saveptr);
>  	if (!token)
>  		return -1;
>  	netdev->priv.veth_attr.pair = strdup(token);
> +	if (!netdev->priv.veth_attr.pair) {
> +		ERROR("Out of memory");
> +		return -1;
> +	}
>  
>  	return 0;
>  }
> @@ -3139,7 +3149,9 @@ int lxc_assign_network(struct lxc_list *network, pid_t pid)
>  		if (netdev->type == LXC_NET_VETH && !am_root) {
>  			if (unpriv_assign_nic(netdev, pid))
>  				return -1;
> -			// TODO fill in netdev->ifindex and name
> +			// lxc-user-nic has moved the nic to the new ns.
> +			// unpriv_assign_nic() fills in netdev->name.
> +			// netdev->ifindex will be filed in at setup_netdev.
>  			continue;
>  		}
>  
> diff --git a/src/lxc/start.c b/src/lxc/start.c
> index 5b3b6eb..2faad8e 100644
> --- a/src/lxc/start.c
> +++ b/src/lxc/start.c
> @@ -563,6 +563,52 @@ static int must_drop_cap_sys_boot(struct lxc_conf *conf)
>  	return 0;
>  }
>  
> +/*
> + * netpipe is used in the unprivileged case to transfer the ifindexes
> + * from parent to child
> + */
> +static int netpipe = -1;
> +
> +static inline int count_veths(struct lxc_list *network)
> +{
> +	struct lxc_list *iterator;
> +	struct lxc_netdev *netdev;
> +	int count = 0;
> +
> +	lxc_list_for_each(iterator, network) {
> +		netdev = iterator->elem;
> +		if (netdev->type != LXC_NET_VETH)
> +			continue;
> +		count++;
> +	}
> +	return count;
> +}
> +
> +static int read_unpriv_netifindex(struct lxc_list *network)
> +{
> +	struct lxc_list *iterator;
> +	struct lxc_netdev *netdev;
> +
> +	if (netpipe == -1)
> +		return 0;
> +	lxc_list_for_each(iterator, network) {
> +		netdev = iterator->elem;
> +		if (netdev->type != LXC_NET_VETH)
> +			continue;
> +		if (!(netdev->name = malloc(IFNAMSIZ))) {
> +			ERROR("Out of memory");
> +			close(netpipe);
> +			return -1;
> +		}
> +		if (read(netpipe, netdev->name, IFNAMSIZ) != IFNAMSIZ) {
> +			close(netpipe);
> +			return -1;
> +		}
> +	}
> +	close(netpipe);
> +	return 0;
> +}
> +
>  static int do_start(void *data)
>  {
>  	struct lxc_handler *handler = data;
> @@ -597,6 +643,9 @@ static int do_start(void *data)
>  	if (lxc_sync_barrier_parent(handler, LXC_SYNC_CONFIGURE))
>  		return -1;
>  
> +	if (read_unpriv_netifindex(&handler->conf->network) < 0)
> +		goto out_warn_father;
> +
>  	/*
>  	 * if we are in a new user namespace, become root there to have
>  	 * privilege over our namespace
> @@ -728,6 +777,7 @@ static int lxc_spawn(struct lxc_handler *handler)
>  	const char *name = handler->name;
>  	int saved_ns_fd[LXC_NS_MAX];
>  	int preserve_mask = 0, i;
> +	int netpipepair[2], nveths;
>  
>  	for (i = 0; i < LXC_NS_MAX; i++)
>  		if (handler->conf->inherit_ns_fd[i] != -1)
> @@ -816,6 +866,15 @@ static int lxc_spawn(struct lxc_handler *handler)
>  	if (attach_ns(handler->conf->inherit_ns_fd) < 0)
>  		goto out_delete_net;
>  
> +	if (am_unpriv() && (nveths = count_veths(&handler->conf->network))) {
> +		if (pipe(netpipepair) < 0) {
> +			SYSERROR("Error creating pipe");
> +			goto out_delete_net;
> +		}
> +		/* store netpipe in the global var for do_start's use */
> +		netpipe = netpipepair[0];
> +	}
> +
>  	/* Create a process in a new set of namespaces */
>  	handler->pid = lxc_clone(do_start, handler, handler->clone_flags);
>  	if (handler->pid < 0) {
> @@ -857,6 +916,23 @@ static int lxc_spawn(struct lxc_handler *handler)
>  		}
>  	}
>  
> +	if (netpipe != -1) {
> +		struct lxc_list *iterator;
> +		struct lxc_netdev *netdev;
> +
> +		close(netpipe);
> +		lxc_list_for_each(iterator, &handler->conf->network) {
> +			netdev = iterator->elem;
> +			if (netdev->type != LXC_NET_VETH)
> +				continue;
> +			if (write(netpipepair[1], netdev->name, IFNAMSIZ) != IFNAMSIZ) {
> +				ERROR("Error writing veth name to container");
> +				goto out_delete_net;
> +			}
> +		}
> +		close(netpipepair[1]);
> +	}
> +
>  	/* map the container uids - the container became an invalid
>  	 * userid the moment it was cloned with CLONE_NEWUSER - this
>  	 * call doesn't change anything immediately, but allows the
> -- 
> 1.9.rc1
> 
> _______________________________________________
> lxc-devel mailing list
> lxc-devel at lists.linuxcontainers.org
> http://lists.linuxcontainers.org/listinfo/lxc-devel

-- 
Stéphane Graber
Ubuntu developer
http://www.ubuntu.com
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: Digital signature
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20140218/b89939f2/attachment.pgp>


More information about the lxc-devel mailing list