[lxc-devel] [PATCH] seccomp: support 'all' arch sections (plus bugfixes)

Stéphane Graber stgraber at ubuntu.com
Fri Jun 20 18:35:08 UTC 2014


On Thu, Jun 19, 2014 at 08:52:34PM +0000, Serge Hallyn wrote:
> seccomp_ctx is already a void*, so don't use 'scmp_filter_ctx *'
> 
> Separately track the native arch from the arch a rule is aimed at.
> 
> Clearly ignore irrelevant architectures (i.e. arm rules on x86)
> 
> Don't try to load seccomp (and don't fail) if we are already
> seccomp-confined.  Otherwise nested containers fail.
> 
> Make it clear that the extra seccomp ctx is only for compat calls
> on 64-bit arch.  (This will be extended to arm64 when libseccomp
> supports it).  Power may will complicate this (if ever it is supported)
> and require a new rethink and rewrite.
> 
> NOTE - currently when starting a 32-bit container on 64-bit host,
> rules pertaining to 32-bit syscalls (as opposed to once which have
> the same syscall #) appear to be ignored.  I can reproduce that without
> lxc, so either there is a bug in seccomp or a fundamental
> misunderstanding in how I"m merging the contexts.
> 
> Rereading the seccomp_rule_add manpage suggests that keeping the seccond
> seccomp context may not be necessary, but this is not something I care
> to test right now.  If it's true, then the code could be simplified, and
> it may solve my concerns about power.
> 
> With this patch I'm able to start nested containers (with seccomp
> policies defined) including 32-bit and 32-bit-in-64-bit.
> 
> [ this patch does not yet add the default seccomp policy ]
> 
> Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>

Acked-by: Stéphane Graber <stgraber at ubuntu.com>

> ---
>  src/lxc/conf.h    |   2 +-
>  src/lxc/seccomp.c | 287 ++++++++++++++++++++++++++++++++++++++++++++----------
>  2 files changed, 234 insertions(+), 55 deletions(-)
> 
> diff --git a/src/lxc/conf.h b/src/lxc/conf.h
> index ace1da0..3527c44 100644
> --- a/src/lxc/conf.h
> +++ b/src/lxc/conf.h
> @@ -312,7 +312,7 @@ struct lxc_conf {
>  	int tmp_umount_proc;
>  	char *seccomp;  // filename with the seccomp rules
>  #if HAVE_SCMP_FILTER_CTX
> -	scmp_filter_ctx *seccomp_ctx;
> +	scmp_filter_ctx seccomp_ctx;
>  #endif
>  	int maincmd_fd;
>  	int autodev;  // if 1, mount and fill a /dev at start
> diff --git a/src/lxc/seccomp.c b/src/lxc/seccomp.c
> index fadc190..5df37bc 100644
> --- a/src/lxc/seccomp.c
> +++ b/src/lxc/seccomp.c
> @@ -27,6 +27,7 @@
>  #include <seccomp.h>
>  #include <errno.h>
>  #include <seccomp.h>
> +#include <sys/utsname.h>
>  
>  #include "config.h"
>  #include "lxcseccomp.h"
> @@ -112,6 +113,98 @@ static uint32_t get_and_clear_v2_action(char *line, uint32_t def_action)
>  }
>  #endif
>  
> +#if HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH
> +enum lxc_hostarch_t {
> +	lxc_seccomp_arch_all = 0,
> +	lxc_seccomp_arch_native,
> +	lxc_seccomp_arch_i386,
> +	lxc_seccomp_arch_amd64,
> +	lxc_seccomp_arch_arm,
> +	lxc_seccomp_arch_unknown = 999,
> +};
> +
> +int get_hostarch(void)
> +{
> +	struct utsname uts;
> +	if (uname(&uts) < 0) {
> +		SYSERROR("Failed to read host arch");
> +		return -1;
> +	}
> +	if (strcmp(uts.machine, "i686") == 0)
> +		return lxc_seccomp_arch_i386;
> +	else if (strcmp(uts.machine, "x86_64") == 0)
> +		return lxc_seccomp_arch_amd64;
> +	else if (strncmp(uts.machine, "armv7", 5) == 0)
> +		return lxc_seccomp_arch_arm;
> +	return lxc_seccomp_arch_unknown;
> +}
> +
> +scmp_filter_ctx get_new_ctx(enum lxc_hostarch_t n_arch, uint32_t default_policy_action)
> +{
> +	scmp_filter_ctx ctx;
> +	int ret;
> +	uint32_t arch;
> +
> +	switch(n_arch) {
> +	case lxc_seccomp_arch_i386: arch = SCMP_ARCH_X86; break;
> +	case lxc_seccomp_arch_amd64: arch = SCMP_ARCH_X86_64; break;
> +	case lxc_seccomp_arch_arm: arch = SCMP_ARCH_ARM; break;
> +	default: return NULL;
> +	}
> +
> +	if ((ctx = seccomp_init(default_policy_action)) == NULL) {
> +		ERROR("Error initializing seccomp context");
> +		return NULL;
> +	}
> +	if (seccomp_attr_set(ctx, SCMP_FLTATR_CTL_NNP, 0)) {
> +		ERROR("failed to turn off n-new-privs");
> +		seccomp_release(ctx);
> +		return NULL;
> +	}
> +	ret = seccomp_arch_add(ctx, arch);
> +	if (ret != 0) {
> +		ERROR("Seccomp error %d (%s) adding arch: %d", ret,
> +				strerror(ret), (int)n_arch);
> +		seccomp_release(ctx);
> +		return NULL;
> +	}
> +	if (seccomp_arch_remove(ctx, SCMP_ARCH_NATIVE) != 0) {
> +		ERROR("Seccomp error removing native arch");
> +		seccomp_release(ctx);
> +		return NULL;
> +	}
> +
> +	return ctx;
> +}
> +
> +bool do_resolve_add_rule(uint32_t arch, char *line, scmp_filter_ctx ctx,
> +			uint32_t action)
> +{
> +	int nr, ret;
> +
> +	if (arch && !seccomp_arch_exist(ctx, arch)) {
> +		ERROR("BUG: seccomp: rule and context arch do not match (arch %d)", arch);
> +		return false;
> +	}
> +	nr = seccomp_syscall_resolve_name_arch(arch, line);
> +	if (nr == __NR_SCMP_ERROR) {
> +		WARN("Seccomp: failed to resolve syscall: %s", line);
> +		WARN("This syscall will NOT be blacklisted");
> +		return true;
> +	}
> +	if (nr < 0) {
> +		WARN("Seccomp: got negative # for syscall: %s", line);
> +		WARN("This syscall will NOT be blacklisted");
> +		return true;
> +	}
> +	ret = seccomp_rule_add_exact(ctx, action, nr, 0);
> +	if (ret < 0) {
> +		ERROR("failed (%d) loading rule for %s (nr %d action %d)", ret, line, nr, action);
> +		return false;
> +	}
> +	return true;
> +}
> +
>  /*
>   * v2 consists of
>   * [x86]
> @@ -128,13 +221,14 @@ static uint32_t get_and_clear_v2_action(char *line, uint32_t def_action)
>   */
>  static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
>  {
> -#if HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH
>  	char *p;
>  	int ret;
> -	scmp_filter_ctx *ctx = NULL;
> +	scmp_filter_ctx compat_ctx = NULL;
>  	bool blacklist = false;
>  	uint32_t default_policy_action = -1, default_rule_action = -1, action;
>  	uint32_t arch = SCMP_ARCH_NATIVE;
> +	enum lxc_hostarch_t native_arch = get_hostarch(),
> +			    cur_rule_arch = native_arch;
>  
>  	if (strncmp(line, "blacklist", 9) == 0)
>  		blacklist = true;
> @@ -162,6 +256,14 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
>  			default_rule_action = SCMP_ACT_ALLOW;
>  	}
>  
> +	if (native_arch == lxc_seccomp_arch_amd64) {
> +		cur_rule_arch = lxc_seccomp_arch_all;
> +		compat_ctx = get_new_ctx(lxc_seccomp_arch_i386,
> +				default_policy_action);
> +		if (!compat_ctx)
> +			goto bad;
> +	}
> +
>  	if (default_policy_action != SCMP_ACT_KILL) {
>  		ret = seccomp_reset(conf->seccomp_ctx, default_policy_action);
>  		if (ret != 0) {
> @@ -175,7 +277,6 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
>  	}
>  
>  	while (fgets(line, 1024, f)) {
> -		int nr;
>  
>  		if (line[0] == '#')
>  			continue;
> @@ -186,72 +287,112 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
>  		if (line[0] == '[') {
>  			// read the architecture for next set of rules
>  			if (strcmp(line, "[x86]") == 0 ||
> -					strcmp(line, "[X86]") == 0)
> +					strcmp(line, "[X86]") == 0) {
> +				if (native_arch != lxc_seccomp_arch_i386 &&
> +					native_arch != lxc_seccomp_arch_amd64) {
> +					cur_rule_arch = lxc_seccomp_arch_unknown;
> +					continue;
> +				}
> +				cur_rule_arch = lxc_seccomp_arch_i386;
>  				arch = SCMP_ARCH_X86;
> -			else if (strcmp(line, "[X86_64]") == 0 ||
> -					strcmp(line, "[x86_64]") == 0)
> +				if (native_arch == lxc_seccomp_arch_amd64) {
> +					if (compat_ctx)
> +						continue;
> +					compat_ctx = get_new_ctx(lxc_seccomp_arch_i386,
> +							default_policy_action);
> +					if (!compat_ctx)
> +						goto bad;
> +				}
> +			} else if (strcmp(line, "[X86_64]") == 0 ||
> +					strcmp(line, "[x86_64]") == 0) {
> +				if (native_arch != lxc_seccomp_arch_amd64) {
> +					cur_rule_arch = lxc_seccomp_arch_unknown;
> +					continue;
> +				}
> +				cur_rule_arch = lxc_seccomp_arch_amd64;
>  				arch = SCMP_ARCH_X86_64;
> +			} else if (strcmp(line, "[all]") == 0 ||
> +					strcmp(line, "[ALL]") == 0) {
> +				cur_rule_arch = lxc_seccomp_arch_all;
> +				if (native_arch == lxc_seccomp_arch_amd64 && !compat_ctx) {
> +					if (compat_ctx)
> +						continue;
> +					compat_ctx = get_new_ctx(lxc_seccomp_arch_i386,
> +							default_policy_action);
> +					if (!compat_ctx)
> +						goto bad;
> +				}
> +			}
>  #ifdef SCMP_ARCH_ARM
>  			else if (strcmp(line, "[arm]") == 0 ||
> -					strcmp(line, "[ARM]") == 0)
> +					strcmp(line, "[ARM]") == 0) {
> +				if (native_arch != lxc_seccomp_arch_arm) {
> +					cur_rule_arch = lxc_seccomp_arch_unknown;
> +					continue;
> +				}
> +				cur_rule_arch = lxc_seccomp_arch_arm;
>  				arch = SCMP_ARCH_ARM;
> +			}
>  #endif
>  			else
>  				goto bad_arch;
> -			if (ctx) {
> -				ERROR("Only two arch sections per policy supported");
> -				goto bad_arch;
> -			}
> -			if ((ctx = seccomp_init(default_policy_action)) == NULL) {
> -				ERROR("Error initializing seccomp context");
> -				return -1;
> -			}
> -			if (seccomp_attr_set(ctx, SCMP_FLTATR_CTL_NNP, 0)) {
> -				ERROR("failed to turn off n-new-privs");
> -				seccomp_release(ctx);
> -				return -1;
> -			}
> -			ret = seccomp_arch_add(ctx, arch);
> -			if (ret == -EEXIST) {
> -				seccomp_release(ctx);
> -				ctx = NULL;
> -				continue;
> -			}
> -			if (ret != 0) {
> -				ERROR("Error %d adding arch: %s", ret, line);
> -				goto bad_arch;
> -			}
> -			if (seccomp_arch_remove(ctx, SCMP_ARCH_NATIVE) != 0) {
> -				ERROR("Error removing native arch from %s", line);
> -				goto bad_arch;
> -			}
> +
>  			continue;
>  		}
>  
> +		/* irrelevant arch - i.e. arm on i386 */
> +		if (cur_rule_arch == lxc_seccomp_arch_unknown)
> +			continue;
> +
> +		/* read optional action which follows the syscall */
>  		action = get_and_clear_v2_action(line, default_rule_action);
>  		if (action == -1) {
>  			ERROR("Failed to interpret action");
>  			goto bad_rule;
>  		}
> -		nr = seccomp_syscall_resolve_name_arch(arch, line);
> -		if (nr < 0) {
> -			WARN("Seccomp: failed to resolve syscall: %s (returned %d)",
> -				line, nr);
> -			WARN("This syscall will NOT be blacklisted");
> -			continue;
> +
> +		if (cur_rule_arch == lxc_seccomp_arch_all)
> +			arch = SCMP_ARCH_NATIVE;
> +
> +		/*
> +		 * TODO generalize - if !is_compat_only(native_arch, cur_rule_arch)
> +		 */
> +		if (!(cur_rule_arch == lxc_seccomp_arch_i386 &&
> +			native_arch == lxc_seccomp_arch_amd64)) {
> +			INFO("Adding non-compat rule for %s action %d", line, action);
> +			if (!do_resolve_add_rule(arch, line, conf->seccomp_ctx, action))
> +				goto bad_rule;
>  		}
> -		ret = seccomp_rule_add(ctx ? ctx : conf->seccomp_ctx,
> -				action, nr, 0);
> -		if (ret < 0) {
> -			ERROR("failed (%d) loading rule for %s", ret, line);
> -			goto bad_rule;
> +
> +		/*
> +		 * TODO generalize - if need_compat(native_arch, cur_rule_arch)
> +		 */
> +		if (native_arch == lxc_seccomp_arch_amd64 &&
> +			cur_rule_arch != lxc_seccomp_arch_amd64) {
> +			int nr1, nr2;
> +			INFO("Adding compat rule for %s action %d", line, action);
> +			nr1 = seccomp_syscall_resolve_name_arch(lxc_seccomp_arch_amd64, line);
> +			nr2 = seccomp_syscall_resolve_name_arch(lxc_seccomp_arch_i386, line);
> +			if (nr1 == nr2) {
> +				/* sigh - if the syscall # is the same for 32- and 64-bit, then
> +				 * we get EFAULT if we try to aplly to compat_ctx.  So apply to
> +				 * the noncompat ctx.  We may already have done so, but that's ok
> +				 */
> +				if (!do_resolve_add_rule(arch, line, conf->seccomp_ctx, action))
> +					goto bad_rule;
> +				continue;
> +			}
> +			if (!do_resolve_add_rule(SCMP_ARCH_X86, line,
> +						compat_ctx, action))
> +				goto bad_rule;
>  		}
>  	}
> -	if (ctx) {
> -		if (seccomp_merge(conf->seccomp_ctx, ctx) != 0) {
> -			seccomp_release(ctx);
> -			ERROR("Error merging seccomp contexts");
> -			return -1;
> +
> +	if (compat_ctx) {
> +		INFO("Merging in the compat seccomp ctx into the main one");
> +		if (seccomp_merge(conf->seccomp_ctx, compat_ctx) != 0) {
> +			ERROR("Error merging i386 seccomp contexts");
> +			goto bad;
>  		}
>  	}
>  	return 0;
> @@ -259,13 +400,17 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
>  bad_arch:
>  	ERROR("Unsupported arch: %s", line);
>  bad_rule:
> -	if (ctx)
> -		seccomp_release(ctx);
> +bad:
> +	if (compat_ctx)
> +		seccomp_release(compat_ctx);
>  	return -1;
> -#else
> +}
> +#else /* HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH */
> +static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
> +{
>  	return -1;
> -#endif
>  }
> +#endif /* HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH */
>  
>  /*
>   * The first line of the config file has a policy language version
> @@ -304,6 +449,32 @@ static int parse_config(FILE *f, struct lxc_conf *conf)
>  	return parse_config_v2(f, line, conf);
>  }
>  
> +static bool seccomp_already_confined(void)
> +{
> +	FILE *f = fopen("/proc/self/status", "r");
> +	char line[1024];
> +	bool bret = false;
> +	int ret, v;
> +
> +	if (!f)
> +		return false;
> +
> +	while (fgets(line, 1024, f)) {
> +		if (strncmp(line, "Seccomp:", 8) == 0) {
> +			ret = sscanf(line+8, "%d", &v);
> +			if (ret != 1)
> +				continue;
> +			if (v != 0)
> +				bret = true;
> +			goto out;
> +		}
> +	}
> +
> +out:
> +	fclose(f);
> +	return bret;
> +}
> +
>  int lxc_read_seccomp_config(struct lxc_conf *conf)
>  {
>  	FILE *f;
> @@ -312,6 +483,10 @@ int lxc_read_seccomp_config(struct lxc_conf *conf)
>  	if (!conf->seccomp)
>  		return 0;
>  
> +	if (seccomp_already_confined()) {
> +		INFO("Already confined by seccomp;  not loading a new policy");
> +		return 0;
> +	}
>  #if HAVE_SCMP_FILTER_CTX
>  	/* XXX for debug, pass in SCMP_ACT_TRAP */
>  	conf->seccomp_ctx = seccomp_init(SCMP_ACT_KILL);
> @@ -350,6 +525,10 @@ int lxc_seccomp_load(struct lxc_conf *conf)
>  	int ret;
>  	if (!conf->seccomp)
>  		return 0;
> +	if (seccomp_already_confined()) {
> +		INFO("Already confined by seccomp;  not loading a new policy");
> +		return 0;
> +	}
>  	ret = seccomp_load(
>  #if HAVE_SCMP_FILTER_CTX
>  			conf->seccomp_ctx
> -- 
> 2.0.0
> 
> _______________________________________________
> lxc-devel mailing list
> lxc-devel at lists.linuxcontainers.org
> http://lists.linuxcontainers.org/listinfo/lxc-devel

-- 
Stéphane Graber
Ubuntu developer
http://www.ubuntu.com
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: Digital signature
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20140620/41284760/attachment.sig>


More information about the lxc-devel mailing list