[lxc-devel] [PATCH RFC] seccomp: introduce v2 policy

Stéphane Graber stgraber at ubuntu.com
Wed Feb 12 15:14:55 UTC 2014


On Wed, Feb 12, 2014 at 12:24:22AM -0600, Serge Hallyn wrote:
> v2 allows specifying system calls by name, and specifying
> architecture.  A policy might look like:
> 
> 2
> whitelist
> # native architecture is x86_64;  we could put
> # [x86-64] here but don't need to
> open
> read
> write
> close
> mount
> # let's also allow some 32-bit syscalls
> [x86]
> open
> read

Nice! That's already a huge step up from the v1 config.
Just wondering, does v2 also support blacklisting, or do we still need to
list every possible syscall minus the ones we want to block?

> Also use SCMP_ACT_KILL rather than SCMP_ACT_ERRNO(31)  - which
> confusingly returns 'EMLINK' on x86_64.

I was wondering, is there a sane way we could make that configurable?

Currently there isn't a big use case for this, but if we ever get
syscall blacklisting, then it'd be very interesting to be able to set a
default (possibly something like "whitelist errno 31") and then override
it on a per-syscall basis.

This would allow things like blocking mknod yet having it return as if it
had succeeded (handy for userns).

> 
> Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>
> ---
>  src/lxc/seccomp.c | 164 +++++++++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 144 insertions(+), 20 deletions(-)
> 
> diff --git a/src/lxc/seccomp.c b/src/lxc/seccomp.c
> index ea23b3a..c8c232a 100644
> --- a/src/lxc/seccomp.c
> +++ b/src/lxc/seccomp.c
> @@ -34,6 +34,143 @@
>  
>  lxc_log_define(lxc_seccomp, lxc);
>  
> +static int parse_config_v1(FILE *f, struct lxc_conf *conf)
> +{
> +	char line[1024];
> +	int ret;
> +
> +	while (fgets(line, 1024, f)) {
> +		int nr;
> +		ret = sscanf(line, "%d", &nr);
> +		if (ret != 1)
> +			return -1;
> +		ret = seccomp_rule_add(
> +#if HAVE_SCMP_FILTER_CTX
> +			conf->seccomp_ctx,
> +#endif
> +			SCMP_ACT_ALLOW, nr, 0);
> +		if (ret < 0) {
> +			ERROR("failed loading allow rule for %d", nr);
> +			return ret;
> +		}
> +	}
> +	return 0;
> +}
> +
> +static void remove_trailing_newlines(char *l)
> +{
> +	char *p = l;
> +
> +	while (*p)
> +		p++;
> +	while (--p >= l && *p == '\n')
> +		*p = '\0';
> +}
> +
> +/*
> + * v2 consists of
> + * [x86]
> + * open
> + * read
> + * write
> + * close
> + * # a comment
> + * [x86_64]
> + * open
> + * read
> + * write
> + * close
> + */
> +static int parse_config_v2(FILE *f, struct lxc_conf *conf)
> +{
> +#if HAVE_SCMP_FILTER_CTX
> +	char line[1024];
> +	int ret;
> +	uint32_t arch = SCMP_ARCH_NATIVE;
> +	scmp_filter_ctx *ctx = NULL;
> +
> +	while (fgets(line, 1024, f)) {
> +		int nr;
> +
> +		if (line[0] == '#')
> +			continue;
> +		if (strlen(line) == 0)
> +			continue;
> +		remove_trailing_newlines(line);
> +		INFO("processing: .%s.", line);

^ This probably ought to be DEBUG, otherwise if I list all the possible
syscalls, this is going to spam my log files :)

> +		if (line[0] == '[') {
> +			if (strcmp(line, "[x86]") == 0 ||
> +					strcmp(line, "[X86]") == 0)
> +				arch = SCMP_ARCH_X86;
> +			else if (strcmp(line, "[X86_64]") == 0 ||
> +					strcmp(line, "[x86_64]") == 0)
> +				arch = SCMP_ARCH_X86_64;
> +			else if (strcmp(line, "[arm]") == 0 ||
> +					strcmp(line, "[ARM]") == 0)
> +				arch = SCMP_ARCH_ARM;

Are those all the architectures supported by seccomp, or did you just
include the most common ones?

I'm specifically wondering about arm64, powerpc and ppc64el all of which
Ubuntu supports now.

In theory, the following should be valid:

2
whitelist
open
read
[arm]
open
read

(armhf on arm64)

So ideally, we'd support:
 - amd64 (including i386 on amd64)
 - i386
 - armhf
 - arm64 (including armhf on arm64)
 - powerpc
 - ppc64el

> +			else
> +				goto bad_arch;
> +			if (ctx) {
> +				ERROR("Only two arch sections per policy supported");
> +				goto bad_arch;
> +			}
> +			if ((ctx = seccomp_init(SCMP_ACT_KILL)) == NULL) {
> +				ERROR("Error initializing seccomp context");
> +				return -1;
> +			}
> +			if (seccomp_attr_set(ctx, SCMP_FLTATR_CTL_NNP, 0)) {
> +				ERROR("failed to turn off n-new-privs");
> +				seccomp_release(ctx);
> +				return -1;
> +			}
> +			ret = seccomp_arch_add(ctx, arch);
> +			if (ret == -EEXIST) {
> +				seccomp_release(ctx);
> +				ctx = NULL;
> +				continue;
> +			}
> +			if (ret != 0) {
> +				ERROR("Error %d adding arch: %s", ret, line);
> +				goto bad_arch;
> +			}
> +			if (seccomp_arch_remove(ctx, SCMP_ARCH_NATIVE) != 0) {
> +				ERROR("Error removing native arch from %s", line);
> +				goto bad_arch;
> +			}
> +			continue;
> +		}
> +
> +		nr = seccomp_syscall_resolve_name_arch(arch, line);
> +		if (nr < 0) {
> +			ERROR("Failed to resolve syscall: %s", line);
> +			goto bad_rule;
> +		}
> +		ret = seccomp_rule_add(ctx ? ctx : conf->seccomp_ctx,
> +			SCMP_ACT_ALLOW, nr, 0);
> +		if (ret < 0) {
> +			ERROR("failed (%d) loading allow rule for %d", ret, nr);
> +			goto bad_rule;
> +		}
> +	}
> +	if (ctx) {
> +		if (seccomp_merge(conf->seccomp_ctx, ctx) != 0) {
> +			seccomp_release(ctx);
> +			ERROR("Error merging seccomp contexts");
> +			return -1;
> +		}
> +	}
> +	return 0;
> +bad_arch:
> +	ERROR("Unsupported arch: %s", line);
> +bad_rule:
> +	if (ctx)
> +		seccomp_release(ctx);
> +	return -1;
> +#else
> +	return -1;
> +#endif
> +}
> +
>  /*
>   * The first line of the config file has a policy language version
>   * the second line has some directives
> @@ -48,7 +185,7 @@ static int parse_config(FILE *f, struct lxc_conf *conf)
>  	int ret, version;
>  
>  	ret = fscanf(f, "%d\n", &version);
> -	if (ret != 1 || version != 1) {
> +	if (ret != 1 || (version != 1 && version != 2)) {
>  		ERROR("invalid version");
>  		return -1;
>  	}
> @@ -64,23 +201,10 @@ static int parse_config(FILE *f, struct lxc_conf *conf)
>  		ERROR("debug not yet implemented");
>  		return -1;
>  	}
> -	/* now read in the whitelist entries one per line */
> -	while (fgets(line, 1024, f)) {
> -		int nr;
> -		ret = sscanf(line, "%d", &nr);
> -		if (ret != 1)
> -			return -1;
> -		ret = seccomp_rule_add(
> -#if HAVE_SCMP_FILTER_CTX
> -			conf->seccomp_ctx,
> -#endif
> -			SCMP_ACT_ALLOW, nr, 0);
> -		if (ret < 0) {
> -			ERROR("failed loading allow rule for %d", nr);
> -			return ret;
> -		}
> -	}
> -	return 0;
> +
> +	if (version == 1)
> +		return parse_config_v1(f, conf);
> +	return parse_config_v2(f, conf);
>  }
>  
>  int lxc_read_seccomp_config(struct lxc_conf *conf)
> @@ -93,10 +217,10 @@ int lxc_read_seccomp_config(struct lxc_conf *conf)
>  
>  #if HAVE_SCMP_FILTER_CTX
>  	/* XXX for debug, pass in SCMP_ACT_TRAP */
> -	conf->seccomp_ctx = seccomp_init(SCMP_ACT_ERRNO(31));
> +	conf->seccomp_ctx = seccomp_init(SCMP_ACT_KILL);
>  	ret = !conf->seccomp_ctx;
>  #else
> -	ret = seccomp_init(SCMP_ACT_ERRNO(31)) < 0;
> +	ret = seccomp_init(SCMP_ACT_KILL) < 0;
>  #endif
>  	if (ret) {
>  		ERROR("failed initializing seccomp");
> -- 
> 1.9.rc1
> 
> _______________________________________________
> lxc-devel mailing list
> lxc-devel at lists.linuxcontainers.org
> http://lists.linuxcontainers.org/listinfo/lxc-devel

-- 
Stéphane Graber
Ubuntu developer
http://www.ubuntu.com
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: Digital signature
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20140212/1f74d320/attachment.pgp>


More information about the lxc-devel mailing list