[lxc-devel] [PATCH] seccomp: support 'all' arch sections (plus bugfixes)
Stéphane Graber
stgraber at ubuntu.com
Fri Jun 20 18:35:08 UTC 2014
On Thu, Jun 19, 2014 at 08:52:34PM +0000, Serge Hallyn wrote:
> seccomp_ctx is already a void*, so don't use 'scmp_filter_ctx *'
>
> Separately track the native arch from the arch a rule is aimed at.
>
> Clearly ignore irrelevant architectures (i.e. arm rules on x86)
>
> Don't try to load seccomp (and don't fail) if we are already
> seccomp-confined. Otherwise nested containers fail.
>
> Make it clear that the extra seccomp ctx is only for compat calls
> on 64-bit arch. (This will be extended to arm64 when libseccomp
> supports it). Power may will complicate this (if ever it is supported)
> and require a new rethink and rewrite.
>
> NOTE - currently when starting a 32-bit container on 64-bit host,
> rules pertaining to 32-bit syscalls (as opposed to once which have
> the same syscall #) appear to be ignored. I can reproduce that without
> lxc, so either there is a bug in seccomp or a fundamental
> misunderstanding in how I"m merging the contexts.
>
> Rereading the seccomp_rule_add manpage suggests that keeping the seccond
> seccomp context may not be necessary, but this is not something I care
> to test right now. If it's true, then the code could be simplified, and
> it may solve my concerns about power.
>
> With this patch I'm able to start nested containers (with seccomp
> policies defined) including 32-bit and 32-bit-in-64-bit.
>
> [ this patch does not yet add the default seccomp policy ]
>
> Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>
Acked-by: Stéphane Graber <stgraber at ubuntu.com>
> ---
> src/lxc/conf.h | 2 +-
> src/lxc/seccomp.c | 287 ++++++++++++++++++++++++++++++++++++++++++++----------
> 2 files changed, 234 insertions(+), 55 deletions(-)
>
> diff --git a/src/lxc/conf.h b/src/lxc/conf.h
> index ace1da0..3527c44 100644
> --- a/src/lxc/conf.h
> +++ b/src/lxc/conf.h
> @@ -312,7 +312,7 @@ struct lxc_conf {
> int tmp_umount_proc;
> char *seccomp; // filename with the seccomp rules
> #if HAVE_SCMP_FILTER_CTX
> - scmp_filter_ctx *seccomp_ctx;
> + scmp_filter_ctx seccomp_ctx;
> #endif
> int maincmd_fd;
> int autodev; // if 1, mount and fill a /dev at start
> diff --git a/src/lxc/seccomp.c b/src/lxc/seccomp.c
> index fadc190..5df37bc 100644
> --- a/src/lxc/seccomp.c
> +++ b/src/lxc/seccomp.c
> @@ -27,6 +27,7 @@
> #include <seccomp.h>
> #include <errno.h>
> #include <seccomp.h>
> +#include <sys/utsname.h>
>
> #include "config.h"
> #include "lxcseccomp.h"
> @@ -112,6 +113,98 @@ static uint32_t get_and_clear_v2_action(char *line, uint32_t def_action)
> }
> #endif
>
> +#if HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH
> +enum lxc_hostarch_t {
> + lxc_seccomp_arch_all = 0,
> + lxc_seccomp_arch_native,
> + lxc_seccomp_arch_i386,
> + lxc_seccomp_arch_amd64,
> + lxc_seccomp_arch_arm,
> + lxc_seccomp_arch_unknown = 999,
> +};
> +
> +int get_hostarch(void)
> +{
> + struct utsname uts;
> + if (uname(&uts) < 0) {
> + SYSERROR("Failed to read host arch");
> + return -1;
> + }
> + if (strcmp(uts.machine, "i686") == 0)
> + return lxc_seccomp_arch_i386;
> + else if (strcmp(uts.machine, "x86_64") == 0)
> + return lxc_seccomp_arch_amd64;
> + else if (strncmp(uts.machine, "armv7", 5) == 0)
> + return lxc_seccomp_arch_arm;
> + return lxc_seccomp_arch_unknown;
> +}
> +
> +scmp_filter_ctx get_new_ctx(enum lxc_hostarch_t n_arch, uint32_t default_policy_action)
> +{
> + scmp_filter_ctx ctx;
> + int ret;
> + uint32_t arch;
> +
> + switch(n_arch) {
> + case lxc_seccomp_arch_i386: arch = SCMP_ARCH_X86; break;
> + case lxc_seccomp_arch_amd64: arch = SCMP_ARCH_X86_64; break;
> + case lxc_seccomp_arch_arm: arch = SCMP_ARCH_ARM; break;
> + default: return NULL;
> + }
> +
> + if ((ctx = seccomp_init(default_policy_action)) == NULL) {
> + ERROR("Error initializing seccomp context");
> + return NULL;
> + }
> + if (seccomp_attr_set(ctx, SCMP_FLTATR_CTL_NNP, 0)) {
> + ERROR("failed to turn off n-new-privs");
> + seccomp_release(ctx);
> + return NULL;
> + }
> + ret = seccomp_arch_add(ctx, arch);
> + if (ret != 0) {
> + ERROR("Seccomp error %d (%s) adding arch: %d", ret,
> + strerror(ret), (int)n_arch);
> + seccomp_release(ctx);
> + return NULL;
> + }
> + if (seccomp_arch_remove(ctx, SCMP_ARCH_NATIVE) != 0) {
> + ERROR("Seccomp error removing native arch");
> + seccomp_release(ctx);
> + return NULL;
> + }
> +
> + return ctx;
> +}
> +
> +bool do_resolve_add_rule(uint32_t arch, char *line, scmp_filter_ctx ctx,
> + uint32_t action)
> +{
> + int nr, ret;
> +
> + if (arch && !seccomp_arch_exist(ctx, arch)) {
> + ERROR("BUG: seccomp: rule and context arch do not match (arch %d)", arch);
> + return false;
> + }
> + nr = seccomp_syscall_resolve_name_arch(arch, line);
> + if (nr == __NR_SCMP_ERROR) {
> + WARN("Seccomp: failed to resolve syscall: %s", line);
> + WARN("This syscall will NOT be blacklisted");
> + return true;
> + }
> + if (nr < 0) {
> + WARN("Seccomp: got negative # for syscall: %s", line);
> + WARN("This syscall will NOT be blacklisted");
> + return true;
> + }
> + ret = seccomp_rule_add_exact(ctx, action, nr, 0);
> + if (ret < 0) {
> + ERROR("failed (%d) loading rule for %s (nr %d action %d)", ret, line, nr, action);
> + return false;
> + }
> + return true;
> +}
> +
> /*
> * v2 consists of
> * [x86]
> @@ -128,13 +221,14 @@ static uint32_t get_and_clear_v2_action(char *line, uint32_t def_action)
> */
> static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
> {
> -#if HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH
> char *p;
> int ret;
> - scmp_filter_ctx *ctx = NULL;
> + scmp_filter_ctx compat_ctx = NULL;
> bool blacklist = false;
> uint32_t default_policy_action = -1, default_rule_action = -1, action;
> uint32_t arch = SCMP_ARCH_NATIVE;
> + enum lxc_hostarch_t native_arch = get_hostarch(),
> + cur_rule_arch = native_arch;
>
> if (strncmp(line, "blacklist", 9) == 0)
> blacklist = true;
> @@ -162,6 +256,14 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
> default_rule_action = SCMP_ACT_ALLOW;
> }
>
> + if (native_arch == lxc_seccomp_arch_amd64) {
> + cur_rule_arch = lxc_seccomp_arch_all;
> + compat_ctx = get_new_ctx(lxc_seccomp_arch_i386,
> + default_policy_action);
> + if (!compat_ctx)
> + goto bad;
> + }
> +
> if (default_policy_action != SCMP_ACT_KILL) {
> ret = seccomp_reset(conf->seccomp_ctx, default_policy_action);
> if (ret != 0) {
> @@ -175,7 +277,6 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
> }
>
> while (fgets(line, 1024, f)) {
> - int nr;
>
> if (line[0] == '#')
> continue;
> @@ -186,72 +287,112 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
> if (line[0] == '[') {
> // read the architecture for next set of rules
> if (strcmp(line, "[x86]") == 0 ||
> - strcmp(line, "[X86]") == 0)
> + strcmp(line, "[X86]") == 0) {
> + if (native_arch != lxc_seccomp_arch_i386 &&
> + native_arch != lxc_seccomp_arch_amd64) {
> + cur_rule_arch = lxc_seccomp_arch_unknown;
> + continue;
> + }
> + cur_rule_arch = lxc_seccomp_arch_i386;
> arch = SCMP_ARCH_X86;
> - else if (strcmp(line, "[X86_64]") == 0 ||
> - strcmp(line, "[x86_64]") == 0)
> + if (native_arch == lxc_seccomp_arch_amd64) {
> + if (compat_ctx)
> + continue;
> + compat_ctx = get_new_ctx(lxc_seccomp_arch_i386,
> + default_policy_action);
> + if (!compat_ctx)
> + goto bad;
> + }
> + } else if (strcmp(line, "[X86_64]") == 0 ||
> + strcmp(line, "[x86_64]") == 0) {
> + if (native_arch != lxc_seccomp_arch_amd64) {
> + cur_rule_arch = lxc_seccomp_arch_unknown;
> + continue;
> + }
> + cur_rule_arch = lxc_seccomp_arch_amd64;
> arch = SCMP_ARCH_X86_64;
> + } else if (strcmp(line, "[all]") == 0 ||
> + strcmp(line, "[ALL]") == 0) {
> + cur_rule_arch = lxc_seccomp_arch_all;
> + if (native_arch == lxc_seccomp_arch_amd64 && !compat_ctx) {
> + if (compat_ctx)
> + continue;
> + compat_ctx = get_new_ctx(lxc_seccomp_arch_i386,
> + default_policy_action);
> + if (!compat_ctx)
> + goto bad;
> + }
> + }
> #ifdef SCMP_ARCH_ARM
> else if (strcmp(line, "[arm]") == 0 ||
> - strcmp(line, "[ARM]") == 0)
> + strcmp(line, "[ARM]") == 0) {
> + if (native_arch != lxc_seccomp_arch_arm) {
> + cur_rule_arch = lxc_seccomp_arch_unknown;
> + continue;
> + }
> + cur_rule_arch = lxc_seccomp_arch_arm;
> arch = SCMP_ARCH_ARM;
> + }
> #endif
> else
> goto bad_arch;
> - if (ctx) {
> - ERROR("Only two arch sections per policy supported");
> - goto bad_arch;
> - }
> - if ((ctx = seccomp_init(default_policy_action)) == NULL) {
> - ERROR("Error initializing seccomp context");
> - return -1;
> - }
> - if (seccomp_attr_set(ctx, SCMP_FLTATR_CTL_NNP, 0)) {
> - ERROR("failed to turn off n-new-privs");
> - seccomp_release(ctx);
> - return -1;
> - }
> - ret = seccomp_arch_add(ctx, arch);
> - if (ret == -EEXIST) {
> - seccomp_release(ctx);
> - ctx = NULL;
> - continue;
> - }
> - if (ret != 0) {
> - ERROR("Error %d adding arch: %s", ret, line);
> - goto bad_arch;
> - }
> - if (seccomp_arch_remove(ctx, SCMP_ARCH_NATIVE) != 0) {
> - ERROR("Error removing native arch from %s", line);
> - goto bad_arch;
> - }
> +
> continue;
> }
>
> + /* irrelevant arch - i.e. arm on i386 */
> + if (cur_rule_arch == lxc_seccomp_arch_unknown)
> + continue;
> +
> + /* read optional action which follows the syscall */
> action = get_and_clear_v2_action(line, default_rule_action);
> if (action == -1) {
> ERROR("Failed to interpret action");
> goto bad_rule;
> }
> - nr = seccomp_syscall_resolve_name_arch(arch, line);
> - if (nr < 0) {
> - WARN("Seccomp: failed to resolve syscall: %s (returned %d)",
> - line, nr);
> - WARN("This syscall will NOT be blacklisted");
> - continue;
> +
> + if (cur_rule_arch == lxc_seccomp_arch_all)
> + arch = SCMP_ARCH_NATIVE;
> +
> + /*
> + * TODO generalize - if !is_compat_only(native_arch, cur_rule_arch)
> + */
> + if (!(cur_rule_arch == lxc_seccomp_arch_i386 &&
> + native_arch == lxc_seccomp_arch_amd64)) {
> + INFO("Adding non-compat rule for %s action %d", line, action);
> + if (!do_resolve_add_rule(arch, line, conf->seccomp_ctx, action))
> + goto bad_rule;
> }
> - ret = seccomp_rule_add(ctx ? ctx : conf->seccomp_ctx,
> - action, nr, 0);
> - if (ret < 0) {
> - ERROR("failed (%d) loading rule for %s", ret, line);
> - goto bad_rule;
> +
> + /*
> + * TODO generalize - if need_compat(native_arch, cur_rule_arch)
> + */
> + if (native_arch == lxc_seccomp_arch_amd64 &&
> + cur_rule_arch != lxc_seccomp_arch_amd64) {
> + int nr1, nr2;
> + INFO("Adding compat rule for %s action %d", line, action);
> + nr1 = seccomp_syscall_resolve_name_arch(lxc_seccomp_arch_amd64, line);
> + nr2 = seccomp_syscall_resolve_name_arch(lxc_seccomp_arch_i386, line);
> + if (nr1 == nr2) {
> + /* sigh - if the syscall # is the same for 32- and 64-bit, then
> + * we get EFAULT if we try to aplly to compat_ctx. So apply to
> + * the noncompat ctx. We may already have done so, but that's ok
> + */
> + if (!do_resolve_add_rule(arch, line, conf->seccomp_ctx, action))
> + goto bad_rule;
> + continue;
> + }
> + if (!do_resolve_add_rule(SCMP_ARCH_X86, line,
> + compat_ctx, action))
> + goto bad_rule;
> }
> }
> - if (ctx) {
> - if (seccomp_merge(conf->seccomp_ctx, ctx) != 0) {
> - seccomp_release(ctx);
> - ERROR("Error merging seccomp contexts");
> - return -1;
> +
> + if (compat_ctx) {
> + INFO("Merging in the compat seccomp ctx into the main one");
> + if (seccomp_merge(conf->seccomp_ctx, compat_ctx) != 0) {
> + ERROR("Error merging i386 seccomp contexts");
> + goto bad;
> }
> }
> return 0;
> @@ -259,13 +400,17 @@ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
> bad_arch:
> ERROR("Unsupported arch: %s", line);
> bad_rule:
> - if (ctx)
> - seccomp_release(ctx);
> +bad:
> + if (compat_ctx)
> + seccomp_release(compat_ctx);
> return -1;
> -#else
> +}
> +#else /* HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH */
> +static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf)
> +{
> return -1;
> -#endif
> }
> +#endif /* HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH */
>
> /*
> * The first line of the config file has a policy language version
> @@ -304,6 +449,32 @@ static int parse_config(FILE *f, struct lxc_conf *conf)
> return parse_config_v2(f, line, conf);
> }
>
> +static bool seccomp_already_confined(void)
> +{
> + FILE *f = fopen("/proc/self/status", "r");
> + char line[1024];
> + bool bret = false;
> + int ret, v;
> +
> + if (!f)
> + return false;
> +
> + while (fgets(line, 1024, f)) {
> + if (strncmp(line, "Seccomp:", 8) == 0) {
> + ret = sscanf(line+8, "%d", &v);
> + if (ret != 1)
> + continue;
> + if (v != 0)
> + bret = true;
> + goto out;
> + }
> + }
> +
> +out:
> + fclose(f);
> + return bret;
> +}
> +
> int lxc_read_seccomp_config(struct lxc_conf *conf)
> {
> FILE *f;
> @@ -312,6 +483,10 @@ int lxc_read_seccomp_config(struct lxc_conf *conf)
> if (!conf->seccomp)
> return 0;
>
> + if (seccomp_already_confined()) {
> + INFO("Already confined by seccomp; not loading a new policy");
> + return 0;
> + }
> #if HAVE_SCMP_FILTER_CTX
> /* XXX for debug, pass in SCMP_ACT_TRAP */
> conf->seccomp_ctx = seccomp_init(SCMP_ACT_KILL);
> @@ -350,6 +525,10 @@ int lxc_seccomp_load(struct lxc_conf *conf)
> int ret;
> if (!conf->seccomp)
> return 0;
> + if (seccomp_already_confined()) {
> + INFO("Already confined by seccomp; not loading a new policy");
> + return 0;
> + }
> ret = seccomp_load(
> #if HAVE_SCMP_FILTER_CTX
> conf->seccomp_ctx
> --
> 2.0.0
>
> _______________________________________________
> lxc-devel mailing list
> lxc-devel at lists.linuxcontainers.org
> http://lists.linuxcontainers.org/listinfo/lxc-devel
--
Stéphane Graber
Ubuntu developer
http://www.ubuntu.com
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: Digital signature
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20140620/41284760/attachment.sig>
More information about the lxc-devel
mailing list