[lxc-devel] [lxd/master] add some seccomp knobs
tych0 on Github
lxc-bot at linuxcontainers.org
Mon May 23 17:00:22 UTC 2016
A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 548 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20160523/0a5d1650/attachment.bin>
-------------- next part --------------
From 5b04f732976ff84ea54ee828274ef88aad066796 Mon Sep 17 00:00:00 2001
From: Tycho Andersen <tycho.andersen at canonical.com>
Date: Mon, 23 May 2016 09:40:06 -0600
Subject: [PATCH] add some seccomp knobs
In particular,
* make the default seccomp config optional
* add a knob to block x32 syscalls on amd64 (enabled by default)
* add a raw.seccomp config key so users can append their own seccomp rules to the generated profile
Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
---
doc/configuration.md | 3 ++
lxd/container.go | 6 ++++
lxd/container_lxc.go | 10 +++---
lxd/seccomp.go | 92 +++++++++++++++++++++++++++++++++++++++++++++++++---
test/suites/basic.sh | 11 +++++++
5 files changed, 114 insertions(+), 8 deletions(-)
diff --git a/doc/configuration.md b/doc/configuration.md
index 9c94db7..d4a1afb 100644
--- a/doc/configuration.md
+++ b/doc/configuration.md
@@ -83,9 +83,12 @@ limits.processes | integer | - (max) | yes | Maximu
linux.kernel\_modules | string | - | yes | Comma separated list of kernel modules to load before starting the container
raw.apparmor | blob | - | yes | Apparmor profile entries to be appended to the generated profile
raw.lxc | blob | - | no | Raw LXC configuration to be appended to the generated one
+raw.seccomp | blob | - | no | Raw LXC seccomp profile string to append to the generated one
security.nesting | boolean | false | yes | Support running lxd (nested) inside the container
security.privileged | boolean | false | no | Runs the container in privileged mode
user.\* | string | - | n/a | Free form user key/value storage (can be used in search)
+security.syscalls.default | boolean | true | no | Enables the default syscall blacklist
+security.syscalls.compat | boolean | true | no | On x86\_64, enables blocking of compat\_\* syscalls; it is a no-op on other architectures
The following volatile keys are currently internally used by LXD:
diff --git a/lxd/container.go b/lxd/container.go
index 9c196ca..7da08ec 100644
--- a/lxd/container.go
+++ b/lxd/container.go
@@ -109,10 +109,16 @@ func containerValidConfigKey(key string, value string) error {
return isBool(key, value)
case "security.nesting":
return isBool(key, value)
+ case "security.syscalls.default":
+ return isBool(key, value)
+ case "security.syscalls.compat":
+ return isBool(key, value)
case "raw.apparmor":
return nil
case "raw.lxc":
return lxcValidConfig(value)
+ case "raw.seccomp":
+ return nil
case "volatile.apply_template":
return nil
case "volatile.base_image":
diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 6aa3510..0cccd49 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -466,10 +466,12 @@ func (c *containerLXC) initLXC() error {
}
}
- // Setup Seccomp
- err = lxcSetConfigItem(cc, "lxc.seccomp", SeccompProfilePath(c))
- if err != nil {
- return err
+ // Setup Seccomp if necessary
+ if ContainerNeedsSeccomp(c) {
+ err = lxcSetConfigItem(cc, "lxc.seccomp", SeccompProfilePath(c))
+ if err != nil {
+ return err
+ }
}
// Setup idmap
diff --git a/lxd/seccomp.go b/lxd/seccomp.go
index 11817a9..9e4a18d 100644
--- a/lxd/seccomp.go
+++ b/lxd/seccomp.go
@@ -8,9 +8,12 @@ import (
"github.com/lxc/lxd/shared"
)
-const DEFAULT_SECCOMP_POLICY = `
+const SECCOMP_HEADER = `
2
blacklist
+`
+
+const DEFAULT_SECCOMP_POLICY = `
reject_force_umount # comment this to allow umount -f; not recommended
[all]
kexec_load errno 1
@@ -19,16 +22,93 @@ init_module errno 1
finit_module errno 1
delete_module errno 1
`
-
+const COMPAT_BLOCKING_POLICY = `
+[x86_64]
+compat_sys_rt_sigaction
+stub_x32_rt_sigreturn
+compat_sys_ioctl
+compat_sys_readv
+compat_sys_writev
+compat_sys_recvfrom
+compat_sys_sendmsg
+compat_sys_recvmsg
+stub_x32_execve
+compat_sys_ptrace
+compat_sys_rt_sigpending
+compat_sys_rt_sigtimedwait
+compat_sys_rt_sigqueueinfo
+compat_sys_sigaltstack
+compat_sys_timer_create
+compat_sys_mq_notify
+compat_sys_kexec_load
+compat_sys_waitid
+compat_sys_set_robust_list
+compat_sys_get_robust_list
+compat_sys_vmsplice
+compat_sys_move_pages
+compat_sys_preadv64
+compat_sys_pwritev64
+compat_sys_rt_tgsigqueueinfo
+compat_sys_recvmmsg
+compat_sys_sendmmsg
+compat_sys_process_vm_readv
+compat_sys_process_vm_writev
+compat_sys_setsockopt
+compat_sys_getsockopt
+compat_sys_io_setup
+compat_sys_io_submit
+stub_x32_execveat
+`
var seccompPath = shared.VarPath("security", "seccomp")
func SeccompProfilePath(c container) string {
return path.Join(seccompPath, c.Name())
}
+func ContainerNeedsSeccomp(c container) bool {
+ config := c.ExpandedConfig()
+
+ /* these are enabled by default, so if the keys aren't present, that
+ * means "true"
+ */
+ default_, ok := config["security.syscalls.default"]
+ if !ok || shared.IsTrue(default_) {
+ return true
+ }
+
+ compat, ok := config["security.syscalls.compat"]
+ if !ok || shared.IsTrue(compat) {
+ return true
+ }
+
+ raw := config["raw.seccomp"]
+ if raw != "" {
+ return true
+ }
+
+ return false
+}
+
func getSeccompProfileContent(c container) string {
- /* for now there are no seccomp knobs. */
- return DEFAULT_SECCOMP_POLICY
+ config := c.ExpandedConfig()
+ policy := SECCOMP_HEADER
+
+ default_, ok := config["security.syscalls.default"]
+ if !ok || shared.IsTrue(default_) {
+ policy += DEFAULT_SECCOMP_POLICY
+ }
+
+ compat, ok := config["security.syscalls.compat"]
+ if !ok || shared.IsTrue(compat) {
+ policy += COMPAT_BLOCKING_POLICY
+ }
+
+ raw := config["raw.seccomp"]
+ if raw != "" {
+ policy += raw
+ }
+
+ return policy
}
func SeccompCreateProfile(c container) error {
@@ -38,6 +118,10 @@ func SeccompCreateProfile(c container) error {
* the mtime on the file for any compiler purpose, so let's just write
* out the profile.
*/
+ if !ContainerNeedsSeccomp(c) {
+ return nil
+ }
+
profile := getSeccompProfileContent(c)
if err := os.MkdirAll(seccompPath, 0700); err != nil {
return err
diff --git a/test/suites/basic.sh b/test/suites/basic.sh
index b2f3eef..df52e82 100644
--- a/test/suites/basic.sh
+++ b/test/suites/basic.sh
@@ -250,6 +250,17 @@ test_basic_usage() {
lxc delete lxd-apparmor-test
[ ! -f "${LXD_DIR}/security/apparmor/profiles/lxd-lxd-apparmor-test" ]
+ lxc launch testimage lxd-seccomp-test
+ init=$(lxc info lxd-seccomp-test | grep Pid | cut -f2 -d" ")
+ [ "$(grep Seccomp "/proc/${init}/status" | cut -f2)" -eq "2" ]  # 2 = seccomp filter mode is active
+ lxc stop --force lxd-seccomp-test
+ lxc config set lxd-seccomp-test security.syscalls.default false  # target the container launched above, not the image
+ lxc config set lxd-seccomp-test security.syscalls.compat false
+ lxc start lxd-seccomp-test
+ init=$(lxc info lxd-seccomp-test | grep Pid | cut -f2 -d" ")
+ [ "$(grep Seccomp "/proc/${init}/status" | cut -f2)" -eq "0" ]  # 0 = no seccomp filter loaded once both knobs are off
+ lxc stop --force lxd-seccomp-test
+
# make sure that privileged containers are not world-readable
lxc profile create unconfined
lxc profile set unconfined security.privileged true
More information about the lxc-devel
mailing list