[lxc-devel] [lxd/master] apparmor: create an apparmor namespace for each container
tych0 on Github
lxc-bot at linuxcontainers.org
Mon Sep 26 22:36:30 UTC 2016
A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 361 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20160926/7498149e/attachment.bin>
-------------- next part --------------
From a00486e1e740de120edf734c3f9bb4f6a9452040 Mon Sep 17 00:00:00 2001
From: Tycho Andersen <tycho.andersen at canonical.com>
Date: Fri, 17 Jun 2016 17:51:17 +0000
Subject: [PATCH] apparmor: create an apparmor namespace for each container
Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
---
lxd/apparmor.go | 101 ++++++++++++++++++++++++++++++++++++++++++++-------
lxd/container_lxc.go | 26 +++++++++++--
lxd/daemon.go | 43 ++++++++++++++++++++++
test/suites/basic.sh | 13 +++++--
4 files changed, 163 insertions(+), 20 deletions(-)
diff --git a/lxd/apparmor.go b/lxd/apparmor.go
index b4d73f3..1adeadd 100644
--- a/lxd/apparmor.go
+++ b/lxd/apparmor.go
@@ -94,7 +94,7 @@ const AA_PROFILE_BASE = `
mount fstype=sysfs -> /sys/,
mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/,
deny /sys/firmware/efi/efivars/** rwklx,
- deny /sys/kernel/security/** rwklx,
+ # note, /sys/kernel/security/** handled below
mount options=(move) /sys/fs/cgroup/cgmanager/ -> /sys/fs/cgroup/cgmanager.lower/,
mount options=(ro, nosuid, nodev, noexec, remount, strictatime) -> /sys/fs/cgroup/,
@@ -186,7 +186,7 @@ const AA_PROFILE_BASE = `
deny /proc/sys/n[^e]*{,/**} wklx,
deny /proc/sys/ne[^t]*{,/**} wklx,
deny /proc/sys/net?*{,/**} wklx,
- deny /sys/[^fdc]*{,/**} wklx,
+ deny /sys/[^fdck]*{,/**} wklx,
deny /sys/c[^l]*{,/**} wklx,
deny /sys/cl[^a]*{,/**} wklx,
deny /sys/cla[^s]*{,/**} wklx,
@@ -250,14 +250,28 @@ const AA_PROFILE_NESTING = `
mount options=bind /var/lib/lxd/shmounts/** -> /var/lib/lxd/**,
`
-func AAProfileFull(c container) string {
- lxddir := shared.VarPath("")
- if len(c.Name())+len(lxddir)+7 >= 253 {
+func mkApparmorName(name string) string {
+ if len(name)+7 >= 253 {
hash := sha256.New()
- io.WriteString(hash, lxddir)
- lxddir = fmt.Sprintf("%x", hash.Sum(nil))
+ io.WriteString(hash, name)
+ return fmt.Sprintf("%x", hash.Sum(nil))
}
+ return name
+}
+
+func AANamespace(c container) string {
+ /* The '/' character is not allowed in apparmor namespace names, so
+ * replace it with '-'; the leading '/' is trimmed first so the result
+ * doesn't start with a dash (i.e. look like "-var-lib-lxd"). */
+ lxddir := strings.Replace(strings.Trim(shared.VarPath(""), "/"), "/", "-", -1)
+ lxddir = mkApparmorName(lxddir)
+ return fmt.Sprintf("lxd-%s_<%s>", c.Name(), lxddir)
+}
+
+func AAProfileFull(c container) string {
+ lxddir := shared.VarPath("")
+ lxddir = mkApparmorName(lxddir)
return fmt.Sprintf("lxd-%s_<%s>", c.Name(), lxddir)
}
@@ -289,11 +303,50 @@ func getAAProfileContent(c container) string {
profile += " mount fstype=cgroup -> /sys/fs/cgroup/**,\n"
}
- // Apply nesting bits
+ if aaStacking {
+ profile += "\n ### Feature: apparmor stacking\n"
+
+ if c.IsPrivileged() {
+ profile += "\n ### Configuration: apparmor loading disabled in privileged containers\n"
+ profile += " deny /sys/k*{,/**} rwklx,\n"
+ } else {
+ profile += ` ### Configuration: apparmor loading in unprivileged containers
+ deny /sys/k[^e]*{,/**} wklx,
+ deny /sys/ke[^r]*{,/**} wklx,
+ deny /sys/ker[^n]*{,/**} wklx,
+ deny /sys/kern[^e]*{,/**} wklx,
+ deny /sys/kerne[^l]*{,/**} wklx,
+ deny /sys/kernel/[^s]*{,/**} wklx,
+ deny /sys/kernel/s[^e]*{,/**} wklx,
+ deny /sys/kernel/se[^c]*{,/**} wklx,
+ deny /sys/kernel/sec[^u]*{,/**} wklx,
+ deny /sys/kernel/secu[^r]*{,/**} wklx,
+ deny /sys/kernel/secur[^i]*{,/**} wklx,
+ deny /sys/kernel/securi[^t]*{,/**} wklx,
+ deny /sys/kernel/securit[^y]*{,/**} wklx,
+ deny /sys/kernel/security/[^a]*{,/**} wklx,
+ deny /sys/kernel/security/a[^p]*{,/**} wklx,
+ deny /sys/kernel/security/ap[^p]*{,/**} wklx,
+ deny /sys/kernel/security/app[^a]*{,/**} wklx,
+ deny /sys/kernel/security/appa[^r]*{,/**} wklx,
+ deny /sys/kernel/security/appar[^m]*{,/**} wklx,
+ deny /sys/kernel/security/apparm[^o]*{,/**} wklx,
+ deny /sys/kernel/security/apparmo[^r]*{,/**} wklx,
+ deny /sys/kernel/security/apparmor?*{,/**} wklx,
+ deny /sys/kernel/security?*{,/**} wklx,
+ deny /sys/kernel?*{,/**} wklx,
+`
+ profile += fmt.Sprintf(" change_profile -> \":%s://*\",\n", AANamespace(c))
+ }
+ }
+
if c.IsNesting() {
+ // Apply nesting bits
profile += "\n ### Configuration: nesting\n"
profile += strings.TrimLeft(AA_PROFILE_NESTING, "\n")
- profile += fmt.Sprintf(" change_profile -> \"%s\",\n", AAProfileFull(c))
+ if !aaStacking || c.IsPrivileged() {
+ profile += fmt.Sprintf(" change_profile -> \"%s\",\n", AAProfileFull(c))
+ }
}
// Append raw.apparmor
@@ -317,12 +370,13 @@ func runApparmor(command string, c container) error {
return nil
}
- cmd := exec.Command("apparmor_parser", []string{
+ args := []string{
fmt.Sprintf("-%sWL", command),
path.Join(aaPath, "cache"),
path.Join(aaPath, "profiles", AAProfileShort(c)),
- }...)
+ }
+ cmd := exec.Command("apparmor_parser", args...)
output, err := cmd.CombinedOutput()
if err != nil {
shared.LogError("Running apparmor",
@@ -332,6 +386,14 @@ func runApparmor(command string, c container) error {
return err
}
+func mkApparmorNamespace(namespace string) error {
+ if !aaStacking {
+ return nil
+ }
+
+ return os.Mkdir(path.Join("/sys/kernel/security/apparmor/policy/namespaces", namespace), 0755)
+}
+
// Ensure that the container's policy is loaded into the kernel so the
// container can boot.
func AALoadProfile(c container) error {
@@ -339,6 +401,10 @@ func AALoadProfile(c container) error {
return nil
}
+ if err := mkApparmorNamespace(AANamespace(c)); err != nil {
+ return err
+ }
+
/* In order to avoid forcing a profile parse (potentially slow) on
* every container start, let's use apparmor's binary policy cache,
* which checks mtime of the files to figure out if the policy needs to
@@ -375,13 +441,20 @@ func AALoadProfile(c container) error {
return runApparmor(APPARMOR_CMD_LOAD, c)
}
-// Ensure that the container's policy is unloaded to free kernel memory. This
-// does not delete the policy from disk or cache.
-func AAUnloadProfile(c container) error {
+// Remove the container's apparmor namespace (when stacking is in use) and unload
+// its policy to free kernel memory. This does not delete the policy from disk or cache.
+func AADestroy(c container) error {
if !aaAdmin {
return nil
}
+ if aaStacking {
+ p := path.Join("/sys/kernel/security/apparmor/policy/namespaces", AANamespace(c))
+ if err := os.Remove(p); err != nil {
+ shared.LogError("error removing apparmor namespace", log.Ctx{"err": err, "ns": p})
+ }
+ }
+
return runApparmor(APPARMOR_CMD_UNLOAD, c)
}
diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 99282dc..9159511 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -388,7 +388,12 @@ func (c *containerLXC) initLXC() error {
}
// Base config
- err = lxcSetConfigItem(cc, "lxc.cap.drop", "mac_admin mac_override sys_time sys_module sys_rawio")
+ toDrop := "sys_time sys_module sys_rawio"
+ if !aaStacking || c.IsPrivileged() {
+ toDrop = toDrop + " mac_admin mac_override"
+ }
+
+ err = lxcSetConfigItem(cc, "lxc.cap.drop", toDrop)
if err != nil {
return err
}
@@ -587,7 +592,20 @@ func (c *containerLXC) initLXC() error {
}
} else {
// If not currently confined, use the container's profile
- err := lxcSetConfigItem(cc, "lxc.aa_profile", AAProfileFull(c))
+ profile := AAProfileFull(c)
+
+ /* In the nesting case, we want to enable the inside
+ * LXD to load its profile. Unprivileged containers can
+ * load profiles, but privileged containers cannot, so
+ * for privileged nested containers we skip the namespace
+ * and fall back to the old way of nesting, i.e. using
+ * the parent's profile.
+ */
+ if aaStacking && (!c.IsNesting() || !c.IsPrivileged()) {
+ profile = fmt.Sprintf("%s//&:%s:", profile, AANamespace(c))
+ }
+
+ err := lxcSetConfigItem(cc, "lxc.aa_profile", profile)
if err != nil {
return err
}
@@ -1702,7 +1720,9 @@ func (c *containerLXC) OnStop(target string) error {
}
// Unload the apparmor profile
- AAUnloadProfile(c)
+ if err := AADestroy(c); err != nil {
+ shared.LogError("failed to destroy apparmor namespace", log.Ctx{"container": c.Name(), "err": err})
+ }
// FIXME: The go routine can go away once we can rely on LXC_TARGET
go func(c *containerLXC, target string, op *lxcContainerOperation) {
diff --git a/lxd/daemon.go b/lxd/daemon.go
index 984d2e0..586c432 100644
--- a/lxd/daemon.go
+++ b/lxd/daemon.go
@@ -40,6 +40,7 @@ import (
var aaAdmin = true
var aaAvailable = true
var aaConfined = false
+var aaStacking = false
// CGroup
var cgBlkioController = false
@@ -626,6 +627,48 @@ func (d *Daemon) Init() error {
}
}
+ if aaAvailable {
+ canStack := func() bool {
+ contentBytes, err := ioutil.ReadFile("/sys/kernel/security/apparmor/features/domain/stack")
+ if err != nil {
+ return false
+ }
+
+ if string(contentBytes) != "yes\n" {
+ return false
+ }
+
+ contentBytes, err = ioutil.ReadFile("/sys/kernel/security/apparmor/features/domain/version")
+ if err != nil {
+ return false
+ }
+
+ content := string(contentBytes)
+
+ parts := strings.Split(strings.TrimSpace(content), ".")
+ if len(parts) != 2 {
+ shared.LogWarn("unknown apparmor domain version", log.Ctx{"version": content})
+ return false
+ }
+
+ major, err := strconv.Atoi(parts[0])
+ if err != nil {
+ shared.LogWarn("unknown apparmor domain version", log.Ctx{"version": content})
+ return false
+ }
+
+ minor, err := strconv.Atoi(parts[1])
+ if err != nil {
+ shared.LogWarn("unknown apparmor domain version", log.Ctx{"version": content})
+ return false
+ }
+
+ return major >= 1 && minor >= 1
+ }
+
+ aaStacking = canStack()
+ }
+
/* Detect CGroup support */
cgBlkioController = shared.PathExists("/sys/fs/cgroup/blkio/")
if !cgBlkioController {
diff --git a/test/suites/basic.sh b/test/suites/basic.sh
index 509260a..5bcfdc9 100644
--- a/test/suites/basic.sh
+++ b/test/suites/basic.sh
@@ -313,9 +313,16 @@ test_basic_usage() {
# check that an apparmor profile is created for this container, that it is
# unloaded on stop, and that it is deleted when the container is deleted
lxc launch testimage lxd-apparmor-test
- aa-status | grep "lxd-lxd-apparmor-test_<${LXD_DIR}>"
- lxc stop lxd-apparmor-test --force
- ! aa-status | grep -q "lxd-lxd-apparmor-test_<${LXD_DIR}>"
+ if [ -f /sys/kernel/security/apparmor/features/domain/version ]; then
+ aa_namespace="lxd-lxd-apparmor-test_<$(echo "${LXD_DIR}" | sed -e 's/\//-/g' -e 's/^.//')>"
+ aa-status | grep ":${aa_namespace}://unconfined"
+ lxc stop lxd-apparmor-test --force
+ ! aa-status | grep -q ":${aa_namespace}:"
+ else
+ aa-status | grep "lxd-lxd-apparmor-test_<${LXD_DIR}>"
+ lxc stop lxd-apparmor-test --force
+ ! aa-status | grep -q "lxd-lxd-apparmor-test_<${LXD_DIR}>"
+ fi
lxc delete lxd-apparmor-test
[ ! -f "${LXD_DIR}/security/apparmor/profiles/lxd-lxd-apparmor-test" ]
More information about the lxc-devel
mailing list