[lxc-devel] [lxd/master] apparmor: create an apparmor namespace for each container

tych0 on Github lxc-bot at linuxcontainers.org
Mon Sep 26 22:36:30 UTC 2016


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 361 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20160926/7498149e/attachment.bin>
-------------- next part --------------
From a00486e1e740de120edf734c3f9bb4f6a9452040 Mon Sep 17 00:00:00 2001
From: Tycho Andersen <tycho.andersen at canonical.com>
Date: Fri, 17 Jun 2016 17:51:17 +0000
Subject: [PATCH] apparmor: create an apparmor namespace for each container

Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
---
 lxd/apparmor.go      | 101 ++++++++++++++++++++++++++++++++++++++++++++-------
 lxd/container_lxc.go |  26 +++++++++++--
 lxd/daemon.go        |  43 ++++++++++++++++++++++
 test/suites/basic.sh |  13 +++++--
 4 files changed, 163 insertions(+), 20 deletions(-)

diff --git a/lxd/apparmor.go b/lxd/apparmor.go
index b4d73f3..1adeadd 100644
--- a/lxd/apparmor.go
+++ b/lxd/apparmor.go
@@ -94,7 +94,7 @@ const AA_PROFILE_BASE = `
   mount fstype=sysfs -> /sys/,
   mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/,
   deny /sys/firmware/efi/efivars/** rwklx,
-  deny /sys/kernel/security/** rwklx,
+  # note, /sys/kernel/security/** handled below
   mount options=(move) /sys/fs/cgroup/cgmanager/ -> /sys/fs/cgroup/cgmanager.lower/,
   mount options=(ro, nosuid, nodev, noexec, remount, strictatime) -> /sys/fs/cgroup/,
 
@@ -186,7 +186,7 @@ const AA_PROFILE_BASE = `
   deny /proc/sys/n[^e]*{,/**} wklx,
   deny /proc/sys/ne[^t]*{,/**} wklx,
   deny /proc/sys/net?*{,/**} wklx,
-  deny /sys/[^fdc]*{,/**} wklx,
+  deny /sys/[^fdck]*{,/**} wklx,
   deny /sys/c[^l]*{,/**} wklx,
   deny /sys/cl[^a]*{,/**} wklx,
   deny /sys/cla[^s]*{,/**} wklx,
@@ -250,14 +250,28 @@ const AA_PROFILE_NESTING = `
   mount options=bind /var/lib/lxd/shmounts/** -> /var/lib/lxd/**,
 `
 
-func AAProfileFull(c container) string {
-	lxddir := shared.VarPath("")
-	if len(c.Name())+len(lxddir)+7 >= 253 {
+func mkApparmorName(name string) string {
+	if len(name)+7 >= 253 {
 		hash := sha256.New()
-		io.WriteString(hash, lxddir)
-		lxddir = fmt.Sprintf("%x", hash.Sum(nil))
+		io.WriteString(hash, name)
+		return fmt.Sprintf("%x", hash.Sum(nil))
 	}
 
+	return name
+}
+
+// AANamespace returns the apparmor namespace name used for container c.
+func AANamespace(c container) string {
+	// "/" is not allowed in apparmor namespace names, so map it to "-";
+	// trim the outer slashes first so it doesn't look like "-var-lib-lxd".
+	trimmed := strings.Trim(shared.VarPath(""), "/")
+	dir := mkApparmorName(strings.Replace(trimmed, "/", "-", -1))
+	return fmt.Sprintf("lxd-%s_<%s>", c.Name(), dir)
+}
+
+func AAProfileFull(c container) string {
+	lxddir := shared.VarPath("")
+	lxddir = mkApparmorName(lxddir)
 	return fmt.Sprintf("lxd-%s_<%s>", c.Name(), lxddir)
 }
 
@@ -289,11 +303,50 @@ func getAAProfileContent(c container) string {
 		profile += "  mount fstype=cgroup -> /sys/fs/cgroup/**,\n"
 	}
 
-	// Apply nesting bits
+	if aaStacking {
+		profile += "\n  ### Feature: apparmor stacking\n"
+
+		if c.IsPrivileged() {
+			profile += "\n  ### Configuration: apparmor loading disabled in privileged containers\n"
+			profile += "  deny /sys/k*{,/**} rwklx,\n"
+		} else {
+			profile += `  ### Configuration: apparmor loading in unprivileged containers
+  deny /sys/k[^e]*{,/**} wklx,
+  deny /sys/ke[^r]*{,/**} wklx,
+  deny /sys/ker[^n]*{,/**} wklx,
+  deny /sys/kern[^e]*{,/**} wklx,
+  deny /sys/kerne[^l]*{,/**} wklx,
+  deny /sys/kernel/[^s]*{,/**} wklx,
+  deny /sys/kernel/s[^e]*{,/**} wklx,
+  deny /sys/kernel/se[^c]*{,/**} wklx,
+  deny /sys/kernel/sec[^u]*{,/**} wklx,
+  deny /sys/kernel/secu[^r]*{,/**} wklx,
+  deny /sys/kernel/secur[^i]*{,/**} wklx,
+  deny /sys/kernel/securi[^t]*{,/**} wklx,
+  deny /sys/kernel/securit[^y]*{,/**} wklx,
+  deny /sys/kernel/security/[^a]*{,/**} wklx,
+  deny /sys/kernel/security/a[^p]*{,/**} wklx,
+  deny /sys/kernel/security/ap[^p]*{,/**} wklx,
+  deny /sys/kernel/security/app[^a]*{,/**} wklx,
+  deny /sys/kernel/security/appa[^r]*{,/**} wklx,
+  deny /sys/kernel/security/appar[^m]*{,/**} wklx,
+  deny /sys/kernel/security/apparm[^o]*{,/**} wklx,
+  deny /sys/kernel/security/apparmo[^r]*{,/**} wklx,
+  deny /sys/kernel/security/apparmor?*{,/**} wklx,
+  deny /sys/kernel/security?*{,/**} wklx,
+  deny /sys/kernel?*{,/**} wklx,
+`
+			profile += fmt.Sprintf("  change_profile -> \":%s://*\",\n", AANamespace(c))
+		}
+	}
+
 	if c.IsNesting() {
+		// Apply nesting bits
 		profile += "\n  ### Configuration: nesting\n"
 		profile += strings.TrimLeft(AA_PROFILE_NESTING, "\n")
-		profile += fmt.Sprintf("  change_profile -> \"%s\",\n", AAProfileFull(c))
+		if !aaStacking || c.IsPrivileged() {
+			profile += fmt.Sprintf("  change_profile -> \"%s\",\n", AAProfileFull(c))
+		}
 	}
 
 	// Append raw.apparmor
@@ -317,12 +370,13 @@ func runApparmor(command string, c container) error {
 		return nil
 	}
 
-	cmd := exec.Command("apparmor_parser", []string{
+	args := []string{
 		fmt.Sprintf("-%sWL", command),
 		path.Join(aaPath, "cache"),
 		path.Join(aaPath, "profiles", AAProfileShort(c)),
-	}...)
+	}
 
+	cmd := exec.Command("apparmor_parser", args...)
 	output, err := cmd.CombinedOutput()
 	if err != nil {
 		shared.LogError("Running apparmor",
@@ -332,6 +386,14 @@ func runApparmor(command string, c container) error {
 	return err
 }
 
+// mkApparmorNamespace ensures the named apparmor policy namespace exists.
+func mkApparmorNamespace(namespace string) error {
+	if !aaStacking {
+		return nil
+	}
+	// MkdirAll (unlike Mkdir) returns nil when a stale namespace is still there.
+	return os.MkdirAll(path.Join("/sys/kernel/security/apparmor/policy/namespaces", namespace), 0755)
+}
+
 // Ensure that the container's policy is loaded into the kernel so the
 // container can boot.
 func AALoadProfile(c container) error {
@@ -339,6 +401,10 @@ func AALoadProfile(c container) error {
 		return nil
 	}
 
+	if err := mkApparmorNamespace(AANamespace(c)); err != nil {
+		return err
+	}
+
 	/* In order to avoid forcing a profile parse (potentially slow) on
 	 * every container start, let's use apparmor's binary policy cache,
 	 * which checks mtime of the files to figure out if the policy needs to
@@ -375,13 +441,20 @@ func AALoadProfile(c container) error {
 	return runApparmor(APPARMOR_CMD_LOAD, c)
 }
 
-// Ensure that the container's policy is unloaded to free kernel memory. This
-// does not delete the policy from disk or cache.
-func AAUnloadProfile(c container) error {
+// AADestroy removes the container's apparmor namespace when stacking is on
+// (failure is only logged) and unloads its policy; disk and cache are kept.
+func AADestroy(c container) error {
 	if !aaAdmin {
 		return nil
 	}
 
+	if aaStacking {
+		p := path.Join("/sys/kernel/security/apparmor/policy/namespaces", AANamespace(c))
+		if err := os.Remove(p); err != nil {
+			shared.LogError("error removing apparmor namespace", log.Ctx{"err": err, "ns": p})
+		}
+	}
+
 	return runApparmor(APPARMOR_CMD_UNLOAD, c)
 }
 
diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 99282dc..9159511 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -388,7 +388,12 @@ func (c *containerLXC) initLXC() error {
 	}
 
 	// Base config
-	err = lxcSetConfigItem(cc, "lxc.cap.drop", "mac_admin mac_override sys_time sys_module sys_rawio")
+	toDrop := "sys_time sys_module sys_rawio"
+	if !aaStacking || c.IsPrivileged() {
+		toDrop = toDrop + " mac_admin mac_override"
+	}
+
+	err = lxcSetConfigItem(cc, "lxc.cap.drop", toDrop)
 	if err != nil {
 		return err
 	}
@@ -587,7 +592,20 @@ func (c *containerLXC) initLXC() error {
 			}
 		} else {
 			// If not currently confined, use the container's profile
-			err := lxcSetConfigItem(cc, "lxc.aa_profile", AAProfileFull(c))
+			profile := AAProfileFull(c)
+
+			/* In the nesting case, we want to enable the inside
+			 * LXD to load its profile. Unprivileged containers can
+			 * load profiles, but privileged containers cannot, so
+			 * let's not use a namespace so they can fall back to
+			 * the old way of nesting, i.e. using the parent's
+			 * profile.
+			 */
+			if aaStacking && (!c.IsNesting() || !c.IsPrivileged()) {
+				profile = fmt.Sprintf("%s//&:%s:", profile, AANamespace(c))
+			}
+
+			err := lxcSetConfigItem(cc, "lxc.aa_profile", profile)
 			if err != nil {
 				return err
 			}
@@ -1702,7 +1720,9 @@ func (c *containerLXC) OnStop(target string) error {
 	}
 
 	// Unload the apparmor profile
-	AAUnloadProfile(c)
+	if err := AADestroy(c); err != nil {
+		shared.LogError("failed to destroy apparmor namespace", log.Ctx{"container": c.Name(), "err": err})
+	}
 
 	// FIXME: The go routine can go away once we can rely on LXC_TARGET
 	go func(c *containerLXC, target string, op *lxcContainerOperation) {
diff --git a/lxd/daemon.go b/lxd/daemon.go
index 984d2e0..586c432 100644
--- a/lxd/daemon.go
+++ b/lxd/daemon.go
@@ -40,6 +40,7 @@ import (
 var aaAdmin = true
 var aaAvailable = true
 var aaConfined = false
+var aaStacking = false
 
 // CGroup
 var cgBlkioController = false
@@ -626,6 +627,48 @@ func (d *Daemon) Init() error {
 		}
 	}
 
+	if aaAvailable {
+		// canStack reports whether the kernel's apparmor can stack
+		// profiles: features/domain/stack must read "yes" and
+		// features/domain/version must be at least 1.1.
+		canStack := func() bool {
+			contentBytes, err := ioutil.ReadFile("/sys/kernel/security/apparmor/features/domain/stack")
+			if err != nil {
+				return false
+			}
+
+			if string(contentBytes) != "yes\n" {
+				return false
+			}
+
+			contentBytes, err = ioutil.ReadFile("/sys/kernel/security/apparmor/features/domain/version")
+			if err != nil {
+				return false
+			}
+
+			content := string(contentBytes)
+
+			parts := strings.Split(strings.TrimSpace(content), ".")
+			if len(parts) != 2 {
+				shared.LogWarn("unknown apparmor domain version", log.Ctx{"version": content})
+				return false
+			}
+
+			major, majorErr := strconv.Atoi(parts[0])
+			minor, minorErr := strconv.Atoi(parts[1])
+			if majorErr != nil || minorErr != nil {
+				shared.LogWarn("unknown apparmor domain version", log.Ctx{"version": content})
+				return false
+			}
+
+			// Compare as a (major, minor) pair: any major above 1
+			// qualifies whatever its minor, e.g. 2.0 is newer than 1.1.
+			return major > 1 || (major == 1 && minor >= 1)
+		}
+
+		aaStacking = canStack()
+	}
+
 	/* Detect CGroup support */
 	cgBlkioController = shared.PathExists("/sys/fs/cgroup/blkio/")
 	if !cgBlkioController {
diff --git a/test/suites/basic.sh b/test/suites/basic.sh
index 509260a..5bcfdc9 100644
--- a/test/suites/basic.sh
+++ b/test/suites/basic.sh
@@ -313,9 +313,16 @@ test_basic_usage() {
   # check that an apparmor profile is created for this container, that it is
   # unloaded on stop, and that it is deleted when the container is deleted
   lxc launch testimage lxd-apparmor-test
-  aa-status | grep "lxd-lxd-apparmor-test_<${LXD_DIR}>"
-  lxc stop lxd-apparmor-test --force
-  ! aa-status | grep -q "lxd-lxd-apparmor-test_<${LXD_DIR}>"
+  if [ -f /sys/kernel/security/apparmor/features/domain/version ]; then
+    aa_namespace="lxd-lxd-apparmor-test_<$(echo "${LXD_DIR}" | sed -e 's/\//-/g' -e 's/^.//')>"
+    aa-status | grep ":${aa_namespace}://unconfined"
+    lxc stop lxd-apparmor-test --force
+    ! aa-status | grep -q ":${aa_namespace}:"
+  else
+    aa-status | grep "lxd-lxd-apparmor-test_<${LXD_DIR}>"
+    lxc stop lxd-apparmor-test --force
+    ! aa-status | grep -q "lxd-lxd-apparmor-test_<${LXD_DIR}>"
+  fi
   lxc delete lxd-apparmor-test
   [ ! -f "${LXD_DIR}/security/apparmor/profiles/lxd-lxd-apparmor-test" ]
 


More information about the lxc-devel mailing list