[lxc-devel] Status of usability of lxc

Yamamoto - Joe's Web Hosting yamamoto at jwh.jp
Sat May 7 08:57:31 UTC 2011


Hi,

Here is my workaround for increasing security in an LXC guest environment
while keeping some usability.

However, I had to modify some code in both the kernel and the LXC user-land
tool to achieve this.

* Kernel patch(2.6.32.39)

diff -uwBr linux-2.6.32.39/fs/namespace.c linux-2.6.32.39-jwh/fs/namespace.c
--- linux-2.6.32.39/fs/namespace.c      2011-04-23 00:47:31.000000000 +0900
+++ linux-2.6.32.39-jwh/fs/namespace.c  2011-04-26 15:56:15.889236846 +0900
@@ -1140,6 +1140,10 @@
        if (!capable(CAP_SYS_ADMIN))
                goto dput_and_out;

+//JWH
+       if (!capable(CAP_SYS_BOOT))
+               goto dput_and_out;
+
        retval = do_umount(path.mnt, flags);
 dput_and_out:
        /* we mustn't call path_put() as that would clear mnt_expiry_mark */
@@ -1163,8 +1167,10 @@

 static int mount_is_safe(struct path *path)
 {
-       if (capable(CAP_SYS_ADMIN))
+// JWH
+       if (capable(CAP_SYS_ADMIN) && capable(CAP_SYS_BOOT))
                return 0;
+
        return -EPERM;
 #ifdef notyet
        if (S_ISLNK(path->dentry->d_inode->i_mode))
@@ -1427,6 +1433,10 @@
        int type = flag & ~MS_REC;
        int err = 0;

+//JWH
+        if (!capable(CAP_SYS_BOOT))
+                return -EPERM;
+
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

@@ -1527,6 +1537,10 @@
        int err;
        struct super_block *sb = path->mnt->mnt_sb;

+//JWH
+        if (!capable(CAP_SYS_BOOT))
+                return -EPERM;
+
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

@@ -1569,8 +1583,14 @@
        struct path old_path, parent_path;
        struct vfsmount *p;
        int err = 0;
+
+//JWH
+        if (!capable(CAP_SYS_BOOT))
+                return -EPERM;
+
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
+
        if (!old_name || !*old_name)
                return -EINVAL;
        err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
@@ -1650,6 +1670,10 @@
        if (!type)
                return -EINVAL;

+//JWH
+        if (!capable(CAP_SYS_BOOT))
+                return -EPERM;
+
        /* we need capabilities... */
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
@@ -1916,6 +1940,10 @@
        int retval = 0;
        int mnt_flags = 0;

+//JWH
+        if (!capable(CAP_SYS_BOOT))
+                return -EPERM;
+
        /* Discard magic */
        if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
                flags &= ~MS_MGC_MSK;
Only in linux-2.6.32.39-jwh/include: asm-x86
diff -uwBr linux-2.6.32.39/mm/swapfile.c linux-2.6.32.39-jwh/mm/swapfile.c
--- linux-2.6.32.39/mm/swapfile.c       2011-04-23 00:47:31.000000000 +0900
+++ linux-2.6.32.39-jwh/mm/swapfile.c   2011-04-26 15:29:12.176029216 +0900
@@ -1523,6 +1523,10 @@
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

+// JWH
+       if (!capable(CAP_SYS_BOOT))
+               return -EPERM;
+
        pathname = getname(specialfile);
        err = PTR_ERR(pathname);
        if (IS_ERR(pathname))
@@ -1788,6 +1792,11 @@

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
+
+// JWH
+       if (!capable(CAP_SYS_BOOT))
+               return -EPERM;
+
        spin_lock(&swap_lock);
        p = swap_info;
        for (type = 0 ; type < nr_swapfiles ; type++,p++)


* User land patch(LXC 0.7.4.1)


diff -uwBr lxc-0.7.4.1/src/lxc/start.c lxc-0.7.4.1-jwh/src/lxc/start.c
--- lxc-0.7.4.1/src/lxc/start.c 2011-03-09 05:32:16.000000000 +0900
+++ lxc-0.7.4.1-jwh/src/lxc/start.c     2011-04-29 10:12:25.180417562 +0900
@@ -452,10 +452,28 @@
                goto out_warn_father;
        }

-       if (prctl(PR_CAPBSET_DROP, CAP_SYS_BOOT, 0, 0, 0)) {
-               SYSERROR("failed to remove CAP_SYS_BOOT capability");
+// JWH
+        if(getenv("JWH_ENABLE_INSECURE_MODE") != NULL && strlen(getenv("JWH_ENABLE_INSECURE_MODE")) > 0) {
+        if (prctl(PR_CAPBSET_DROP, CAP_SYS_BOOT, 0, 0, 0)
+          || prctl(PR_CAPBSET_DROP, CAP_LINUX_IMMUTABLE, 0, 0, 0)
+          || prctl(PR_CAPBSET_DROP, CAP_MKNOD, 0, 0, 0)
+          || prctl(PR_CAPBSET_DROP, CAP_SETPCAP, 0, 0, 0)
+//          || prctl(PR_CAPBSET_DROP, CAP_SYS_ADMIN, 0, 0, 0)
+//          || prctl(PR_CAPBSET_DROP, CAP_SYS_CHROOT, 0, 0, 0)
+          || prctl(PR_CAPBSET_DROP, CAP_SYS_MODULE, 0, 0, 0)
+          || prctl(PR_CAPBSET_DROP, CAP_SYS_NICE, 0, 0, 0)
+          || prctl(PR_CAPBSET_DROP, CAP_SYS_PACCT, 0, 0, 0)
+          || prctl(PR_CAPBSET_DROP, CAP_SYS_PTRACE, 0, 0, 0)
+          || prctl(PR_CAPBSET_DROP, CAP_SYS_RAWIO, 0, 0, 0)
+          || prctl(PR_CAPBSET_DROP, CAP_SYS_TIME, 0, 0, 0)
+          || prctl(PR_CAPBSET_DROP, CAP_AUDIT_CONTROL, 0, 0, 0)
+          || prctl(PR_CAPBSET_DROP, CAP_MAC_OVERRIDE, 0, 0, 0)
+          || prctl(PR_CAPBSET_DROP, CAP_SETFCAP, 0, 0, 0)
+         ) {
+               SYSERROR("failed to remove several dengerous capabilities");
                return -1;
        }
+        }

        close(handler->sigfd);


However, this patch is still ugly. At the very least, we need to define a new
CAP constant, or a mechanism by which we can determine whether the current
context belongs to a guest, in order to address the container's security issues.

Currently, I am using CAP_SYS_BOOT in the kernel to determine whether
the current context is a guest process, because this capability is normally
held by the host's root processes: if it is absent from the current
process's capability bits, then the process should be running in a
container's context, even if it is the container's root.

We should also mount /sys read-only, just like /proc, in the guest
environment, because /sys contains many tunable entries that might
affect the stability of the entire system.

Also, the container's /dev needs to be prepared with some of the static block
device nodes removed.

Hope this is informative for you all.

-- 
http://cpanel-plesk.net/lxc-series/
* Our VPS service using LXC

http://www.joeswebhosting.net/

Masahide Yamamoto - Joe's Web Hosting <yamamoto at jwh.jp>

> To disable the ability to trigger a reboot of the host system by sending
> "b" to /proc/sysrq-trigger inside a container, I've dropped
> CAP_SYS_ADMIN and set readonly for the /proc mount-point.
> 
> I'm interested what else capabilities are recommended to drop when using
> LXC as a system container?
> 
> Thanks,
> Christoph
> 
> On 04/19/2011 01:01 PM, richard -rw- weinberger wrote:
> > On Tue, Mar 22, 2011 at 10:20 AM, Nathan McSween <nwmcsween at gmail.com> wrote:
> >> Can I get a quick rundown of what is implemented w.r.t  UID/GID
> >> containerization, is it safe yet to give containerized root to an
> >> everyday user without huge security issues?
> > 
> > Drop all dangerous capabilities and mount /proc read-only.
> > 
> > HTH,
> > //richard
> > 
> >> ------------------------------------------------------------------------------
> >> Xperia(TM) PLAY
> >> It's a major breakthrough. An authentic gaming
> >> smartphone on the nation's most reliable network.
> >> And it wants your games.
> >> http://p.sf.net/sfu/verizon-sfdev
> >> _______________________________________________
> >> Lxc-devel mailing list
> >> Lxc-devel at lists.sourceforge.net
> >> https://lists.sourceforge.net/lists/listinfo/lxc-devel
> >>
> > 
> > 
> > 
> 
> ------------------------------------------------------------------------------
> WhatsUp Gold - Download Free Network Management Software
> The most intuitive, comprehensive, and cost-effective network 
> management toolset available today.  Delivers lowest initial 
> acquisition cost and overall TCO of any competing solution.
> http://p.sf.net/sfu/whatsupgold-sd
> _______________________________________________
> Lxc-devel mailing list
> Lxc-devel at lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/lxc-devel




More information about the lxc-devel mailing list