[lxc-devel] [lxd/master] lxd: enable safe native container terminal allocation

brauner on Github lxc-bot at linuxcontainers.org
Wed Aug 5 13:53:54 UTC 2020


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 364 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20200805/c9783b20/attachment.bin>
-------------- next part --------------
From 6565004945e5cec4e1e4070fb1110f44c38eedbd Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Wed, 5 Aug 2020 15:52:46 +0200
Subject: [PATCH] lxd: enable safe native container terminal allocation

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 lxd/daemon.go                      |  8 ++++++++
 lxd/instance/drivers/driver_lxc.go | 11 +++++++++++
 lxd/instance/instance_interface.go |  1 +
 lxd/instance_exec.go               | 12 +++++++++++-
 lxd/main_checkfeature.go           | 29 +++++++++++++++++++++++++++++
 lxd/sys/os.go                      |  1 +
 shared/util_linux.go               | 24 ++++++++++++++++++------
 7 files changed, 79 insertions(+), 7 deletions(-)

diff --git a/lxd/daemon.go b/lxd/daemon.go
index dde10b17e4..6b1e0f2cb1 100644
--- a/lxd/daemon.go
+++ b/lxd/daemon.go
@@ -630,6 +630,7 @@ func (d *Daemon) init() error {
 		"cgroup2",
 		"pidfd",
 		"seccomp_allow_deny_syntax",
+		"devpts_fd",
 	}
 	for _, extension := range lxcExtensions {
 		d.os.LXCFeatures[extension] = liblxc.HasApiExtension(extension)
@@ -674,6 +675,13 @@ func (d *Daemon) init() error {
 		logger.Infof(" - seccomp listener continue syscalls: no")
 	}
 
+	if canUseNativeTerminals() && d.os.LXCFeatures["devpts_fd"] {
+		d.os.NativeTerminals = true
+		logger.Infof(" - safe native terminal allocation : yes")
+	} else {
+		logger.Infof(" - safe native terminal allocation : no")
+	}
+
 	/*
 	 * During daemon startup we're the only thread that touches VFS3Fscaps
 	 * so we don't need to bother with atomic.StoreInt32() when touching
diff --git a/lxd/instance/drivers/driver_lxc.go b/lxd/instance/drivers/driver_lxc.go
index 12fd09298d..88b87c359c 100644
--- a/lxd/instance/drivers/driver_lxc.go
+++ b/lxd/instance/drivers/driver_lxc.go
@@ -6654,6 +6654,17 @@ func (c *lxc) InitPidFd() (*os.File, error) {
 	return c.c.InitPidFd()
 }
 
+// DevptsFd returns pidfd of init process.
+func (c *lxc) DevptsFd() (*os.File, error) {
+	// Load the go-lxc struct
+	err := c.initLXC(false)
+	if err != nil {
+		return nil, err
+	}
+
+	return c.c.DevptsFd()
+}
+
 // LocalConfig returns local config.
 func (c *lxc) LocalConfig() map[string]string {
 	return c.localConfig
diff --git a/lxd/instance/instance_interface.go b/lxd/instance/instance_interface.go
index cf2d316c32..e03db25bfb 100644
--- a/lxd/instance/instance_interface.go
+++ b/lxd/instance/instance_interface.go
@@ -147,6 +147,7 @@ type Container interface {
 	NextIdmap() (*idmap.IdmapSet, error)
 	ConsoleLog(opts liblxc.ConsoleLogOptions) (string, error)
 	InsertSeccompUnixDevice(prefix string, m deviceConfig.Device, pid int) error
+	DevptsFd() (*os.File, error)
 }
 
 // CriuMigrationArgs arguments for CRIU migration.
diff --git a/lxd/instance_exec.go b/lxd/instance_exec.go
index ecf52ded68..5ca8006228 100644
--- a/lxd/instance_exec.go
+++ b/lxd/instance_exec.go
@@ -43,6 +43,7 @@ type execWs struct {
 	controlConnected     chan struct{}
 	controlConnectedDone bool
 	fds                  map[int]string
+	devptsFd             *os.File
 }
 
 func (s *execWs) Metadata() interface{} {
@@ -131,7 +132,11 @@ func (s *execWs) Do(op *operations.Operation) error {
 	if s.req.Interactive {
 		ttys = make([]*os.File, 1)
 		ptys = make([]*os.File, 1)
-		ptys[0], ttys[0], err = shared.OpenPty(s.rootUid, s.rootGid)
+		ptys[0], ttys[0], err = shared.OpenPtyInDevpts(int(s.devptsFd.Fd()), s.rootUid, s.rootGid)
+		if s.devptsFd != nil {
+			s.devptsFd.Close()
+			s.devptsFd = nil
+		}
 		if err != nil {
 			return err
 		}
@@ -444,6 +449,11 @@ func containerExecPost(d *Daemon, r *http.Request) response.Response {
 			if idmapset != nil {
 				ws.rootUid, ws.rootGid = idmapset.ShiftIntoNs(0, 0)
 			}
+
+			devptsFd, err := c.DevptsFd()
+			if err == nil {
+				ws.devptsFd = devptsFd
+			}
 		}
 
 		ws.conns = map[int]*websocket.Conn{}
diff --git a/lxd/main_checkfeature.go b/lxd/main_checkfeature.go
index 511d607ec2..c84c3abc5b 100644
--- a/lxd/main_checkfeature.go
+++ b/lxd/main_checkfeature.go
@@ -42,6 +42,7 @@ import (
 #include "include/process_utils.h"
 #include "include/syscall_numbers.h"
 
+__ro_after_init bool tiocgptpeer_aware = false;
 __ro_after_init bool netnsid_aware = false;
 __ro_after_init bool pidfd_aware = false;
 __ro_after_init bool uevent_aware = false;
@@ -331,6 +332,29 @@ static void is_pidfd_aware(void)
 	pidfd_aware = true;
 }
 
+static void is_tiocgptpeer_aware(void)
+{
+	__do_close int ptx_fd = -EBADF, pty_fd = -EBADF;
+	int ret;
+
+	ptx_fd = open("/dev/ptmx", O_RDWR | O_NOCTTY | O_CLOEXEC);
+	if (ptx_fd < 0)
+		return;
+
+	ret = grantpt(ptx_fd);
+	if (ret < 0)
+		return;
+
+	ret = unlockpt(ptx_fd);
+	if (ret < 0)
+		return;
+
+	pty_fd = ioctl(ptx_fd, TIOCGPTPEER, O_RDWR | O_NOCTTY | O_CLOEXEC);
+	if (pty_fd < 0)
+		return;
+
+	tiocgptpeer_aware = true;
+}
 
 void checkfeature(void)
 {
@@ -340,6 +364,7 @@ void checkfeature(void)
 	is_pidfd_aware();
 	is_uevent_aware();
 	is_seccomp_notify_aware();
+	is_tiocgptpeer_aware();
 
 	if (setns(hostnetns_fd, CLONE_NEWNET) < 0)
 		(void)sprintf(errbuf, "%s", "Failed to attach to host network namespace");
@@ -414,3 +439,7 @@ func canUseShiftfs() bool {
 
 	return true
 }
+
+func canUseNativeTerminals() bool {
+	return bool(C.tiocgptpeer_aware)
+}
diff --git a/lxd/sys/os.go b/lxd/sys/os.go
index 143a71b632..d20b6aef85 100644
--- a/lxd/sys/os.go
+++ b/lxd/sys/os.go
@@ -63,6 +63,7 @@ type OS struct {
 	CGInfo cgroup.Info
 
 	// Kernel features
+	NativeTerminals         bool
 	NetnsGetifaddrs         bool
 	PidFds                  bool
 	SeccompListener         bool
diff --git a/shared/util_linux.go b/shared/util_linux.go
index fcbc898d07..1b3205c413 100644
--- a/shared/util_linux.go
+++ b/shared/util_linux.go
@@ -396,14 +396,20 @@ func DeviceTotalMemory() (int64, error) {
 	return -1, fmt.Errorf("Couldn't find MemTotal")
 }
 
-// OpenPty creates a new PTS pair, configures them and returns them.
-func OpenPty(uid, gid int64) (*os.File, *os.File, error) {
+// OpenPtyInDevpts creates a new PTS pair, configures them and returns them.
+func OpenPtyInDevpts(devpts_fd int, uid, gid int64) (*os.File, *os.File, error) {
 	revert := true
+	var ptx *os.File
+	var err error
 
 	// Create a PTS pair.
-	ptx, err := os.OpenFile("/dev/ptmx", os.O_RDWR|unix.O_CLOEXEC, 0)
-	if err != nil {
-		return nil, nil, err
+	if devpts_fd >= 0 {
+		fd, err := unix.Openat(devpts_fd, "ptmx", os.O_RDWR|unix.O_CLOEXEC, 0)
+		if err == nil {
+			ptx = os.NewFile(uintptr(fd), "/dev/pts/ptmx")
+		}
+	} else {
+		ptx, err = os.OpenFile("/dev/ptmx", os.O_RDWR|unix.O_CLOEXEC, 0)
 	}
 	defer func() {
 		if revert {
@@ -420,6 +426,7 @@ func OpenPty(uid, gid int64) (*os.File, *os.File, error) {
 
 	var pty *os.File
 	ptyFd, _, errno := unix.Syscall(unix.SYS_IOCTL, uintptr(ptx.Fd()), unix.TIOCGPTPEER, uintptr(unix.O_NOCTTY|unix.O_CLOEXEC|os.O_RDWR))
+	// We can only fallback to looking up the fd in /dev/pts when we aren't dealing with the container's devpts instance.
 	if errno == 0 {
 		// Get the pty side.
 		id := 0
@@ -429,7 +436,7 @@ func OpenPty(uid, gid int64) (*os.File, *os.File, error) {
 		}
 
 		pty = os.NewFile(ptyFd, fmt.Sprintf("/dev/pts/%d", id))
-	} else {
+	} else if devpts_fd < 0 {
 		// Get the pty side.
 		id := 0
 		_, _, errno = unix.Syscall(unix.SYS_IOCTL, uintptr(ptx.Fd()), unix.TIOCGPTN, uintptr(unsafe.Pointer(&id)))
@@ -498,6 +505,11 @@ func OpenPty(uid, gid int64) (*os.File, *os.File, error) {
 	return ptx, pty, nil
 }
 
+// OpenPty creates a new PTS pair, configures them and returns them.
+func OpenPty(uid, gid int64) (*os.File, *os.File, error) {
+	return OpenPtyInDevpts(-1, uid, gid)
+}
+
 // Extensively commented directly in the code. Please leave the comments!
 // Looking at this in a couple of months noone will know why and how this works
 // anymore.


More information about the lxc-devel mailing list