[lxc-devel] [lxd/master] Split util_linux.go cgo-specific code into a separate file

mjrider on Github lxc-bot at linuxcontainers.org
Mon Oct 1 04:40:18 UTC 2018


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 2010 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20181001/5090d2d6/attachment.bin>
-------------- next part --------------
From bdcbe41d913e6af68e0d2dcd61bfd0ea74f8da55 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Robbert=20M=C3=BCller?= <robbert.muller at dealerdirect.nl>
Date: Wed, 26 Sep 2018 15:42:07 +0200
Subject: [PATCH] Split code into 2 separate files

util_linux.go: all Linux util functions without cgo-specific code
util_linux_cgo.go: all Linux util functions that depend on cgo

Reason:

github.com/sl1pm4t/terraform-provider-lxd uses the shared lxd code for
interactions with the lxd daemon. Compiling it for other architectures
fails because archive_linux.go does not compile: it needs functions
defined in util_linux.go, which carries a "+build cgo" constraint.

This change fixes that compile issue:

```
gox
Number of parallel builds: 7

-->     windows/386: github.com/sl1pm4t/terraform-provider-lxd
-->       linux/arm: github.com/sl1pm4t/terraform-provider-lxd
-->   freebsd/amd64: github.com/sl1pm4t/terraform-provider-lxd
-->   windows/amd64: github.com/sl1pm4t/terraform-provider-lxd
-->      darwin/386: github.com/sl1pm4t/terraform-provider-lxd
-->     openbsd/386: github.com/sl1pm4t/terraform-provider-lxd
-->     freebsd/386: github.com/sl1pm4t/terraform-provider-lxd
-->       linux/386: github.com/sl1pm4t/terraform-provider-lxd
-->    darwin/amd64: github.com/sl1pm4t/terraform-provider-lxd
-->     linux/amd64: github.com/sl1pm4t/terraform-provider-lxd
-->      netbsd/arm: github.com/sl1pm4t/terraform-provider-lxd
-->     freebsd/arm: github.com/sl1pm4t/terraform-provider-lxd
-->      netbsd/386: github.com/sl1pm4t/terraform-provider-lxd
-->    netbsd/amd64: github.com/sl1pm4t/terraform-provider-lxd
-->   openbsd/amd64: github.com/sl1pm4t/terraform-provider-lxd
-->     linux/s390x: github.com/sl1pm4t/terraform-provider-lxd
```
---
 shared/util_linux.go     | 576 --------------------------------------
 shared/util_linux_cgo.go | 588 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 588 insertions(+), 576 deletions(-)
 create mode 100644 shared/util_linux_cgo.go

diff --git a/shared/util_linux.go b/shared/util_linux.go
index 043a408faf..2a71b7de56 100644
--- a/shared/util_linux.go
+++ b/shared/util_linux.go
@@ -1,421 +1,19 @@
 // +build linux
-// +build cgo
 
 package shared
 
 import (
 	"bufio"
-	"errors"
 	"fmt"
-	"io"
 	"os"
 	"path/filepath"
 	"reflect"
 	"strings"
-	"sync"
-	"sync/atomic"
 	"syscall"
 	"unsafe"
 
-	"github.com/lxc/lxd/shared/logger"
 )
 
-// #cgo LDFLAGS: -lutil -lpthread
-/*
-#define _GNU_SOURCE
-#include <errno.h>
-#include <fcntl.h>
-#include <grp.h>
-#include <limits.h>
-#include <poll.h>
-#include <pty.h>
-#include <pwd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/stat.h>
-#include <sys/socket.h>
-#include <sys/types.h>
-#include <sys/un.h>
-
-#ifndef AT_SYMLINK_FOLLOW
-#define AT_SYMLINK_FOLLOW    0x400
-#endif
-
-#ifndef AT_EMPTY_PATH
-#define AT_EMPTY_PATH       0x1000
-#endif
-
-#define ABSTRACT_UNIX_SOCK_LEN sizeof(((struct sockaddr_un *)0)->sun_path)
-
-// This is an adaption from https://codereview.appspot.com/4589049, to be
-// included in the stdlib with the stdlib's license.
-
-static int mygetgrgid_r(int gid, struct group *grp,
-	char *buf, size_t buflen, struct group **result) {
-	return getgrgid_r(gid, grp, buf, buflen, result);
-}
-
-void configure_pty(int fd) {
-	struct termios term_settings;
-	struct winsize win;
-
-	if (tcgetattr(fd, &term_settings) < 0) {
-		fprintf(stderr, "Failed to get settings: %s\n", strerror(errno));
-		return;
-	}
-
-	term_settings.c_iflag |= IMAXBEL;
-	term_settings.c_iflag |= IUTF8;
-	term_settings.c_iflag |= BRKINT;
-	term_settings.c_iflag |= IXANY;
-
-	term_settings.c_cflag |= HUPCL;
-
-	if (tcsetattr(fd, TCSANOW, &term_settings) < 0) {
-		fprintf(stderr, "Failed to set settings: %s\n", strerror(errno));
-		return;
-	}
-
-	if (ioctl(fd, TIOCGWINSZ, &win) < 0) {
-		fprintf(stderr, "Failed to get the terminal size: %s\n", strerror(errno));
-		return;
-	}
-
-	win.ws_col = 80;
-	win.ws_row = 25;
-
-	if (ioctl(fd, TIOCSWINSZ, &win) < 0) {
-		fprintf(stderr, "Failed to set the terminal size: %s\n", strerror(errno));
-		return;
-	}
-
-	if (fcntl(fd, F_SETFD, FD_CLOEXEC) < 0) {
-		fprintf(stderr, "Failed to set FD_CLOEXEC: %s\n", strerror(errno));
-		return;
-	}
-
-	return;
-}
-
-void create_pty(int *master, int *slave, uid_t uid, gid_t gid) {
-	if (openpty(master, slave, NULL, NULL, NULL) < 0) {
-		fprintf(stderr, "Failed to openpty: %s\n", strerror(errno));
-		return;
-	}
-
-	configure_pty(*master);
-	configure_pty(*slave);
-
-	if (fchown(*slave, uid, gid) < 0) {
-		fprintf(stderr, "Warning: error chowning pty to container root\n");
-		fprintf(stderr, "Continuing...\n");
-	}
-	if (fchown(*master, uid, gid) < 0) {
-		fprintf(stderr, "Warning: error chowning pty to container root\n");
-		fprintf(stderr, "Continuing...\n");
-	}
-}
-
-void create_pipe(int *master, int *slave) {
-	int pipefd[2];
-
-	if (pipe2(pipefd, O_CLOEXEC) < 0) {
-		fprintf(stderr, "Failed to create a pipe: %s\n", strerror(errno));
-		return;
-	}
-
-	*master = pipefd[0];
-	*slave = pipefd[1];
-}
-
-int get_poll_revents(int lfd, int timeout, int flags, int *revents, int *saved_errno)
-{
-	int ret;
-	struct pollfd pfd = {lfd, flags, 0};
-
-again:
-	ret = poll(&pfd, 1, timeout);
-	if (ret < 0) {
-		if (errno == EINTR)
-			goto again;
-
-		*saved_errno = errno;
-		fprintf(stderr, "Failed to poll() on file descriptor.\n");
-		return -1;
-	}
-
-	*revents = pfd.revents;
-
-	return ret;
-}
-
-int lxc_abstract_unix_send_fds(int fd, int *sendfds, int num_sendfds,
-			       void *data, size_t size)
-{
-	int ret;
-	struct msghdr msg;
-	struct iovec iov;
-	struct cmsghdr *cmsg = NULL;
-	char buf[1] = {0};
-	char *cmsgbuf;
-	size_t cmsgbufsize = CMSG_SPACE(num_sendfds * sizeof(int));
-
-	memset(&msg, 0, sizeof(msg));
-	memset(&iov, 0, sizeof(iov));
-
-	cmsgbuf = malloc(cmsgbufsize);
-	if (!cmsgbuf)
-		return -1;
-
-	msg.msg_control = cmsgbuf;
-	msg.msg_controllen = cmsgbufsize;
-
-	cmsg = CMSG_FIRSTHDR(&msg);
-	cmsg->cmsg_level = SOL_SOCKET;
-	cmsg->cmsg_type = SCM_RIGHTS;
-	cmsg->cmsg_len = CMSG_LEN(num_sendfds * sizeof(int));
-
-	msg.msg_controllen = cmsg->cmsg_len;
-
-	memcpy(CMSG_DATA(cmsg), sendfds, num_sendfds * sizeof(int));
-
-	iov.iov_base = data ? data : buf;
-	iov.iov_len = data ? size : sizeof(buf);
-	msg.msg_iov = &iov;
-	msg.msg_iovlen = 1;
-
-	ret = sendmsg(fd, &msg, MSG_NOSIGNAL);
-	if (ret < 0)
-		fprintf(stderr, "%s - Failed to send file descriptor\n", strerror(errno));
-	free(cmsgbuf);
-	return ret;
-}
-
-int lxc_abstract_unix_recv_fds(int fd, int *recvfds, int num_recvfds,
-			       void *data, size_t size)
-{
-	int ret;
-	struct msghdr msg;
-	struct iovec iov;
-	struct cmsghdr *cmsg = NULL;
-	char buf[1] = {0};
-	char *cmsgbuf;
-	size_t cmsgbufsize = CMSG_SPACE(num_recvfds * sizeof(int));
-
-	memset(&msg, 0, sizeof(msg));
-	memset(&iov, 0, sizeof(iov));
-
-	cmsgbuf = malloc(cmsgbufsize);
-	if (!cmsgbuf)
-		return -1;
-
-	msg.msg_control = cmsgbuf;
-	msg.msg_controllen = cmsgbufsize;
-
-	iov.iov_base = data ? data : buf;
-	iov.iov_len = data ? size : sizeof(buf);
-	msg.msg_iov = &iov;
-	msg.msg_iovlen = 1;
-
-	ret = recvmsg(fd, &msg, 0);
-	if (ret <= 0) {
-		fprintf(stderr, "%s - Failed to receive file descriptor\n", strerror(errno));
-		goto out;
-	}
-
-	cmsg = CMSG_FIRSTHDR(&msg);
-
-	memset(recvfds, -1, num_recvfds * sizeof(int));
-	if (cmsg && cmsg->cmsg_len == CMSG_LEN(num_recvfds * sizeof(int)) &&
-	    cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
-		memcpy(recvfds, CMSG_DATA(cmsg), num_recvfds * sizeof(int));
-	}
-
-out:
-	free(cmsgbuf);
-	return ret;
-}
-*/
-// #cgo CFLAGS: -std=gnu11 -Wvla
-import "C"
-
-const ABSTRACT_UNIX_SOCK_LEN int = C.ABSTRACT_UNIX_SOCK_LEN
-
-const POLLIN int = C.POLLIN
-const POLLPRI int = C.POLLPRI
-const POLLNVAL int = C.POLLNVAL
-const POLLERR int = C.POLLERR
-const POLLHUP int = C.POLLHUP
-const POLLRDHUP int = C.POLLRDHUP
-
-func GetPollRevents(fd int, timeout int, flags int) (int, int, error) {
-	var err error
-	revents := C.int(0)
-	saved_errno := C.int(0)
-
-	ret := C.get_poll_revents(C.int(fd), C.int(timeout), C.int(flags), &revents, &saved_errno)
-	if int(ret) < 0 {
-		err = syscall.Errno(saved_errno)
-	}
-
-	return int(ret), int(revents), err
-}
-
-func AbstractUnixSendFd(sockFD int, sendFD int) error {
-	fd := C.int(sendFD)
-	sk_fd := C.int(sockFD)
-	ret := C.lxc_abstract_unix_send_fds(sk_fd, &fd, C.int(1), nil, C.size_t(0))
-	if ret < 0 {
-		return fmt.Errorf("Failed to send file descriptor via abstract unix socket")
-	}
-
-	return nil
-}
-
-func AbstractUnixReceiveFd(sockFD int) (*os.File, error) {
-	fd := C.int(-1)
-	sk_fd := C.int(sockFD)
-	ret := C.lxc_abstract_unix_recv_fds(sk_fd, &fd, C.int(1), nil, C.size_t(0))
-	if ret < 0 {
-		return nil, fmt.Errorf("Failed to receive file descriptor via abstract unix socket")
-	}
-
-	file := os.NewFile(uintptr(fd), "")
-	return file, nil
-}
-
-func OpenPty(uid, gid int64) (master *os.File, slave *os.File, err error) {
-	fd_master := C.int(-1)
-	fd_slave := C.int(-1)
-	rootUid := C.uid_t(uid)
-	rootGid := C.gid_t(gid)
-
-	C.create_pty(&fd_master, &fd_slave, rootUid, rootGid)
-
-	if fd_master == -1 || fd_slave == -1 {
-		return nil, nil, errors.New("Failed to create a new pts pair")
-	}
-
-	master = os.NewFile(uintptr(fd_master), "master")
-	slave = os.NewFile(uintptr(fd_slave), "slave")
-
-	return master, slave, nil
-}
-
-func Pipe() (master *os.File, slave *os.File, err error) {
-	fd_master := C.int(-1)
-	fd_slave := C.int(-1)
-
-	C.create_pipe(&fd_master, &fd_slave)
-
-	if fd_master == -1 || fd_slave == -1 {
-		return nil, nil, errors.New("Failed to create a new pipe")
-	}
-
-	master = os.NewFile(uintptr(fd_master), "master")
-	slave = os.NewFile(uintptr(fd_slave), "slave")
-
-	return master, slave, nil
-}
-
-// UserId is an adaption from https://codereview.appspot.com/4589049.
-func UserId(name string) (int, error) {
-	var pw C.struct_passwd
-	var result *C.struct_passwd
-
-	bufSize := C.sysconf(C._SC_GETPW_R_SIZE_MAX)
-	if bufSize < 0 {
-		bufSize = 4096
-	}
-
-	buf := C.malloc(C.size_t(bufSize))
-	if buf == nil {
-		return -1, fmt.Errorf("allocation failed")
-	}
-	defer C.free(buf)
-
-	cname := C.CString(name)
-	defer C.free(unsafe.Pointer(cname))
-
-again:
-	rv, errno := C.getpwnam_r(cname,
-		&pw,
-		(*C.char)(buf),
-		C.size_t(bufSize),
-		&result)
-	if rv < 0 {
-		// OOM killer will take care of us if we end up doing this too
-		// often.
-		if errno == syscall.ERANGE {
-			bufSize *= 2
-			tmp := C.realloc(buf, C.size_t(bufSize))
-			if tmp == nil {
-				return -1, fmt.Errorf("allocation failed")
-			}
-			buf = tmp
-			goto again
-		}
-		return -1, fmt.Errorf("failed user lookup: %s", syscall.Errno(rv))
-	}
-
-	if result == nil {
-		return -1, fmt.Errorf("unknown user %s", name)
-	}
-
-	return int(C.int(result.pw_uid)), nil
-}
-
-// GroupId is an adaption from https://codereview.appspot.com/4589049.
-func GroupId(name string) (int, error) {
-	var grp C.struct_group
-	var result *C.struct_group
-
-	bufSize := C.sysconf(C._SC_GETGR_R_SIZE_MAX)
-	if bufSize < 0 {
-		bufSize = 4096
-	}
-
-	buf := C.malloc(C.size_t(bufSize))
-	if buf == nil {
-		return -1, fmt.Errorf("allocation failed")
-	}
-
-	cname := C.CString(name)
-	defer C.free(unsafe.Pointer(cname))
-
-again:
-	rv, errno := C.getgrnam_r(cname,
-		&grp,
-		(*C.char)(buf),
-		C.size_t(bufSize),
-		&result)
-	if rv != 0 {
-		// OOM killer will take care of us if we end up doing this too
-		// often.
-		if errno == syscall.ERANGE {
-			bufSize *= 2
-			tmp := C.realloc(buf, C.size_t(bufSize))
-			if tmp == nil {
-				return -1, fmt.Errorf("allocation failed")
-			}
-			buf = tmp
-			goto again
-		}
-
-		C.free(buf)
-		return -1, fmt.Errorf("failed group lookup: %s", syscall.Errno(rv))
-	}
-	C.free(buf)
-
-	if result == nil {
-		return -1, fmt.Errorf("unknown group %s", name)
-	}
-
-	return int(C.int(result.gr_gid)), nil
-}
-
 // --- pure Go functions ---
 
 func Major(dev uint64) int {
@@ -622,180 +220,6 @@ func GetAllXattr(path string) (xattrs map[string]string, err error) {
 	return xattrs, nil
 }
 
-// Extensively commented directly in the code. Please leave the comments!
-// Looking at this in a couple of months noone will know why and how this works
-// anymore.
-func ExecReaderToChannel(r io.Reader, bufferSize int, exited <-chan bool, fd int) <-chan []byte {
-	if bufferSize <= (128 * 1024) {
-		bufferSize = (128 * 1024)
-	}
-
-	ch := make(chan ([]byte))
-
-	// Takes care that the closeChannel() function is exactly executed once.
-	// This allows us to avoid using a mutex.
-	var once sync.Once
-	closeChannel := func() {
-		close(ch)
-	}
-
-	// [1]: This function has just one job: Dealing with the case where we
-	// are running an interactive shell session where we put a process in
-	// the background that does hold stdin/stdout open, but does not
-	// generate any output at all. This case cannot be dealt with in the
-	// following function call. Here's why: Assume the above case, now the
-	// attached child (the shell in this example) exits. This will not
-	// generate any poll() event: We won't get POLLHUP because the
-	// background process is holding stdin/stdout open and noone is writing
-	// to it. So we effectively block on GetPollRevents() in the function
-	// below. Hence, we use another go routine here who's only job is to
-	// handle that case: When we detect that the child has exited we check
-	// whether a POLLIN or POLLHUP event has been generated. If not, we know
-	// that there's nothing buffered on stdout and exit.
-	var attachedChildIsDead int32 = 0
-	go func() {
-		<-exited
-
-		atomic.StoreInt32(&attachedChildIsDead, 1)
-
-		ret, revents, err := GetPollRevents(fd, 0, (POLLIN | POLLPRI | POLLERR | POLLHUP | POLLRDHUP | POLLNVAL))
-		if ret < 0 {
-			logger.Errorf("Failed to poll(POLLIN | POLLPRI | POLLHUP | POLLRDHUP) on file descriptor: %s.", err)
-		} else if ret > 0 {
-			if (revents & POLLERR) > 0 {
-				logger.Warnf("Detected poll(POLLERR) event.")
-			} else if (revents & POLLNVAL) > 0 {
-				logger.Warnf("Detected poll(POLLNVAL) event.")
-			}
-		} else if ret == 0 {
-			logger.Debugf("No data in stdout: exiting.")
-			once.Do(closeChannel)
-			return
-		}
-	}()
-
-	go func() {
-		readSize := (128 * 1024)
-		offset := 0
-		buf := make([]byte, bufferSize)
-		avoidAtomicLoad := false
-
-		defer once.Do(closeChannel)
-		for {
-			nr := 0
-			var err error
-
-			ret, revents, err := GetPollRevents(fd, -1, (POLLIN | POLLPRI | POLLERR | POLLHUP | POLLRDHUP | POLLNVAL))
-			if ret < 0 {
-				// This condition is only reached in cases where we are massively f*cked since we even handle
-				// EINTR in the underlying C wrapper around poll(). So let's exit here.
-				logger.Errorf("Failed to poll(POLLIN | POLLPRI | POLLERR | POLLHUP | POLLRDHUP) on file descriptor: %s. Exiting.", err)
-				return
-			}
-
-			// [2]: If the process exits before all its data has been read by us and no other process holds stdin or
-			// stdout open, then we will observe a (POLLHUP | POLLRDHUP | POLLIN) event. This means, we need to
-			// keep on reading from the pty file descriptor until we get a simple POLLHUP back.
-			both := ((revents & (POLLIN | POLLPRI)) > 0) && ((revents & (POLLHUP | POLLRDHUP)) > 0)
-			if both {
-				logger.Debugf("Detected poll(POLLIN | POLLPRI | POLLHUP | POLLRDHUP) event.")
-				read := buf[offset : offset+readSize]
-				nr, err = r.Read(read)
-			}
-
-			if (revents & POLLERR) > 0 {
-				logger.Warnf("Detected poll(POLLERR) event: exiting.")
-				return
-			} else if (revents & POLLNVAL) > 0 {
-				logger.Warnf("Detected poll(POLLNVAL) event: exiting.")
-				return
-			}
-
-			if ((revents & (POLLIN | POLLPRI)) > 0) && !both {
-				// This might appear unintuitive at first but is actually a nice trick: Assume we are running
-				// a shell session in a container and put a process in the background that is writing to
-				// stdout. Now assume the attached process (aka the shell in this example) exits because we
-				// used Ctrl+D to send EOF or something. If no other process would be holding stdout open we
-				// would expect to observe either a (POLLHUP | POLLRDHUP | POLLIN | POLLPRI) event if there
-				// is still data buffered from the previous process or a simple (POLLHUP | POLLRDHUP) if
-				// no data is buffered. The fact that we only observe a (POLLIN | POLLPRI) event means that
-				// another process is holding stdout open and is writing to it.
-				// One counter argument that can be leveraged is (brauner looks at tycho :))
-				// "Hey, you need to write at least one additional tty buffer to make sure that
-				// everything that the attached child has written is actually shown."
-				// The answer to that is:
-				// "This case can only happen if the process has exited and has left data in stdout which
-				// would generate a (POLLIN | POLLPRI | POLLHUP | POLLRDHUP) event and this case is already
-				// handled and triggers another codepath. (See [2].)"
-				if avoidAtomicLoad || atomic.LoadInt32(&attachedChildIsDead) == 1 {
-					avoidAtomicLoad = true
-					// Handle race between atomic.StorInt32() in the go routine
-					// explained in [1] and atomic.LoadInt32() in the go routine
-					// here:
-					// We need to check for (POLLHUP | POLLRDHUP) here again since we might
-					// still be handling a pure POLLIN event from a write prior to the childs
-					// exit. But the child might have exited right before and performed
-					// atomic.StoreInt32() to update attachedChildIsDead before we
-					// performed our atomic.LoadInt32(). This means we accidentally hit this
-					// codepath and are misinformed about the available poll() events. So we
-					// need to perform a non-blocking poll() again to exclude that case:
-					//
-					// - If we detect no (POLLHUP | POLLRDHUP) event we know the child
-					//   has already exited but someone else is holding stdin/stdout open and
-					//   writing to it.
-					//   Note that his case should only ever be triggered in situations like
-					//   running a shell and doing stuff like:
-					//    > ./lxc exec xen1 -- bash
-					//   root at xen1:~# yes &
-					//   .
-					//   .
-					//   .
-					//   now send Ctrl+D or type "exit". By the time the Ctrl+D/exit event is
-					//   triggered, we will have read all of the childs data it has written to
-					//   stdout and so we can assume that anything that comes now belongs to
-					//   the process that is holding stdin/stdout open.
-					//
-					// - If we detect a (POLLHUP | POLLRDHUP) event we know that we've
-					//   hit this codepath on accident caused by the race between
-					//   atomic.StoreInt32() in the go routine explained in [1] and
-					//   atomic.LoadInt32() in this go routine. So the next call to
-					//   GetPollRevents() will either return
-					//   (POLLIN | POLLPRI | POLLERR | POLLHUP | POLLRDHUP)
-					//   or (POLLHUP | POLLRDHUP). Both will trigger another codepath (See [2].)
-					//   that takes care that all data of the child that is buffered in
-					//   stdout is written out.
-					ret, revents, err := GetPollRevents(fd, 0, (POLLIN | POLLPRI | POLLERR | POLLHUP | POLLRDHUP | POLLNVAL))
-					if ret < 0 {
-						logger.Errorf("Failed to poll(POLLIN | POLLPRI | POLLERR | POLLHUP | POLLRDHUP) on file descriptor: %s. Exiting.", err)
-						return
-					} else if (revents & (POLLHUP | POLLRDHUP | POLLERR | POLLNVAL)) == 0 {
-						logger.Debugf("Exiting but background processes are still running.")
-						return
-					}
-				}
-				read := buf[offset : offset+readSize]
-				nr, err = r.Read(read)
-			}
-
-			// The attached process has exited and we have read all data that may have
-			// been buffered.
-			if ((revents & (POLLHUP | POLLRDHUP)) > 0) && !both {
-				logger.Debugf("Detected poll(POLLHUP) event: exiting.")
-				return
-			}
-
-			offset += nr
-			if offset > 0 && (offset+readSize >= bufferSize || err != nil) {
-				ch <- buf[0:offset]
-				offset = 0
-				buf = make([]byte, bufferSize)
-			}
-		}
-	}()
-
-	return ch
-}
-
 var ObjectFound = fmt.Errorf("Found requested object")
 
 func LookupUUIDByBlockDevPath(diskDevice string) (string, error) {
diff --git a/shared/util_linux_cgo.go b/shared/util_linux_cgo.go
new file mode 100644
index 0000000000..aa80b9df0a
--- /dev/null
+++ b/shared/util_linux_cgo.go
@@ -0,0 +1,588 @@
+// +build linux
+// +build cgo
+
+package shared
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"sync"
+	"sync/atomic"
+	"syscall"
+	"unsafe"
+
+	"github.com/lxc/lxd/shared/logger"
+)
+
+// #cgo LDFLAGS: -lutil -lpthread
+/*
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <grp.h>
+#include <limits.h>
+#include <poll.h>
+#include <pty.h>
+#include <pwd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#ifndef AT_SYMLINK_FOLLOW
+#define AT_SYMLINK_FOLLOW    0x400
+#endif
+
+#ifndef AT_EMPTY_PATH
+#define AT_EMPTY_PATH       0x1000
+#endif
+
+#define ABSTRACT_UNIX_SOCK_LEN sizeof(((struct sockaddr_un *)0)->sun_path)
+
+// This is an adaption from https://codereview.appspot.com/4589049, to be
+// included in the stdlib with the stdlib's license.
+
+static int mygetgrgid_r(int gid, struct group *grp,
+	char *buf, size_t buflen, struct group **result) {
+	return getgrgid_r(gid, grp, buf, buflen, result);
+}
+
+void configure_pty(int fd) {
+	struct termios term_settings;
+	struct winsize win;
+
+	if (tcgetattr(fd, &term_settings) < 0) {
+		fprintf(stderr, "Failed to get settings: %s\n", strerror(errno));
+		return;
+	}
+
+	term_settings.c_iflag |= IMAXBEL;
+	term_settings.c_iflag |= IUTF8;
+	term_settings.c_iflag |= BRKINT;
+	term_settings.c_iflag |= IXANY;
+
+	term_settings.c_cflag |= HUPCL;
+
+	if (tcsetattr(fd, TCSANOW, &term_settings) < 0) {
+		fprintf(stderr, "Failed to set settings: %s\n", strerror(errno));
+		return;
+	}
+
+	if (ioctl(fd, TIOCGWINSZ, &win) < 0) {
+		fprintf(stderr, "Failed to get the terminal size: %s\n", strerror(errno));
+		return;
+	}
+
+	win.ws_col = 80;
+	win.ws_row = 25;
+
+	if (ioctl(fd, TIOCSWINSZ, &win) < 0) {
+		fprintf(stderr, "Failed to set the terminal size: %s\n", strerror(errno));
+		return;
+	}
+
+	if (fcntl(fd, F_SETFD, FD_CLOEXEC) < 0) {
+		fprintf(stderr, "Failed to set FD_CLOEXEC: %s\n", strerror(errno));
+		return;
+	}
+
+	return;
+}
+
+void create_pty(int *master, int *slave, uid_t uid, gid_t gid) {
+	if (openpty(master, slave, NULL, NULL, NULL) < 0) {
+		fprintf(stderr, "Failed to openpty: %s\n", strerror(errno));
+		return;
+	}
+
+	configure_pty(*master);
+	configure_pty(*slave);
+
+	if (fchown(*slave, uid, gid) < 0) {
+		fprintf(stderr, "Warning: error chowning pty to container root\n");
+		fprintf(stderr, "Continuing...\n");
+	}
+	if (fchown(*master, uid, gid) < 0) {
+		fprintf(stderr, "Warning: error chowning pty to container root\n");
+		fprintf(stderr, "Continuing...\n");
+	}
+}
+
+void create_pipe(int *master, int *slave) {
+	int pipefd[2];
+
+	if (pipe2(pipefd, O_CLOEXEC) < 0) {
+		fprintf(stderr, "Failed to create a pipe: %s\n", strerror(errno));
+		return;
+	}
+
+	*master = pipefd[0];
+	*slave = pipefd[1];
+}
+
+int get_poll_revents(int lfd, int timeout, int flags, int *revents, int *saved_errno)
+{
+	int ret;
+	struct pollfd pfd = {lfd, flags, 0};
+
+again:
+	ret = poll(&pfd, 1, timeout);
+	if (ret < 0) {
+		if (errno == EINTR)
+			goto again;
+
+		*saved_errno = errno;
+		fprintf(stderr, "Failed to poll() on file descriptor.\n");
+		return -1;
+	}
+
+	*revents = pfd.revents;
+
+	return ret;
+}
+
+int lxc_abstract_unix_send_fds(int fd, int *sendfds, int num_sendfds,
+			       void *data, size_t size)
+{
+	int ret;
+	struct msghdr msg;
+	struct iovec iov;
+	struct cmsghdr *cmsg = NULL;
+	char buf[1] = {0};
+	char *cmsgbuf;
+	size_t cmsgbufsize = CMSG_SPACE(num_sendfds * sizeof(int));
+
+	memset(&msg, 0, sizeof(msg));
+	memset(&iov, 0, sizeof(iov));
+
+	cmsgbuf = malloc(cmsgbufsize);
+	if (!cmsgbuf)
+		return -1;
+
+	msg.msg_control = cmsgbuf;
+	msg.msg_controllen = cmsgbufsize;
+
+	cmsg = CMSG_FIRSTHDR(&msg);
+	cmsg->cmsg_level = SOL_SOCKET;
+	cmsg->cmsg_type = SCM_RIGHTS;
+	cmsg->cmsg_len = CMSG_LEN(num_sendfds * sizeof(int));
+
+	msg.msg_controllen = cmsg->cmsg_len;
+
+	memcpy(CMSG_DATA(cmsg), sendfds, num_sendfds * sizeof(int));
+
+	iov.iov_base = data ? data : buf;
+	iov.iov_len = data ? size : sizeof(buf);
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+
+	ret = sendmsg(fd, &msg, MSG_NOSIGNAL);
+	if (ret < 0)
+		fprintf(stderr, "%s - Failed to send file descriptor\n", strerror(errno));
+	free(cmsgbuf);
+	return ret;
+}
+
+int lxc_abstract_unix_recv_fds(int fd, int *recvfds, int num_recvfds,
+			       void *data, size_t size)
+{
+	int ret;
+	struct msghdr msg;
+	struct iovec iov;
+	struct cmsghdr *cmsg = NULL;
+	char buf[1] = {0};
+	char *cmsgbuf;
+	size_t cmsgbufsize = CMSG_SPACE(num_recvfds * sizeof(int));
+
+	memset(&msg, 0, sizeof(msg));
+	memset(&iov, 0, sizeof(iov));
+
+	cmsgbuf = malloc(cmsgbufsize);
+	if (!cmsgbuf)
+		return -1;
+
+	msg.msg_control = cmsgbuf;
+	msg.msg_controllen = cmsgbufsize;
+
+	iov.iov_base = data ? data : buf;
+	iov.iov_len = data ? size : sizeof(buf);
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+
+	ret = recvmsg(fd, &msg, 0);
+	if (ret <= 0) {
+		fprintf(stderr, "%s - Failed to receive file descriptor\n", strerror(errno));
+		goto out;
+	}
+
+	cmsg = CMSG_FIRSTHDR(&msg);
+
+	memset(recvfds, -1, num_recvfds * sizeof(int));
+	if (cmsg && cmsg->cmsg_len == CMSG_LEN(num_recvfds * sizeof(int)) &&
+	    cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
+		memcpy(recvfds, CMSG_DATA(cmsg), num_recvfds * sizeof(int));
+	}
+
+out:
+	free(cmsgbuf);
+	return ret;
+}
+*/
+// #cgo CFLAGS: -std=gnu11 -Wvla
+import "C"
+
+const ABSTRACT_UNIX_SOCK_LEN int = C.ABSTRACT_UNIX_SOCK_LEN
+
+const POLLIN int = C.POLLIN
+const POLLPRI int = C.POLLPRI
+const POLLNVAL int = C.POLLNVAL
+const POLLERR int = C.POLLERR
+const POLLHUP int = C.POLLHUP
+const POLLRDHUP int = C.POLLRDHUP
+
+func GetPollRevents(fd int, timeout int, flags int) (int, int, error) {
+	var err error
+	revents := C.int(0)
+	saved_errno := C.int(0)
+
+	ret := C.get_poll_revents(C.int(fd), C.int(timeout), C.int(flags), &revents, &saved_errno)
+	if int(ret) < 0 {
+		err = syscall.Errno(saved_errno)
+	}
+
+	return int(ret), int(revents), err
+}
+
+func AbstractUnixSendFd(sockFD int, sendFD int) error {
+	fd := C.int(sendFD)
+	sk_fd := C.int(sockFD)
+	ret := C.lxc_abstract_unix_send_fds(sk_fd, &fd, C.int(1), nil, C.size_t(0))
+	if ret < 0 {
+		return fmt.Errorf("Failed to send file descriptor via abstract unix socket")
+	}
+
+	return nil
+}
+
+func AbstractUnixReceiveFd(sockFD int) (*os.File, error) {
+	fd := C.int(-1)
+	sk_fd := C.int(sockFD)
+	ret := C.lxc_abstract_unix_recv_fds(sk_fd, &fd, C.int(1), nil, C.size_t(0))
+	if ret < 0 {
+		return nil, fmt.Errorf("Failed to receive file descriptor via abstract unix socket")
+	}
+
+	file := os.NewFile(uintptr(fd), "")
+	return file, nil
+}
+
+func OpenPty(uid, gid int64) (master *os.File, slave *os.File, err error) {
+	fd_master := C.int(-1)
+	fd_slave := C.int(-1)
+	rootUid := C.uid_t(uid)
+	rootGid := C.gid_t(gid)
+
+	C.create_pty(&fd_master, &fd_slave, rootUid, rootGid)
+
+	if fd_master == -1 || fd_slave == -1 {
+		return nil, nil, errors.New("Failed to create a new pts pair")
+	}
+
+	master = os.NewFile(uintptr(fd_master), "master")
+	slave = os.NewFile(uintptr(fd_slave), "slave")
+
+	return master, slave, nil
+}
+
+func Pipe() (master *os.File, slave *os.File, err error) {
+	fd_master := C.int(-1)
+	fd_slave := C.int(-1)
+
+	C.create_pipe(&fd_master, &fd_slave)
+
+	if fd_master == -1 || fd_slave == -1 {
+		return nil, nil, errors.New("Failed to create a new pipe")
+	}
+
+	master = os.NewFile(uintptr(fd_master), "master")
+	slave = os.NewFile(uintptr(fd_slave), "slave")
+
+	return master, slave, nil
+}
+
+// UserId is an adaption from https://codereview.appspot.com/4589049.
+func UserId(name string) (int, error) {
+	var pw C.struct_passwd
+	var result *C.struct_passwd
+
+	bufSize := C.sysconf(C._SC_GETPW_R_SIZE_MAX)
+	if bufSize < 0 {
+		bufSize = 4096
+	}
+
+	buf := C.malloc(C.size_t(bufSize))
+	if buf == nil {
+		return -1, fmt.Errorf("allocation failed")
+	}
+	defer C.free(buf)
+
+	cname := C.CString(name)
+	defer C.free(unsafe.Pointer(cname))
+
+again:
+	rv, errno := C.getpwnam_r(cname,
+		&pw,
+		(*C.char)(buf),
+		C.size_t(bufSize),
+		&result)
+	if rv < 0 {
+		// OOM killer will take care of us if we end up doing this too
+		// often.
+		if errno == syscall.ERANGE {
+			bufSize *= 2
+			tmp := C.realloc(buf, C.size_t(bufSize))
+			if tmp == nil {
+				return -1, fmt.Errorf("allocation failed")
+			}
+			buf = tmp
+			goto again
+		}
+		return -1, fmt.Errorf("failed user lookup: %s", syscall.Errno(rv))
+	}
+
+	if result == nil {
+		return -1, fmt.Errorf("unknown user %s", name)
+	}
+
+	return int(C.int(result.pw_uid)), nil
+}
+
+// GroupId is an adaption from https://codereview.appspot.com/4589049.
+func GroupId(name string) (int, error) {
+	var grp C.struct_group
+	var result *C.struct_group
+
+	bufSize := C.sysconf(C._SC_GETGR_R_SIZE_MAX)
+	if bufSize < 0 {
+		bufSize = 4096
+	}
+
+	buf := C.malloc(C.size_t(bufSize))
+	if buf == nil {
+		return -1, fmt.Errorf("allocation failed")
+	}
+
+	cname := C.CString(name)
+	defer C.free(unsafe.Pointer(cname))
+
+again:
+	rv, errno := C.getgrnam_r(cname,
+		&grp,
+		(*C.char)(buf),
+		C.size_t(bufSize),
+		&result)
+	if rv != 0 {
+		// OOM killer will take care of us if we end up doing this too
+		// often.
+		if errno == syscall.ERANGE {
+			bufSize *= 2
+			tmp := C.realloc(buf, C.size_t(bufSize))
+			if tmp == nil {
+				return -1, fmt.Errorf("allocation failed")
+			}
+			buf = tmp
+			goto again
+		}
+
+		C.free(buf)
+		return -1, fmt.Errorf("failed group lookup: %s", syscall.Errno(rv))
+	}
+	C.free(buf)
+
+	if result == nil {
+		return -1, fmt.Errorf("unknown group %s", name)
+	}
+
+	return int(C.int(result.gr_gid)), nil
+}
+
+// ExecReaderToChannel polls fd and streams what it reads from r over the
+// returned channel, which is closed once polling ends. Extensively commented
+// inline; please leave the comments, or soon no one will know how this works.
+func ExecReaderToChannel(r io.Reader, bufferSize int, exited <-chan bool, fd int) <-chan []byte {
+	if bufferSize <= (128 * 1024) {
+		bufferSize = (128 * 1024) // enforce a 128 KiB minimum (equal to readSize below)
+	}
+
+	ch := make(chan ([]byte)) // unbuffered: each send blocks until a receiver takes it
+
+	// Takes care that the closeChannel() function is executed exactly once.
+	// This allows us to avoid using a mutex.
+	var once sync.Once
+	closeChannel := func() {
+		close(ch)
+	}
+
+	// [1]: This function has just one job: Dealing with the case where we
+	// are running an interactive shell session where we put a process in
+	// the background that does hold stdin/stdout open, but does not
+	// generate any output at all. This case cannot be dealt with in the
+	// following function call. Here's why: Assume the above case, now the
+	// attached child (the shell in this example) exits. This will not
+	// generate any poll() event: We won't get POLLHUP because the
+	// background process is holding stdin/stdout open and no one is writing
+	// to it. So we effectively block on GetPollRevents() in the function
+	// below. Hence, we use another go routine here whose only job is to
+	// handle that case: When we detect that the child has exited we check
+	// whether a POLLIN or POLLHUP event has been generated. If not, we know
+	// that there's nothing buffered on stdout and exit.
+	var attachedChildIsDead int32 = 0
+	go func() {
+		<-exited
+
+		atomic.StoreInt32(&attachedChildIsDead, 1)
+
+		ret, revents, err := GetPollRevents(fd, 0, (POLLIN | POLLPRI | POLLERR | POLLHUP | POLLRDHUP | POLLNVAL))
+		if ret < 0 {
+			logger.Errorf("Failed to poll(POLLIN | POLLPRI | POLLHUP | POLLRDHUP) on file descriptor: %s.", err)
+		} else if ret > 0 {
+			if (revents & POLLERR) > 0 {
+				logger.Warnf("Detected poll(POLLERR) event.")
+			} else if (revents & POLLNVAL) > 0 {
+				logger.Warnf("Detected poll(POLLNVAL) event.")
+			}
+		} else if ret == 0 {
+			logger.Debugf("No data in stdout: exiting.")
+			once.Do(closeChannel) // NOTE(review): the reader goroutine below may still send on ch; confirm no send-after-close
+			return
+		}
+	}()
+
+	go func() {
+		readSize := (128 * 1024)
+		offset := 0
+		buf := make([]byte, bufferSize)
+		avoidAtomicLoad := false
+
+		defer once.Do(closeChannel) // close ch on every return path of this goroutine
+		for {
+			nr := 0
+			var err error
+
+			ret, revents, err := GetPollRevents(fd, -1, (POLLIN | POLLPRI | POLLERR | POLLHUP | POLLRDHUP | POLLNVAL))
+			if ret < 0 {
+				// This condition is only reached when something is seriously wrong, since we even handle
+				// EINTR in the underlying C wrapper around poll(). So let's exit here.
+				logger.Errorf("Failed to poll(POLLIN | POLLPRI | POLLERR | POLLHUP | POLLRDHUP) on file descriptor: %s. Exiting.", err)
+				return
+			}
+
+			// [2]: If the process exits before all its data has been read by us and no other process holds stdin or
+			// stdout open, then we will observe a (POLLHUP | POLLRDHUP | POLLIN) event. This means, we need to
+			// keep on reading from the pty file descriptor until we get a simple POLLHUP back.
+			both := ((revents & (POLLIN | POLLPRI)) > 0) && ((revents & (POLLHUP | POLLRDHUP)) > 0)
+			if both {
+				logger.Debugf("Detected poll(POLLIN | POLLPRI | POLLHUP | POLLRDHUP) event.")
+				read := buf[offset : offset+readSize]
+				nr, err = r.Read(read)
+			}
+
+			if (revents & POLLERR) > 0 {
+				logger.Warnf("Detected poll(POLLERR) event: exiting.")
+				return
+			} else if (revents & POLLNVAL) > 0 {
+				logger.Warnf("Detected poll(POLLNVAL) event: exiting.")
+				return
+			}
+
+			if ((revents & (POLLIN | POLLPRI)) > 0) && !both {
+				// This might appear unintuitive at first but is actually a nice trick: Assume we are running
+				// a shell session in a container and put a process in the background that is writing to
+				// stdout. Now assume the attached process (aka the shell in this example) exits because we
+				// used Ctrl+D to send EOF or something. If no other process would be holding stdout open we
+				// would expect to observe either a (POLLHUP | POLLRDHUP | POLLIN | POLLPRI) event if there
+				// is still data buffered from the previous process or a simple (POLLHUP | POLLRDHUP) if
+				// no data is buffered. The fact that we only observe a (POLLIN | POLLPRI) event means that
+				// another process is holding stdout open and is writing to it.
+				// One counter argument that can be leveraged is (brauner looks at tycho :))
+				// "Hey, you need to write at least one additional tty buffer to make sure that
+				// everything that the attached child has written is actually shown."
+				// The answer to that is:
+				// "This case can only happen if the process has exited and has left data in stdout which
+				// would generate a (POLLIN | POLLPRI | POLLHUP | POLLRDHUP) event and this case is already
+				// handled and triggers another codepath. (See [2].)"
+				if avoidAtomicLoad || atomic.LoadInt32(&attachedChildIsDead) == 1 {
+					avoidAtomicLoad = true
+					// Handle race between atomic.StoreInt32() in the go routine
+					// explained in [1] and atomic.LoadInt32() in the go routine
+					// here:
+					// We need to check for (POLLHUP | POLLRDHUP) here again since we might
+					// still be handling a pure POLLIN event from a write prior to the child's
+					// exit. But the child might have exited right before and performed
+					// atomic.StoreInt32() to update attachedChildIsDead before we
+					// performed our atomic.LoadInt32(). This means we accidentally hit this
+					// codepath and are misinformed about the available poll() events. So we
+					// need to perform a non-blocking poll() again to exclude that case:
+					//
+					// - If we detect no (POLLHUP | POLLRDHUP) event we know the child
+					//   has already exited but someone else is holding stdin/stdout open and
+					//   writing to it.
+					//   Note that this case should only ever be triggered in situations like
+					//   running a shell and doing stuff like:
+					//    > ./lxc exec xen1 -- bash
+					//   root@xen1:~# yes &
+					//   .
+					//   .
+					//   .
+					//   now send Ctrl+D or type "exit". By the time the Ctrl+D/exit event is
+					//   triggered, we will have read all of the child's data it has written to
+					//   stdout and so we can assume that anything that comes now belongs to
+					//   the process that is holding stdin/stdout open.
+					//
+					// - If we detect a (POLLHUP | POLLRDHUP) event we know that we've
+					//   hit this codepath by accident caused by the race between
+					//   atomic.StoreInt32() in the go routine explained in [1] and
+					//   atomic.LoadInt32() in this go routine. So the next call to
+					//   GetPollRevents() will either return
+					//   (POLLIN | POLLPRI | POLLERR | POLLHUP | POLLRDHUP)
+					//   or (POLLHUP | POLLRDHUP). Both will trigger another codepath (See [2].)
+					//   that takes care that all data of the child that is buffered in
+					//   stdout is written out.
+					ret, revents, err := GetPollRevents(fd, 0, (POLLIN | POLLPRI | POLLERR | POLLHUP | POLLRDHUP | POLLNVAL))
+					if ret < 0 {
+						logger.Errorf("Failed to poll(POLLIN | POLLPRI | POLLERR | POLLHUP | POLLRDHUP) on file descriptor: %s. Exiting.", err)
+						return
+					} else if (revents & (POLLHUP | POLLRDHUP | POLLERR | POLLNVAL)) == 0 {
+						logger.Debugf("Exiting but background processes are still running.")
+						return
+					}
+				}
+				read := buf[offset : offset+readSize]
+				nr, err = r.Read(read)
+			}
+
+			// The attached process has exited and we have read all data that may have
+			// been buffered.
+			if ((revents & (POLLHUP | POLLRDHUP)) > 0) && !both {
+				logger.Debugf("Detected poll(POLLHUP) event: exiting.")
+				return
+			}
+
+			offset += nr // bytes accumulated in buf that have not been sent yet
+			if offset > 0 && (offset+readSize >= bufferSize || err != nil) { // flush when another full read would not fit, or the read returned an error
+				ch <- buf[0:offset]
+				offset = 0
+				buf = make([]byte, bufferSize) // fresh backing array so the slice just sent is not overwritten
+			}
+		}
+	}()
+
+	return ch
+}
+


More information about the lxc-devel mailing list