[lxc-devel] [lxc/master] ringbuffer: implement efficient and performant ringbuffer

brauner on Github lxc-bot at linuxcontainers.org
Wed Oct 18 20:08:01 UTC 2017


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 2659 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20171018/4c0f6992/attachment.bin>
-------------- next part --------------
From e574ded08b246f3457c9d804e559324c676761c8 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Wed, 18 Oct 2017 13:36:36 +0200
Subject: [PATCH 01/11] utils: move memfd_create() definition

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/conf.c  | 57 ---------------------------------------------------------
 src/lxc/utils.h | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 57 deletions(-)

diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index 88ed2b7a6..3b1e422a3 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -68,10 +68,6 @@
 #include <../include/openpty.h>
 #endif
 
-#ifdef HAVE_LINUX_MEMFD_H
-#include <linux/memfd.h>
-#endif
-
 #include "af_unix.h"
 #include "caps.h"       /* for lxc_caps_last_cap() */
 #include "cgroup.h"
@@ -181,59 +177,6 @@ static int sethostname(const char * name, size_t len)
 #define MS_LAZYTIME (1<<25)
 #endif
 
-/* memfd_create() */
-#ifndef MFD_CLOEXEC
-#define MFD_CLOEXEC 0x0001U
-#endif
-
-#ifndef MFD_ALLOW_SEALING
-#define MFD_ALLOW_SEALING 0x0002U
-#endif
-
-#ifndef HAVE_MEMFD_CREATE
-static int memfd_create(const char *name, unsigned int flags) {
-	#ifndef __NR_memfd_create
-		#if defined __i386__
-			#define __NR_memfd_create 356
-		#elif defined __x86_64__
-			#define __NR_memfd_create 319
-		#elif defined __arm__
-			#define __NR_memfd_create 385
-		#elif defined __aarch64__
-			#define __NR_memfd_create 279
-		#elif defined __s390__
-			#define __NR_memfd_create 350
-		#elif defined __powerpc__
-			#define __NR_memfd_create 360
-		#elif defined __sparc__
-			#define __NR_memfd_create 348
-		#elif defined __blackfin__
-			#define __NR_memfd_create 390
-		#elif defined __ia64__
-			#define __NR_memfd_create 1340
-		#elif defined _MIPS_SIM
-			#if _MIPS_SIM == _MIPS_SIM_ABI32
-				#define __NR_memfd_create 4354
-			#endif
-			#if _MIPS_SIM == _MIPS_SIM_NABI32
-				#define __NR_memfd_create 6318
-			#endif
-			#if _MIPS_SIM == _MIPS_SIM_ABI64
-				#define __NR_memfd_create 5314
-			#endif
-		#endif
-	#endif
-	#ifdef __NR_memfd_create
-	return syscall(__NR_memfd_create, name, flags);
-	#else
-	errno = ENOSYS;
-	return -1;
-	#endif
-}
-#else
-extern int memfd_create(const char *name, unsigned int flags);
-#endif
-
 char *lxchook_names[NUM_LXC_HOOKS] = {"pre-start", "pre-mount", "mount",
 				      "autodev",   "start",     "stop",
 				      "post-stop", "clone",     "destroy",
diff --git a/src/lxc/utils.h b/src/lxc/utils.h
index 41c43827a..2005aa61e 100644
--- a/src/lxc/utils.h
+++ b/src/lxc/utils.h
@@ -39,6 +39,10 @@
 #include <sys/types.h>
 #include <sys/vfs.h>
 
+#ifdef HAVE_LINUX_MEMFD_H
+#include <linux/memfd.h>
+#endif
+
 #include "initutils.h"
 
 /* Define __S_ISTYPE if missing from the C library. */
@@ -184,6 +188,59 @@ static inline int signalfd(int fd, const sigset_t *mask, int flags)
 #define LOOP_CTL_GET_FREE 0x4C82
 #endif
 
+/* memfd_create(): flag fallbacks and, if needed, a raw syscall wrapper */
+#ifndef MFD_CLOEXEC
+#define MFD_CLOEXEC 0x0001U
+#endif /* MFD_CLOEXEC */
+
+#ifndef MFD_ALLOW_SEALING
+#define MFD_ALLOW_SEALING 0x0002U
+#endif /* MFD_ALLOW_SEALING */
+
+#ifndef HAVE_MEMFD_CREATE
+static inline int memfd_create(const char *name, unsigned int flags) {
+	#ifndef __NR_memfd_create
+		#if defined __i386__
+			#define __NR_memfd_create 356
+		#elif defined __x86_64__
+			#define __NR_memfd_create 319
+		#elif defined __arm__
+			#define __NR_memfd_create 385
+		#elif defined __aarch64__
+			#define __NR_memfd_create 279
+		#elif defined __s390__
+			#define __NR_memfd_create 350
+		#elif defined __powerpc__
+			#define __NR_memfd_create 360
+		#elif defined __sparc__
+			#define __NR_memfd_create 348
+		#elif defined __blackfin__
+			#define __NR_memfd_create 390
+		#elif defined __ia64__
+			#define __NR_memfd_create 1340
+		#elif defined _MIPS_SIM
+			#if _MIPS_SIM == _MIPS_SIM_ABI32
+				#define __NR_memfd_create 4354
+			#endif
+			#if _MIPS_SIM == _MIPS_SIM_NABI32
+				#define __NR_memfd_create 6318
+			#endif
+			#if _MIPS_SIM == _MIPS_SIM_ABI64
+				#define __NR_memfd_create 5314
+			#endif
+		#endif
+	#endif /* the number stays undefined on architectures not listed above */
+	#ifdef __NR_memfd_create
+	return syscall(__NR_memfd_create, name, flags);
+	#else /* no syscall number available: fail with ENOSYS */
+	errno = ENOSYS;
+	return -1;
+	#endif
+}
+#else /* HAVE_MEMFD_CREATE is defined: only declare the function */
+extern int memfd_create(const char *name, unsigned int flags);
+#endif /* HAVE_MEMFD_CREATE */
+
 /* Struct to carry child pid from lxc_popen() to lxc_pclose().
  * Not an opaque struct to allow direct access to the underlying FILE *
  * (i.e., struct lxc_popen_FILE *file; fgets(buf, sizeof(buf), file->f))

From e16d7a57c8b3251203952b3b8041bea16c8d9fa4 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Wed, 18 Oct 2017 19:48:29 +0200
Subject: [PATCH 02/11] utils: add lxc_cloexec()

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/utils.c | 16 ++++++++++++++++
 src/lxc/utils.h |  1 +
 2 files changed, 17 insertions(+)

diff --git a/src/lxc/utils.c b/src/lxc/utils.c
index 4c886cadd..68ff5cfc3 100644
--- a/src/lxc/utils.c
+++ b/src/lxc/utils.c
@@ -2339,3 +2339,19 @@ bool lxc_nic_exists(char *nic)
 
 	return true;
 }
+
+int lxc_cloexec(int fd, bool set)
+{
+	int cur, want;
+
+	/* fetch the current descriptor flags so unrelated bits are preserved */
+	cur = fcntl(fd, F_GETFD, 0);
+	if (cur < 0)
+		return -1;
+
+	/* switch only the close-on-exec bit on or off */
+	want = set ? (cur | FD_CLOEXEC) : (cur & ~FD_CLOEXEC);
+
+	/* hand back whatever fcntl() reports for the update */
+	return fcntl(fd, F_SETFD, want);
+}
diff --git a/src/lxc/utils.h b/src/lxc/utils.h
index 2005aa61e..1d2732589 100644
--- a/src/lxc/utils.h
+++ b/src/lxc/utils.h
@@ -461,5 +461,6 @@ typedef __typeof__(((struct statfs *)NULL)->f_type) fs_type_magic;
 extern bool has_fs_type(const char *path, fs_type_magic magic_val);
 extern bool is_fs_type(const struct statfs *fs, fs_type_magic magic_val);
 extern bool lxc_nic_exists(char *nic);
+extern int lxc_cloexec(int fd, bool set);
 
 #endif /* __LXC_UTILS_H */

From f50e3cbf9051f3e72f6f0e95fd8b45492b5875f4 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Wed, 18 Oct 2017 19:48:57 +0200
Subject: [PATCH 03/11] utils: add lxc_make_tmpfile()

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/utils.c | 25 +++++++++++++++++++++++++
 src/lxc/utils.h |  1 +
 2 files changed, 26 insertions(+)

diff --git a/src/lxc/utils.c b/src/lxc/utils.c
index 68ff5cfc3..52bd88e80 100644
--- a/src/lxc/utils.c
+++ b/src/lxc/utils.c
@@ -2355,3 +2355,28 @@ int lxc_cloexec(int fd, bool set)
 
 	return fcntl(fd, F_SETFD, oflags);
 }
+
+int lxc_make_tmpfile(char *template, int suffixlen, bool cloexec, bool rm)
+{
+	int tmpfd;
+
+	/* mkstemps() rewrites template in place with the chosen file name */
+	tmpfd = mkstemps(template, suffixlen);
+	if (tmpfd < 0)
+		return -1;
+
+	/* optionally remove the name right away; the fd stays usable */
+	if (rm && unlink(template) < 0) {
+		close(tmpfd);
+		return -1;
+	}
+
+	/* set or clear close-on-exec as requested by the caller */
+	if (lxc_cloexec(tmpfd, cloexec) < 0) {
+		close(tmpfd);
+		return -1;
+	}
+
+	/* the open descriptor is handed to the caller */
+	return tmpfd;
+}
diff --git a/src/lxc/utils.h b/src/lxc/utils.h
index 1d2732589..902635d32 100644
--- a/src/lxc/utils.h
+++ b/src/lxc/utils.h
@@ -462,5 +462,6 @@ extern bool has_fs_type(const char *path, fs_type_magic magic_val);
 extern bool is_fs_type(const struct statfs *fs, fs_type_magic magic_val);
 extern bool lxc_nic_exists(char *nic);
 extern int lxc_cloexec(int fd, bool set);
+extern int lxc_make_tmpfile(char *template, int suffixlen, bool cloexec, bool rm);
 
 #endif /* __LXC_UTILS_H */

From bf3c2b3a37d413d766e31cd9c62dbb5010998454 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Wed, 18 Oct 2017 14:19:31 +0200
Subject: [PATCH 04/11] ringbuf: implement simple and efficient ringbuffer

liblxc will use a ringbuffer implementation that employs mmap()ed memory.
Specifically, the ringbuffer will create an anonymous memory mapping twice the
requested size of the ringbuffer. Afterwards, an in-memory file of the requested
size of the ringbuffer will be created. This in-memory file will then be
memory mapped twice into the previously established anonymous memory mapping,
thereby effectively splitting the anonymous memory mapping into two halves of
equal size. This will allow the ringbuffer to get rid of any complex boundary
and wrap-around calculation logic. Since the underlying physical memory is the
same in both halves of the memory mapping, only a single memcpy() call is
needed for each read from and each write to the ringbuffer.

Design Notes:
- Since we're using MAP_FIXED memory mappings to map the same in-memory file
  twice into the anonymous memory mapping, the kernel requires us to always
  operate on properly aligned pages. To guarantee proper page alignment the size
  of the ringbuffer must always be a multiple of the kernel's page size. This
  also implies that the size of the ringbuffer must be at least one page. This
  additional requirement is reasonably unproblematic: any ringbuffer smaller
  than a single page is very likely useless since the standard page size on
  Linux is 4096 bytes.
- Because liblxc is not able to predict the output a user is going to produce
  (e.g. users could cat binary files onto the console) and because the
  ringbuffer is located in a hotpath and needs to be as performant as possible
  liblxc will not parse the buffer.

Use Case:
The ringbuffer is needed by liblxc in order to safely log the output of
write-intensive callers that produce unpredictable output or unpredictable
amounts of output. The console output created by a booting system and the user
is one of those cases. If a container were allowed to log the console's output
to a file, a malicious user could fill up the host filesystem by producing
random output on the container's console if quota support is either not
enabled or not available for the underlying filesystem. Using a ringbuffer
is a reliable and secure way to ensure a fixed-size log.

Closes #1857.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/Makefile.am |   1 +
 src/lxc/ringbuf.c   | 150 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/lxc/ringbuf.h   |  90 +++++++++++++++++++++++++++++++
 3 files changed, 241 insertions(+)
 create mode 100644 src/lxc/ringbuf.c
 create mode 100644 src/lxc/ringbuf.h

diff --git a/src/lxc/Makefile.am b/src/lxc/Makefile.am
index b71992d75..fff32ae4f 100644
--- a/src/lxc/Makefile.am
+++ b/src/lxc/Makefile.am
@@ -116,6 +116,7 @@ liblxc_la_SOURCES = \
 	log.c log.h \
 	attach.c attach.h \
 	criu.c criu.h \
+	ringbuf.c ringbuf.h \
 	\
 	network.c network.h \
 	nl.c nl.h \
diff --git a/src/lxc/ringbuf.c b/src/lxc/ringbuf.c
new file mode 100644
index 000000000..a78bac0c4
--- /dev/null
+++ b/src/lxc/ringbuf.c
@@ -0,0 +1,150 @@
+/* liblxcapi
+ *
+ * Copyright © 2017 Christian Brauner <christian.brauner at ubuntu.com>.
+ * Copyright © 2017 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#define _GNU_SOURCE
+#define __STDC_FORMAT_MACROS
+#include <errno.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include "ringbuf.h"
+#include "utils.h"
+
+int lxc_ringbuf_create(struct lxc_ringbuf *buf, size_t size)
+{
+	char *tmp;
+	int ret;
+	int memfd = -1;
+
+	buf->size = size;
+	buf->r_off = 0;
+	buf->w_off = 0;
+
+	/* verify that we are at least given the multiple of a page size */
+	if (buf->size % getpagesize())
+		return -EINVAL;
+
+	buf->addr = mmap(NULL, buf->size * 2, PROT_NONE,
+			 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	if (buf->addr == MAP_FAILED)
+		return -EINVAL;
+
+	/* Without memfd_create() (ENOSYS) use an unlinked temporary file. The
+	 * template must be writable and end in "XXXXXX" for mkstemps().
+	 */
+	memfd = memfd_create("lxc_ringbuf", MFD_CLOEXEC);
+	if (memfd < 0) {
+		if (errno != ENOSYS)
+			goto on_error;
+
+		memfd = lxc_make_tmpfile((char[]){"lxc_ringbuf_XXXXXX"}, 0,
+					 true, true);
+	}
+	if (memfd < 0)
+		goto on_error;
+
+	ret = ftruncate(memfd, buf->size);
+	if (ret < 0)
+		goto on_error;
+	/* map the file over the lower half of the reservation ... */
+	tmp = mmap(buf->addr, buf->size, PROT_READ | PROT_WRITE,
+		   MAP_FIXED | MAP_SHARED, memfd, 0);
+	if (tmp == MAP_FAILED || tmp != buf->addr)
+		goto on_error;
+	/* ... and once more over the upper half, from the same file offset */
+	tmp = mmap(buf->addr + buf->size, buf->size, PROT_READ | PROT_WRITE,
+		   MAP_FIXED | MAP_SHARED, memfd, 0);
+	if (tmp == MAP_FAILED || tmp != (buf->addr + buf->size))
+		goto on_error;
+	/* the descriptor is not needed anymore once both mappings exist */
+	close(memfd);
+
+	return 0;
+
+on_error:
+	lxc_ringbuf_release(buf);
+	if (memfd >= 0)
+		close(memfd);
+	return -1;
+}
+
+void lxc_ringbuf_move_read_addr(struct lxc_ringbuf *buf, size_t len)
+{
+	buf->r_off += len;
+
+	/* the read offset is still below size: nothing to normalize */
+	if (buf->r_off < buf->size)
+		return;
+
+	/* wrap around */
+	buf->r_off -= buf->size;
+	buf->w_off -= buf->size;
+}
+
+/**
+ * lxc_ringbuf_write - write a message to the ringbuffer
+ * - The size of the message should never be greater than the size of the whole
+ *   ringbuffer; such writes are rejected with -EFBIG.
+ * - Otherwise the write always succeeds: if there is not enough free space
+ *   r_off is advanced by exactly the missing amount, i.e. only the oldest
+ *   bytes are dropped. Returns 0 on success.
+ */
+int lxc_ringbuf_write(struct lxc_ringbuf *buf, const char *msg, size_t len)
+{
+	char *w_addr;
+	uint64_t free;
+
+	/* sanity check: a write should never exceed the ringbuffer's total size */
+	if (len > buf->size)
+		return -EFBIG;
+
+	free = lxc_ringbuf_free(buf);
+
+	/* not enough space left: discard the oldest (len - free) bytes only */
+	if (len > free)
+		lxc_ringbuf_move_read_addr(buf, len - free);
+	w_addr = lxc_ringbuf_get_write_addr(buf);
+	memcpy(w_addr, msg, len);
+	lxc_ringbuf_move_write_addr(buf, len);
+	return 0;
+}
+
+int lxc_ringbuf_read(struct lxc_ringbuf *buf, char *out, size_t *len)
+{
+	uint64_t used;
+
+	/* nothing to read, or no room in out for even the \0-byte */
+	if (buf->r_off == buf->w_off || *len == 0)
+		return -ENODATA;
+
+	/* read maximum amount available */
+	used = lxc_ringbuf_used(buf);
+	if (used < *len)
+		*len = used;
+
+	/* copy without advancing r_off; the last copied byte becomes \0 */
+	memcpy(out, lxc_ringbuf_get_read_addr(buf), *len);
+	out[*len - 1] = '\0';
+	return 0;
+}
diff --git a/src/lxc/ringbuf.h b/src/lxc/ringbuf.h
new file mode 100644
index 000000000..0e8e7922f
--- /dev/null
+++ b/src/lxc/ringbuf.h
@@ -0,0 +1,90 @@
+/* liblxcapi
+ *
+ * Copyright © 2017 Christian Brauner <christian.brauner at ubuntu.com>.
+ * Copyright © 2017 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef __LXC_RINGBUF_H
+#define __LXC_RINGBUF_H
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/mman.h>
+
+/**
+ * lxc_ringbuf - Implements a simple and efficient memory mapped ringbuffer.
+ * - The "addr" field of struct lxc_ringbuf is considered immutable. Instead the
+ *   read and write offsets r_off and w_off are used to calculate the current
+ *   read and write addresses. There should never be a need to use any of those
+ *   fields directly. Instead use the appropriate helpers below.
+ * - Callers are expected to synchronize read and write accesses to the
+ *   ringbuffer.
+ */
+struct lxc_ringbuf {
+	char *addr; /* start address of the (size * 2 bytes long) mapping */
+	uint64_t size; /* total size of the ringbuffer in bytes */
+	uint64_t r_off; /* read offset, relative to addr */
+	uint64_t w_off; /* write offset, relative to addr */
+};
+
+/**
+ * lxc_ringbuf_create - Initialize a new ringbuffer.
+ *
+ * @param[in] size	Size in bytes; must be a multiple of the page size.
+ */
+extern int lxc_ringbuf_create(struct lxc_ringbuf *buf, size_t size);
+extern void lxc_ringbuf_move_read_addr(struct lxc_ringbuf *buf, size_t len);
+extern int lxc_ringbuf_write(struct lxc_ringbuf *buf, const char *msg, size_t len);
+extern int lxc_ringbuf_read(struct lxc_ringbuf *buf, char *out, size_t *len);
+
+static inline void lxc_ringbuf_release(struct lxc_ringbuf *buf)
+{
+	munmap(buf->addr, buf->size * 2); /* the mapping spans size * 2 bytes */
+}
+
+static inline void lxc_ringbuf_clear(struct lxc_ringbuf *buf)
+{
+	buf->r_off = 0; /* equal offsets make lxc_ringbuf_used() return 0 */
+	buf->w_off = 0;
+}
+
+static inline uint64_t lxc_ringbuf_used(struct lxc_ringbuf *buf)
+{
+	return buf->w_off - buf->r_off; /* distance between the two offsets */
+}
+
+static inline uint64_t lxc_ringbuf_free(struct lxc_ringbuf *buf)
+{
+	return buf->size - lxc_ringbuf_used(buf); /* size minus used bytes */
+}
+
+static inline char *lxc_ringbuf_get_read_addr(struct lxc_ringbuf *buf)
+{
+	return buf->addr + buf->r_off; /* addr itself is never modified */
+}
+
+static inline char *lxc_ringbuf_get_write_addr(struct lxc_ringbuf *buf)
+{
+	return buf->addr + buf->w_off; /* addr itself is never modified */
+}
+
+static inline void lxc_ringbuf_move_write_addr(struct lxc_ringbuf *buf, size_t len)
+{
+	buf->w_off += len; /* no wrap-around handling is done here */
+}
+
+#endif /* __LXC_RINGBUF_H */

From 0e201f6fca12992b23403192982e6d366b4322b7 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Wed, 18 Oct 2017 19:53:17 +0200
Subject: [PATCH 05/11] utils: add lxc_safe_long_long()

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/utils.c | 20 ++++++++++++++++++++
 src/lxc/utils.h |  1 +
 2 files changed, 21 insertions(+)

diff --git a/src/lxc/utils.c b/src/lxc/utils.c
index 52bd88e80..d6f5b140b 100644
--- a/src/lxc/utils.c
+++ b/src/lxc/utils.c
@@ -2003,6 +2003,26 @@ int lxc_safe_long(const char *numstr, long int *converted)
 	return 0;
 }
 
+int lxc_safe_long_long(const char *numstr, long long int *converted)
+{
+	char *err = NULL;
+	long long int sli; /* as wide as strtoll()'s return type */
+
+	errno = 0;
+	sli = strtoll(numstr, &err, 0);
+	if (errno == ERANGE && (sli == LLONG_MAX || sli == LLONG_MIN))
+		return -ERANGE;
+
+	if (errno != 0 && sli == 0)
+		return -EINVAL;
+
+	if (err == numstr || *err != '\0')
+		return -EINVAL;
+
+	*converted = sli;
+	return 0;
+}
+
 int lxc_switch_uid_gid(uid_t uid, gid_t gid)
 {
 	if (setgid(gid) < 0) {
diff --git a/src/lxc/utils.h b/src/lxc/utils.h
index 902635d32..b98435cb1 100644
--- a/src/lxc/utils.h
+++ b/src/lxc/utils.h
@@ -414,6 +414,7 @@ extern bool task_blocking_signal(pid_t pid, int signal);
 extern int lxc_safe_uint(const char *numstr, unsigned int *converted);
 extern int lxc_safe_int(const char *numstr, int *converted);
 extern int lxc_safe_long(const char *numstr, long int *converted);
+extern int lxc_safe_long_long(const char *numstr, long long int *converted);
 extern int lxc_safe_ulong(const char *numstr, unsigned long *converted);
 
 /* Switch to a new uid and gid. */

From 97f357bd62017e647fd3a14040b140a301853d62 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Wed, 18 Oct 2017 19:53:37 +0200
Subject: [PATCH 06/11] utils: parse_byte_size_string()

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/utils.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/lxc/utils.h |  2 ++
 2 files changed, 79 insertions(+)

diff --git a/src/lxc/utils.c b/src/lxc/utils.c
index d6f5b140b..e6c84a0d5 100644
--- a/src/lxc/utils.c
+++ b/src/lxc/utils.c
@@ -47,6 +47,7 @@
 #include "log.h"
 #include "lxclock.h"
 #include "namespace.h"
+#include "parse.h"
 #include "utils.h"
 
 #ifndef O_PATH
@@ -2400,3 +2401,79 @@ int lxc_make_tmpfile(char *template, int suffixlen, bool cloexec, bool rm)
 	close(fd);
 	return -1;
 }
+
+int parse_byte_size_string(const char *s, int64_t *converted)
+{
+	int ret, suffix_len;
+	long long int conv;
+	int64_t mltpl;
+	char *end;
+	char dup[LXC_NUMSTRLEN64 + 2];
+	char suffix[3];
+
+	if (!s || !strcmp(s, "")) {
+		*converted = 0;
+		return 0;
+	}
+
+	/* refuse anything that does not fit into dup including its \0-byte */
+	if (strlen(s) >= sizeof(dup))
+		return -EINVAL;
+
+	/* s is non-empty and fits, so end points at the \0-byte in dup */
+	end = stpncpy(dup, s, sizeof(dup));
+
+	/* the last character decides whether there is a unit suffix */
+	if (isdigit((unsigned char)*(end - 1)))
+		suffix_len = 0;
+	else if (isalpha((unsigned char)*(end - 1)))
+		suffix_len = 1;
+	else
+		return -EINVAL;
+
+	/* two-letter suffix; only look at end - 2 if it lies inside dup */
+	if (suffix_len == 1 && (end - dup) >= 2 &&
+	    isalpha((unsigned char)*(end - 2)))
+		suffix_len = 2;
+
+	/* a suffix without a number in front of it is invalid */
+	if ((end - dup) == suffix_len)
+		return -EINVAL;
+
+	/* split the suffix off so that dup only holds the number */
+	if (suffix_len > 0) {
+		end -= suffix_len;
+		memcpy(suffix, end, suffix_len);
+		suffix[suffix_len] = '\0';
+		*end = '\0';
+	}
+
+	/* cut dup at the index lxc_char_right_gc() reports for the number */
+	dup[lxc_char_right_gc(dup, end - dup)] = '\0';
+
+	ret = lxc_safe_long_long(dup, &conv);
+	if (ret < 0)
+		return ret;
+
+	/* no suffix or a single letter: the value is taken as bytes */
+	if (suffix_len != 2) {
+		*converted = conv;
+		return 0;
+	}
+
+	if (!strcmp(suffix, "kB"))
+		mltpl = 1024;
+	else if (!strcmp(suffix, "MB"))
+		mltpl = 1024 * 1024;
+	else if (!strcmp(suffix, "GB"))
+		mltpl = 1024 * 1024 * 1024;
+	else
+		return -EINVAL;
+
+	/* check the range first: signed overflow is undefined behavior */
+	if (conv > INT64_MAX / mltpl || conv < INT64_MIN / mltpl)
+		return -ERANGE;
+
+	*converted = conv * mltpl;
+	return 0;
+}
diff --git a/src/lxc/utils.h b/src/lxc/utils.h
index b98435cb1..aca4e8f18 100644
--- a/src/lxc/utils.h
+++ b/src/lxc/utils.h
@@ -416,6 +416,8 @@ extern int lxc_safe_int(const char *numstr, int *converted);
 extern int lxc_safe_long(const char *numstr, long int *converted);
 extern int lxc_safe_long_long(const char *numstr, long long int *converted);
 extern int lxc_safe_ulong(const char *numstr, unsigned long *converted);
+/* Handles B, kB, MB, GB. Detects overflows and reports -ERANGE. */
+extern int parse_byte_size_string(const char *s, int64_t *converted);
 
 /* Switch to a new uid and gid. */
 extern int lxc_switch_uid_gid(uid_t uid, gid_t gid);

From 73c9faa084d61111f2523b00081ef7fb71944552 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Wed, 18 Oct 2017 21:20:37 +0200
Subject: [PATCH 07/11] utils: add lxc_find_next_power2()

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/utils.c | 12 ++++++++++++
 src/lxc/utils.h |  1 +
 2 files changed, 13 insertions(+)

diff --git a/src/lxc/utils.c b/src/lxc/utils.c
index e6c84a0d5..7266b774f 100644
--- a/src/lxc/utils.c
+++ b/src/lxc/utils.c
@@ -2477,3 +2477,15 @@ int parse_byte_size_string(const char *s, int64_t *converted)
 	*converted = overflow;
 	return 0;
 }
+
+uint64_t lxc_find_next_power2(uint64_t n)
+{
+	uint64_t msb = n;
+
+	/* repeatedly clear the lowest set bit until at most one bit is left */
+	while (msb & (msb - 1))
+		msb &= msb - 1;
+
+	/* n was 0 or a power of two already; otherwise double the top bit */
+	return (msb == n) ? n : msb << 1;
+}
diff --git a/src/lxc/utils.h b/src/lxc/utils.h
index aca4e8f18..92bc08bcd 100644
--- a/src/lxc/utils.h
+++ b/src/lxc/utils.h
@@ -466,5 +466,6 @@ extern bool is_fs_type(const struct statfs *fs, fs_type_magic magic_val);
 extern bool lxc_nic_exists(char *nic);
 extern int lxc_cloexec(int fd, bool set);
 extern int lxc_make_tmpfile(char *template, int suffixlen, bool cloexec, bool rm);
+extern uint64_t lxc_find_next_power2(uint64_t n);
 
 #endif /* __LXC_UTILS_H */

From 9ecb229905d73efc3b00052610351ea6fcc70e99 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Wed, 18 Oct 2017 16:20:01 +0200
Subject: [PATCH 08/11] confile_utils: add lxc_get_conf_uint64()

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/confile_utils.c | 10 ++++++++++
 src/lxc/confile_utils.h |  1 +
 2 files changed, 11 insertions(+)

diff --git a/src/lxc/confile_utils.c b/src/lxc/confile_utils.c
index e49178809..0dc50662a 100644
--- a/src/lxc/confile_utils.c
+++ b/src/lxc/confile_utils.c
@@ -672,6 +672,16 @@ int lxc_get_conf_int(struct lxc_conf *c, char *retv, int inlen, int v)
 	return snprintf(retv, inlen, "%d", v);
 }
 
+int lxc_get_conf_uint64(struct lxc_conf *c, char *retv, int inlen, uint64_t v)
+{
+	/* no buffer: only compute the length; otherwise start from zeroes */
+	if (retv)
+		memset(retv, 0, inlen);
+	else
+		inlen = 0;
+	return snprintf(retv, inlen, "%"PRIu64, v);
+}
+
 bool parse_limit_value(const char **value, unsigned long *res)
 {
 	char *endptr = NULL;
diff --git a/src/lxc/confile_utils.h b/src/lxc/confile_utils.h
index 2f1079a2c..9b75c8c50 100644
--- a/src/lxc/confile_utils.h
+++ b/src/lxc/confile_utils.h
@@ -84,5 +84,6 @@ extern void update_hwaddr(const char *line);
 extern bool new_hwaddr(char *hwaddr);
 extern int lxc_get_conf_str(char *retv, int inlen, const char *value);
 extern int lxc_get_conf_int(struct lxc_conf *c, char *retv, int inlen, int v);
+extern int lxc_get_conf_uint64(struct lxc_conf *c, char *retv, int inlen, uint64_t v);
 extern bool parse_limit_value(const char **value, unsigned long *res);
 #endif /* __LXC_CONFILE_UTILS_H */

From 8e1db34705ebd3db615c2f2e02e3c815558b6571 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Wed, 18 Oct 2017 16:20:36 +0200
Subject: [PATCH 09/11] confile: add lxc.console.logsize

Closes #1857.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/conf.c    |  2 ++
 src/lxc/conf.h    |  1 +
 src/lxc/confile.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 57 insertions(+)

diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index 3b1e422a3..fdc4d806d 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -3466,6 +3466,8 @@ void lxc_conf_free(struct lxc_conf *conf)
 	free(conf->logfile);
 	if (conf->logfd != -1)
 		close(conf->logfd);
+	if (conf->console.log_size > 0 && conf->console.ringbuf.addr)
+		lxc_ringbuf_release(&conf->console.ringbuf);
 	free(conf->utsname);
 	free(conf->ttydir);
 	free(conf->fstab);
diff --git a/src/lxc/conf.h b/src/lxc/conf.h
index 169857f2b..d89aed400 100644
--- a/src/lxc/conf.h
+++ b/src/lxc/conf.h
@@ -152,6 +152,7 @@ struct lxc_console {
 	char name[MAXPATHLEN];
 	struct termios *tios;
 	struct lxc_tty_state *tty_state;
+	uint64_t log_size;
 };
 
 /*
diff --git a/src/lxc/confile.c b/src/lxc/confile.c
index 4850e4ce5..831517efa 100644
--- a/src/lxc/confile.c
+++ b/src/lxc/confile.c
@@ -83,6 +83,7 @@ lxc_config_define(cap_keep);
 lxc_config_define(cgroup_controller);
 lxc_config_define(cgroup_dir);
 lxc_config_define(console_logfile);
+lxc_config_define(console_logsize);
 lxc_config_define(console_path);
 lxc_config_define(environment);
 lxc_config_define(ephemeral);
@@ -148,6 +149,7 @@ static struct lxc_config_t config[] = {
 	{ "lxc.cgroup.dir",                false,                  set_config_cgroup_dir,                  get_config_cgroup_dir,                  clr_config_cgroup_dir,                },
 	{ "lxc.cgroup",                    false,                  set_config_cgroup_controller,           get_config_cgroup_controller,           clr_config_cgroup_controller,         },
 	{ "lxc.console.logfile",           false,                  set_config_console_logfile,             get_config_console_logfile,             clr_config_console_logfile,           },
+	{ "lxc.console.logsize",           false,                  set_config_console_logsize,             get_config_console_logsize,             clr_config_console_logsize,           },
 	{ "lxc.console.path",              false,                  set_config_console_path,                get_config_console_path,                clr_config_console_path,              },
 	{ "lxc.environment",               false,                  set_config_environment,                 get_config_environment,                 clr_config_environment,               },
 	{ "lxc.ephemeral",                 false,                  set_config_ephemeral,                   get_config_ephemeral,                   clr_config_ephemeral,                 },
@@ -1790,6 +1792,45 @@ static int set_config_console_logfile(const char *key, const char *value,
 	return set_config_path_item(&lxc_conf->console.log_path, value);
 }
 
+static int set_config_console_logsize(const char *key, const char *value,
+				      struct lxc_conf *lxc_conf, void *data)
+{
+	int pgsz, ret;
+	int64_t size;
+	uint64_t logsize;
+
+	if (lxc_config_value_empty(value)) {
+		lxc_conf->console.log_size = 0;
+		return 0;
+	}
+
+	/* If the user specified "auto" the default log size is 2^17 = 128 KiB */
+	if (!strcmp(value, "auto")) {
+		lxc_conf->console.log_size = 1 << 17;
+		return 0;
+	}
+
+	ret = parse_byte_size_string(value, &size);
+	if (ret != 0) /* any non-zero value is an error; size is unset then */
+		return -1;
+
+	/* Must be at least a page size */
+	pgsz = getpagesize();
+	if (size < pgsz) {
+		ERROR("%s - The size of the console log must be at least %d "
+		      "bytes", strerror(EINVAL), pgsz);
+		return -EINVAL;
+	}
+
+	logsize = lxc_find_next_power2((uint64_t)size);
+	if (logsize != (uint64_t)size)
+		NOTICE("Passed size was not a power of 2. Rounding log size to "
+		       "next power of two: %" PRIu64 " bytes", logsize);
+
+	lxc_conf->console.log_size = logsize;
+	return 0;
+}
+
 int append_unexp_config_line(const char *line, struct lxc_conf *conf)
 {
 	size_t len = conf->unexpanded_len, linelen = strlen(line);
@@ -3023,6 +3064,12 @@ static int get_config_console_logfile(const char *key, char *retv, int inlen,
 	return lxc_get_conf_str(retv, inlen, c->console.log_path);
 }
 
+static int get_config_console_logsize(const char *key, char *retv, int inlen,
+				      struct lxc_conf *c, void *data)
+{
+	return lxc_get_conf_uint64(c, retv, inlen, c->console.log_size);
+}
+
 static int get_config_seccomp_profile(const char *key, char *retv, int inlen,
 				      struct lxc_conf *c, void *data)
 {
@@ -3405,6 +3452,13 @@ static inline int clr_config_console_logfile(const char *key,
 	return 0;
 }
 
+static inline int clr_config_console_logsize(const char *key,
+					     struct lxc_conf *c, void *data)
+{
+	c->console.log_size = 0; /* same value an empty config value sets */
+	return 0;
+}
+
 static inline int clr_config_seccomp_profile(const char *key,
 					     struct lxc_conf *c, void *data)
 {

From 3e791f86db3d5b8f3dd6c018c203a094925df493 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Wed, 18 Oct 2017 20:24:54 +0200
Subject: [PATCH 10/11] conf: lxc_setup() -> lxc_setup_child()

Closes #1857.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/conf.c | 2 +-
 src/lxc/conf.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index fdc4d806d..a48a09167 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -3079,7 +3079,7 @@ static bool verify_start_hooks(struct lxc_conf *conf)
 	return true;
 }
 
-int lxc_setup(struct lxc_handler *handler)
+int lxc_setup_child(struct lxc_handler *handler)
 {
 	int ret;
 	const char *name = handler->name;
diff --git a/src/lxc/conf.h b/src/lxc/conf.h
index d89aed400..d228c94c1 100644
--- a/src/lxc/conf.h
+++ b/src/lxc/conf.h
@@ -377,7 +377,7 @@ extern int lxc_delete_autodev(struct lxc_handler *handler);
 extern void lxc_clear_includes(struct lxc_conf *conf);
 extern int do_rootfs_setup(struct lxc_conf *conf, const char *name,
 			   const char *lxcpath);
-extern int lxc_setup(struct lxc_handler *handler);
+extern int lxc_setup_child(struct lxc_handler *handler);
 extern int setup_resource_limits(struct lxc_list *limits, pid_t pid);
 extern int find_unmapped_nsid(struct lxc_conf *conf, enum idtype idtype);
 extern int mapped_hostid(unsigned id, struct lxc_conf *conf,

From e801ec16a4e29d429e5560e05ddd305c5518be43 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Wed, 18 Oct 2017 20:30:08 +0200
Subject: [PATCH 11/11] console: add ringbuffer

Closes #1857.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/conf.c    | 29 +++++++++++++++++++++++++++++
 src/lxc/conf.h    |  3 +++
 src/lxc/console.c | 11 ++++++++++-
 src/lxc/start.c   |  7 ++++++-
 4 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index a48a09167..d3cfc2aed 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -80,6 +80,7 @@
 #include "namespace.h"
 #include "network.h"
 #include "parse.h"
+#include "ringbuf.h"
 #include "storage.h"
 #include "storage/aufs.h"
 #include "storage/overlay.h"
@@ -2428,6 +2429,7 @@ struct lxc_conf *lxc_conf_init(void)
 	new->autodev = 1;
 	new->console.log_path = NULL;
 	new->console.log_fd = -1;
+	new->console.log_size = 0;
 	new->console.path = NULL;
 	new->console.peer = -1;
 	new->console.peerpty.busy = -1;
@@ -2436,6 +2438,7 @@ struct lxc_conf *lxc_conf_init(void)
 	new->console.master = -1;
 	new->console.slave = -1;
 	new->console.name[0] = '\0';
+	memset(&new->console.ringbuf, 0, sizeof(struct lxc_ringbuf));
 	new->maincmd_fd = -1;
 	new->nbd_idx = -1;
 	new->rootfs.mount = strdup(default_rootfs_mount);
@@ -3079,6 +3082,32 @@ static bool verify_start_hooks(struct lxc_conf *conf)
 	return true;
 }
 
+/*
+ * Create the console ringbuffer. A log_size of 0 means none was requested
+ * (lxc_console_cb_con() only writes to it when log_size > 0), so that case
+ * succeeds without calling lxc_ringbuf_create(). Returns 0 or -1.
+ */
+static int lxc_setup_console_ringbuf(struct lxc_console *console)
+{
+	if (console->log_size == 0)
+		return 0;
+
+	if (lxc_ringbuf_create(&console->ringbuf, console->log_size) < 0) {
+		ERROR("Failed to setup ringbuffer for console \"%s\"", console->name);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Parent-side setup; lxc_spawn() runs it before lxc_clone(). Returns 0 or -1.
+ */
+int lxc_setup_parent(struct lxc_handler *handler)
+{
+	return lxc_setup_console_ringbuf(&handler->conf->console);
+}
+
 int lxc_setup_child(struct lxc_handler *handler)
 {
 	int ret;
diff --git a/src/lxc/conf.h b/src/lxc/conf.h
index d228c94c1..c61f861ed 100644
--- a/src/lxc/conf.h
+++ b/src/lxc/conf.h
@@ -36,6 +36,7 @@
 #include <stdbool.h>
 
 #include "list.h"
+#include "ringbuf.h"
 #include "start.h" /* for lxc_handler */
 
 #if HAVE_SCMP_FILTER_CTX
@@ -153,6 +154,7 @@ struct lxc_console {
 	struct termios *tios;
 	struct lxc_tty_state *tty_state;
 	uint64_t log_size;
+	struct lxc_ringbuf ringbuf;
 };
 
 /*
@@ -378,6 +380,7 @@ extern void lxc_clear_includes(struct lxc_conf *conf);
 extern int do_rootfs_setup(struct lxc_conf *conf, const char *name,
 			   const char *lxcpath);
 extern int lxc_setup_child(struct lxc_handler *handler);
+extern int lxc_setup_parent(struct lxc_handler *handler);
 extern int setup_resource_limits(struct lxc_list *limits, pid_t pid);
 extern int find_unmapped_nsid(struct lxc_conf *conf, enum idtype idtype);
 extern int mapped_hostid(unsigned id, struct lxc_conf *conf,
diff --git a/src/lxc/console.c b/src/lxc/console.c
index c8e545eb4..e4fff0008 100644
--- a/src/lxc/console.c
+++ b/src/lxc/console.c
@@ -168,7 +168,7 @@ static int lxc_console_cb_con(int fd, uint32_t events, void *data,
 {
 	struct lxc_console *console = (struct lxc_console *)data;
 	char buf[1024];
-	int r, w;
+	int r, ret, w;
 
 	w = r = lxc_read_nointr(fd, buf, sizeof(buf));
 	if (r <= 0) {
@@ -196,6 +196,15 @@ static int lxc_console_cb_con(int fd, uint32_t events, void *data,
 
 		if (console->peer >= 0)
 			w = lxc_write_nointr(console->peer, buf, r);
+
+		if (console->log_size > 0) {
+			process_lock();
+			ret = lxc_ringbuf_write(&console->ringbuf, buf, r);
+			process_unlock();
+			if (ret < 0)
+				TRACE("Failed to write %d bytes to console "
+				      "ringbuffer", r);
+		}
 	}
 
 	if (w != r)
diff --git a/src/lxc/start.c b/src/lxc/start.c
index 402bba552..7748dbf61 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -937,7 +937,7 @@ static int do_start(void *data)
 	}
 
 	/* Setup the container, ip, names, utsname, ... */
-	ret = lxc_setup(handler);
+	ret = lxc_setup_child(handler);
 	close(handler->data_sock[0]);
 	close(handler->data_sock[1]);
 	if (ret < 0) {
@@ -1254,6 +1254,11 @@ static int lxc_spawn(struct lxc_handler *handler)
 		 */
 		flags &= ~CLONE_NEWNET;
 	}
+
+	ret = lxc_setup_parent(handler);
+	if (ret < 0)
+		goto out_delete_net;
+
 	handler->pid = lxc_clone(do_start, handler, flags);
 	if (handler->pid < 0) {
 		SYSERROR("Failed to clone a new set of namespaces.");


More information about the lxc-devel mailing list