[Lxc-users] [PATCH RFC][gross hack] containerized syslog
Serge E. Hallyn
serue at us.ibm.com
Thu Feb 4 06:09:18 UTC 2010
Provide each user namespace with its own syslog ringbuffer.
So you can do
ns_exec -cU /bin/bash
dmesg
and see nothing. Root in a container (with private user namespace)
cannot clear the host's ring buffer.
Since containers do not have a notion of consoles at present,
only the initial user namespace deals with console output or
with the console-related syslog commands.
This opens the door to targeting printk at certain syslog
namespaces. It's not safe to apply - it's a quick-n-dirty
hack and won't even compile for CONFIG_PRINTK=n. Also I've not decided
what to do about duplication of printks to init_user_ns, so for
now emit_log_char always duplicates to init_user_ns. We probably
want to be smarter about this and output a prefix indicating the
target.
But I figured discussions about the API would be more meaningful
with a testable patch.
---
fs/proc/kmsg.c | 5 +-
include/linux/user_namespace.h | 2 +
kernel/printk.c | 225 ++++++++++++++++++++++++++--------------
kernel/user.c | 4 +
kernel/user_namespace.c | 13 +++
5 files changed, 168 insertions(+), 81 deletions(-)
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index 7ca7834..2746b70 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -12,11 +12,12 @@
#include <linux/poll.h>
#include <linux/proc_fs.h>
#include <linux/fs.h>
+#include <linux/syslog.h>
#include <asm/uaccess.h>
#include <asm/io.h>
-extern wait_queue_head_t log_wait;
+extern struct syslog_ns init_syslog_ns;
extern int do_syslog(int type, char __user *bug, int count);
@@ -41,7 +42,7 @@ static ssize_t kmsg_read(struct file *file, char __user *buf,
static unsigned int kmsg_poll(struct file *file, poll_table *wait)
{
- poll_wait(file, &log_wait, wait);
+ poll_wait(file, &init_syslog_ns.wait, wait);
if (do_syslog(9, NULL, 0))
return POLLIN | POLLRDNORM;
return 0;
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index cc4f453..3926c89 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -5,6 +5,7 @@
#include <linux/nsproxy.h>
#include <linux/sched.h>
#include <linux/err.h>
+#include <linux/syslog.h>
#define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 8)
#define UIDHASH_SZ (1 << UIDHASH_BITS)
@@ -14,6 +15,7 @@ struct user_namespace {
struct hlist_head uidhash_table[UIDHASH_SZ];
struct user_struct *creator;
struct work_struct destroyer;
+ struct syslog_ns *syslog;
};
extern struct user_namespace init_user_ns;
diff --git a/kernel/printk.c b/kernel/printk.c
index 1751c45..5b93447 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -35,9 +35,18 @@
#include <linux/kexec.h>
#include <linux/ratelimit.h>
#include <linux/kmsg_dump.h>
+#include <linux/user_namespace.h>
#include <asm/uaccess.h>
+struct syslog_ns init_syslog_ns;
+#define g_log_wait (init_syslog_ns.wait)
+#define g_log_start (init_syslog_ns.start)
+#define g_log_end (init_syslog_ns.end)
+#define g_log_buf_len (init_syslog_ns.buf_len)
+#define g_logged_chars (init_syslog_ns.logged_chars)
+#define g_log_buf (init_syslog_ns.buf)
+
/*
* for_each_console() allows you to iterate on each console
*/
@@ -52,6 +61,7 @@ void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...)
}
#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
+#define CONTAINER_BUF_LEN 4096
/* printk's without a loglevel use this.. */
#define DEFAULT_MESSAGE_LOGLEVEL 4 /* KERN_WARNING */
@@ -60,8 +70,6 @@ void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...)
#define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */
#define DEFAULT_CONSOLE_LOGLEVEL 7 /* anything MORE serious than KERN_DEBUG */
-DECLARE_WAIT_QUEUE_HEAD(log_wait);
-
int console_printk[4] = {
DEFAULT_CONSOLE_LOGLEVEL, /* console_loglevel */
DEFAULT_MESSAGE_LOGLEVEL, /* default_message_loglevel */
@@ -98,22 +106,20 @@ EXPORT_SYMBOL_GPL(console_drivers);
static int console_locked, console_suspended;
/*
- * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars
+ * logbuf_lock protects g_log_buf, g_log_start, g_log_end, con_start and g_logged_chars
* It is also used in interesting ways to provide interlocking in
* release_console_sem().
*/
static DEFINE_SPINLOCK(logbuf_lock);
-#define LOG_BUF_MASK (log_buf_len-1)
-#define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK])
+#define LOG_BUF_MASK(ns) ((ns)->buf_len-1)
+#define LOG_BUF(ns, idx) ((ns)->buf[(idx) & LOG_BUF_MASK(ns)])
/*
- * The indices into log_buf are not constrained to log_buf_len - they
+ * The indices into g_log_buf are not constrained to g_log_buf_len - they
* must be masked before subscripting
*/
-static unsigned log_start; /* Index into log_buf: next char to be read by syslog() */
-static unsigned con_start; /* Index into log_buf: next char to be sent to consoles */
-static unsigned log_end; /* Index into log_buf: most-recently-written-char + 1 */
+static unsigned con_start; /* Index into g_log_buf: next char to be sent to consoles */
/*
* Array of consoles built from command line options (console=)
@@ -142,9 +148,6 @@ static int console_may_schedule;
#ifdef CONFIG_PRINTK
static char __log_buf[__LOG_BUF_LEN];
-static char *log_buf = __log_buf;
-static int log_buf_len = __LOG_BUF_LEN;
-static unsigned logged_chars; /* Number of chars produced since last read+clear operation */
#ifdef CONFIG_KEXEC
/*
@@ -157,10 +160,10 @@ static unsigned logged_chars; /* Number of chars produced since last read+clear
*/
void log_buf_kexec_setup(void)
{
- VMCOREINFO_SYMBOL(log_buf);
- VMCOREINFO_SYMBOL(log_end);
- VMCOREINFO_SYMBOL(log_buf_len);
- VMCOREINFO_SYMBOL(logged_chars);
+	/*
+	 * VMCOREINFO_SYMBOL() stringifies its argument, so handing it the g_*
+	 * accessor macros would publish these entries under the new g_* names.
+	 * Emit them by hand to keep the names that were exported before the
+	 * ring buffer state moved into init_syslog_ns.
+	 * NOTE(review): presumably dump tools look these up by name - confirm.
+	 */
+	vmcoreinfo_append_str("SYMBOL(log_buf)=%lx\n",
+			      (unsigned long)&g_log_buf);
+	vmcoreinfo_append_str("SYMBOL(log_end)=%lx\n",
+			      (unsigned long)&g_log_end);
+	vmcoreinfo_append_str("SYMBOL(log_buf_len)=%lx\n",
+			      (unsigned long)&g_log_buf_len);
+	vmcoreinfo_append_str("SYMBOL(logged_chars)=%lx\n",
+			      (unsigned long)&g_logged_chars);
}
#endif
@@ -171,7 +174,7 @@ static int __init log_buf_len_setup(char *str)
if (size)
size = roundup_pow_of_two(size);
- if (size > log_buf_len) {
+ if (size > g_log_buf_len) {
unsigned start, dest_idx, offset;
char *new_log_buf;
@@ -182,22 +185,22 @@ static int __init log_buf_len_setup(char *str)
}
spin_lock_irqsave(&logbuf_lock, flags);
- log_buf_len = size;
- log_buf = new_log_buf;
+ g_log_buf_len = size;
+ g_log_buf = new_log_buf;
- offset = start = min(con_start, log_start);
+ offset = start = min(con_start, g_log_start);
dest_idx = 0;
- while (start != log_end) {
- log_buf[dest_idx] = __log_buf[start & (__LOG_BUF_LEN - 1)];
+		while (start != g_log_end) {
+			/*
+			 * g_log_buf already points at new_log_buf here, so the
+			 * old messages must be read from the static __log_buf;
+			 * reading g_log_buf would copy the new buffer onto
+			 * itself and drop everything logged so far.
+			 */
+			g_log_buf[dest_idx] = __log_buf[start & (__LOG_BUF_LEN - 1)];
start++;
dest_idx++;
}
- log_start -= offset;
+ g_log_start -= offset;
con_start -= offset;
- log_end -= offset;
+ g_log_end -= offset;
spin_unlock_irqrestore(&logbuf_lock, flags);
- printk(KERN_NOTICE "log_buf_len: %d\n", log_buf_len);
+ printk(KERN_NOTICE "log_buf_len: %d\n", g_log_buf_len);
}
out:
return 1;
@@ -279,6 +282,7 @@ int do_syslog(int type, char __user *buf, int len)
int do_clear = 0;
char c;
int error = 0;
+ struct syslog_ns *syslog_ns = current_user_ns()->syslog;
error = security_syslog(type);
if (error)
@@ -300,15 +304,17 @@ int do_syslog(int type, char __user *buf, int len)
error = -EFAULT;
goto out;
}
- error = wait_event_interruptible(log_wait,
- (log_start - log_end));
+ error = wait_event_interruptible(syslog_ns->wait,
+ (syslog_ns->start - syslog_ns->end));
if (error)
goto out;
i = 0;
spin_lock_irq(&logbuf_lock);
- while (!error && (log_start != log_end) && i < len) {
- c = LOG_BUF(log_start);
- log_start++;
+ while (!error &&
+ (syslog_ns->start != syslog_ns->end)
+ && i < len) {
+ c = LOG_BUF(syslog_ns, syslog_ns->start);
+ syslog_ns->start++;
spin_unlock_irq(&logbuf_lock);
error = __put_user(c,buf);
buf++;
@@ -335,14 +341,14 @@ int do_syslog(int type, char __user *buf, int len)
goto out;
}
count = len;
- if (count > log_buf_len)
- count = log_buf_len;
+ if (count > syslog_ns->buf_len)
+ count = syslog_ns->buf_len;
spin_lock_irq(&logbuf_lock);
- if (count > logged_chars)
- count = logged_chars;
+ if (count > syslog_ns->logged_chars)
+ count = syslog_ns->logged_chars;
if (do_clear)
- logged_chars = 0;
- limit = log_end;
+ syslog_ns->logged_chars = 0;
+ limit = syslog_ns->end;
/*
* __put_user() could sleep, and while we sleep
* printk() could overwrite the messages
@@ -351,9 +357,9 @@ int do_syslog(int type, char __user *buf, int len)
*/
for (i = 0; i < count && !error; i++) {
j = limit-1-i;
- if (j + log_buf_len < log_end)
+ if (j + syslog_ns->buf_len < syslog_ns->end)
break;
- c = LOG_BUF(j);
+ c = LOG_BUF(syslog_ns, j);
spin_unlock_irq(&logbuf_lock);
error = __put_user(c,&buf[count-1-i]);
cond_resched();
@@ -377,20 +383,32 @@ int do_syslog(int type, char __user *buf, int len)
}
break;
case 5: /* Clear ring buffer */
- logged_chars = 0;
+ syslog_ns->logged_chars = 0;
break;
case 6: /* Disable logging to console */
+ if (syslog_ns != &init_syslog_ns) {
+ error = -EPERM;
+ break;
+ }
if (saved_console_loglevel == -1)
saved_console_loglevel = console_loglevel;
console_loglevel = minimum_console_loglevel;
break;
case 7: /* Enable logging to console */
+ if (syslog_ns != &init_syslog_ns) {
+ error = -EPERM;
+ break;
+ }
if (saved_console_loglevel != -1) {
console_loglevel = saved_console_loglevel;
saved_console_loglevel = -1;
}
break;
case 8: /* Set level of messages printed to console */
+ if (syslog_ns != &init_syslog_ns) {
+ error = -EPERM;
+ break;
+ }
error = -EINVAL;
if (len < 1 || len > 8)
goto out;
@@ -402,10 +420,10 @@ int do_syslog(int type, char __user *buf, int len)
error = 0;
break;
case 9: /* Number of chars in the log buffer */
- error = log_end - log_start;
+ error = syslog_ns->end - syslog_ns->start;
break;
case 10: /* Size of the log buffer */
- error = log_buf_len;
+ error = syslog_ns->buf_len;
break;
default:
error = -EINVAL;
@@ -421,7 +439,7 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
}
/*
- * Call the console drivers on a range of log_buf
+ * Call the console drivers on a range of g_log_buf
*/
static void __call_console_drivers(unsigned start, unsigned end)
{
@@ -431,7 +449,8 @@ static void __call_console_drivers(unsigned start, unsigned end)
if ((con->flags & CON_ENABLED) && con->write &&
(cpu_online(smp_processor_id()) ||
(con->flags & CON_ANYTIME)))
- con->write(con, &LOG_BUF(start), end - start);
+ con->write(con, &LOG_BUF(&init_syslog_ns, start),
+ end - start);
}
}
@@ -455,11 +474,14 @@ static void _call_console_drivers(unsigned start,
{
if ((msg_log_level < console_loglevel || ignore_loglevel) &&
console_drivers && start != end) {
- if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) {
+ if ((start & LOG_BUF_MASK(&init_syslog_ns)) >
+ (end & LOG_BUF_MASK(&init_syslog_ns))) {
/* wrapped write */
- __call_console_drivers(start & LOG_BUF_MASK,
- log_buf_len);
- __call_console_drivers(0, end & LOG_BUF_MASK);
+ __call_console_drivers(start &
+ LOG_BUF_MASK(&init_syslog_ns),
+ g_log_buf_len);
+ __call_console_drivers(0,
+ end & LOG_BUF_MASK(&init_syslog_ns));
} else {
__call_console_drivers(start, end);
}
@@ -468,13 +490,14 @@ static void _call_console_drivers(unsigned start,
/*
* Call the console drivers, asking them to write out
- * log_buf[start] to log_buf[end - 1].
+ * g_log_buf[start] to g_log_buf[end - 1].
* The console_sem must be held.
*/
static void call_console_drivers(unsigned start, unsigned end)
{
unsigned cur_index, start_print;
static int msg_level = -1;
+ static struct syslog_ns *ns = &init_syslog_ns;
BUG_ON(((int)(start - end)) > 0);
@@ -482,16 +505,16 @@ static void call_console_drivers(unsigned start, unsigned end)
start_print = start;
while (cur_index != end) {
if (msg_level < 0 && ((end - cur_index) > 2) &&
- LOG_BUF(cur_index + 0) == '<' &&
- LOG_BUF(cur_index + 1) >= '0' &&
- LOG_BUF(cur_index + 1) <= '7' &&
- LOG_BUF(cur_index + 2) == '>') {
- msg_level = LOG_BUF(cur_index + 1) - '0';
+ LOG_BUF(ns, cur_index + 0) == '<' &&
+ LOG_BUF(ns, cur_index + 1) >= '0' &&
+ LOG_BUF(ns, cur_index + 1) <= '7' &&
+ LOG_BUF(ns, cur_index + 2) == '>') {
+ msg_level = LOG_BUF(ns, cur_index + 1) - '0';
cur_index += 3;
start_print = cur_index;
}
while (cur_index != end) {
- char c = LOG_BUF(cur_index);
+ char c = LOG_BUF(ns, cur_index);
cur_index++;
if (c == '\n') {
@@ -514,16 +537,26 @@ static void call_console_drivers(unsigned start, unsigned end)
_call_console_drivers(start_print, end, msg_level);
}
+/*
+ * Append one character to the ring buffer of @ns.
+ *
+ * The indices are free-running and only masked on access (LOG_BUF), so
+ * "end - start" is the number of unread characters.  When the writer laps
+ * a reader, that reader's index is pulled forward to the oldest character
+ * still present in the buffer.
+ */
+static void do_emit_log_char(struct syslog_ns *ns, char c)
+{
+	LOG_BUF(ns, ns->end) = c;
+	ns->end++;
+
+	/* syslog reader fell a full buffer behind: drop the oldest data */
+	if (ns->end - ns->start > ns->buf_len)
+		ns->start = ns->end - ns->buf_len;
+
+	/* con_start indexes init_syslog_ns only, so clamp it just for that ns */
+	if (ns == &init_syslog_ns && ns->end - con_start > ns->buf_len)
+		con_start = ns->end - ns->buf_len;
+
+	/* logged_chars saturates at the buffer size */
+	if (ns->logged_chars < ns->buf_len)
+		ns->logged_chars++;
+}
+
static void emit_log_char(char c)
{
- LOG_BUF(log_end) = c;
- log_end++;
- if (log_end - log_start > log_buf_len)
- log_start = log_end - log_buf_len;
- if (log_end - con_start > log_buf_len)
- con_start = log_end - log_buf_len;
- if (logged_chars < log_buf_len)
- logged_chars++;
+	struct syslog_ns *own_ns = current_user_ns()->syslog;
+
+	/*
+	 * Log into the syslog buffer of the current user namespace when it
+	 * is a private one; every character is additionally copied into
+	 * init_syslog_ns.
+	 */
+	if (own_ns != &init_syslog_ns)
+		do_emit_log_char(own_ns, c);
+	do_emit_log_char(&init_syslog_ns, c);
}
/*
@@ -669,6 +702,25 @@ static inline void printk_delay(void)
}
}
+/*
+ * Allocate the syslog state for a new user namespace: a zeroed syslog_ns
+ * with a CONTAINER_BUF_LEN byte ring buffer and an initialised wait queue.
+ * Called from create_user_ns().
+ *
+ * Returns the new syslog_ns, or ERR_PTR(-ENOMEM) if either allocation
+ * fails.  It never returns NULL.
+ */
+struct syslog_ns *do_syslog_init(void)
+{
+	struct syslog_ns *new_ns = kzalloc(sizeof(*new_ns), GFP_KERNEL);
+
+	if (!new_ns)
+		goto fail;
+
+	new_ns->buf = kzalloc(CONTAINER_BUF_LEN, GFP_KERNEL);
+	if (!new_ns->buf)
+		goto fail_free_ns;
+
+	new_ns->buf_len = CONTAINER_BUF_LEN;
+	init_waitqueue_head(&new_ns->wait);
+	return new_ns;
+
+fail_free_ns:
+	kfree(new_ns);
+fail:
+	return ERR_PTR(-ENOMEM);
+}
+
asmlinkage int vprintk(const char *fmt, va_list args)
{
int printed_len = 0;
@@ -676,6 +728,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
unsigned long flags;
int this_cpu;
char *p;
+ struct syslog_ns *syslog_ns;
boot_delay_msec();
printk_delay();
@@ -741,7 +794,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
}
/*
- * Copy the output into log_buf. If the caller didn't provide
+ * Copy the output into g_log_buf. If the caller didn't provide
* appropriate log level tags, we insert them here
*/
for ( ; *p; p++) {
@@ -790,7 +843,13 @@ asmlinkage int vprintk(const char *fmt, va_list args)
* will release 'logbuf_lock' regardless of whether it
* actually gets the semaphore or not.
*/
- if (acquire_console_semaphore_for_printk(this_cpu))
+ syslog_ns = current_user_ns()->syslog;
+ if (syslog_ns != &init_syslog_ns) {
+ int need_wake = (syslog_ns->start != syslog_ns->end);
+ spin_unlock_irqrestore(&logbuf_lock, flags);
+ if (!oops_in_progress && need_wake)
+ wake_up_interruptible(&syslog_ns->wait);
+ } else if (acquire_console_semaphore_for_printk(this_cpu))
release_console_sem();
lockdep_on();
@@ -811,6 +870,14 @@ static void call_console_drivers(unsigned start, unsigned end)
#endif
+/*
+ * Syslog state belonging to init_user_ns.  It is backed by the static
+ * __log_buf, which is only declared under CONFIG_PRINTK, so note that
+ * this does not work for !CONFIG_PRINTK.
+ */
+struct syslog_ns init_syslog_ns = {
+	.buf		= __log_buf,
+	.buf_len	= __LOG_BUF_LEN,
+	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER(init_syslog_ns.wait),
+};
+
static int __add_preferred_console(char *name, int idx, char *options,
char *brl_options)
{
@@ -1010,7 +1077,7 @@ void printk_tick(void)
{
if (__get_cpu_var(printk_pending)) {
__get_cpu_var(printk_pending) = 0;
- wake_up_interruptible(&log_wait);
+ wake_up_interruptible(&g_log_wait);
}
}
@@ -1021,7 +1088,7 @@ int printk_needs_cpu(int cpu)
void wake_up_klogd(void)
{
- if (waitqueue_active(&log_wait))
+ if (waitqueue_active(&g_log_wait))
__raw_get_cpu_var(printk_pending) = 1;
}
@@ -1054,12 +1121,12 @@ void release_console_sem(void)
for ( ; ; ) {
spin_lock_irqsave(&logbuf_lock, flags);
- wake_klogd |= log_start - log_end;
- if (con_start == log_end)
+ wake_klogd |= g_log_start - g_log_end;
+ if (con_start == g_log_end)
break; /* Nothing to print */
_con_start = con_start;
- _log_end = log_end;
- con_start = log_end; /* Flush */
+ _log_end = g_log_end;
+ con_start = g_log_end; /* Flush */
spin_unlock(&logbuf_lock);
stop_critical_timings(); /* don't trace print latency */
call_console_drivers(_con_start, _log_end);
@@ -1287,7 +1354,7 @@ void register_console(struct console *newcon)
* for us.
*/
spin_lock_irqsave(&logbuf_lock, flags);
- con_start = log_start;
+ con_start = g_log_start;
spin_unlock_irqrestore(&logbuf_lock, flags);
}
release_console_sem();
@@ -1498,22 +1565,22 @@ void kmsg_dump(enum kmsg_dump_reason reason)
there's not a lot we can do about that. The new messages
will overwrite the start of what we dump. */
spin_lock_irqsave(&logbuf_lock, flags);
- end = log_end & LOG_BUF_MASK;
- chars = logged_chars;
+ end = g_log_end & LOG_BUF_MASK(&init_syslog_ns);
+ chars = g_logged_chars;
spin_unlock_irqrestore(&logbuf_lock, flags);
- if (logged_chars > end) {
- s1 = log_buf + log_buf_len - logged_chars + end;
- l1 = logged_chars - end;
+ if (g_logged_chars > end) {
+ s1 = g_log_buf + g_log_buf_len - g_logged_chars + end;
+ l1 = g_logged_chars - end;
- s2 = log_buf;
+ s2 = g_log_buf;
l2 = end;
} else {
s1 = "";
l1 = 0;
- s2 = log_buf + end - logged_chars;
- l2 = logged_chars;
+ s2 = g_log_buf + end - g_logged_chars;
+ l2 = g_logged_chars;
}
if (!spin_trylock_irqsave(&dump_list_lock, flags)) {
diff --git a/kernel/user.c b/kernel/user.c
index 46d0165..102c2ce 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -18,11 +18,15 @@
#include <linux/user_namespace.h>
#include "cred-internals.h"
+/* defined in kernel/printk.c */
+extern struct syslog_ns init_syslog_ns;
+
struct user_namespace init_user_ns = {
.kref = {
.refcount = ATOMIC_INIT(2),
},
.creator = &root_user,
+ .syslog = &init_syslog_ns,
};
EXPORT_SYMBOL_GPL(init_user_ns);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 076c7c8..43d46d1 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -11,6 +11,9 @@
#include <linux/user_namespace.h>
#include <linux/cred.h>
+/* defined in kernel/printk.c */
+extern struct syslog_ns *do_syslog_init(void);
+
/*
* Create a new user namespace, deriving the creator from the user in the
* passed credentials, and replacing that user with the new root user for the
@@ -34,9 +37,17 @@ int create_user_ns(struct cred *new)
for (n = 0; n < UIDHASH_SZ; ++n)
INIT_HLIST_HEAD(ns->uidhash_table + n);
+	/*
+	 * do_syslog_init() reports failure with ERR_PTR(), not NULL, so a
+	 * NULL test would let the error pointer through and it would be
+	 * dereferenced as ns->syslog later on.  Read the error code before
+	 * ns is freed.
+	 */
+	ns->syslog = do_syslog_init();
+	if (IS_ERR(ns->syslog)) {
+		int err = PTR_ERR(ns->syslog);
+
+		kfree(ns);
+		return err;
+	}
+
/* Alloc new root user. */
root_user = alloc_uid(ns, 0);
if (!root_user) {
+ kfree(ns->syslog->buf);
+ kfree(ns->syslog);
kfree(ns);
return -ENOMEM;
}
@@ -70,6 +81,8 @@ static void free_user_ns_work(struct work_struct *work)
struct user_namespace *ns =
container_of(work, struct user_namespace, destroyer);
free_uid(ns->creator);
+ kfree(ns->syslog->buf);
+ kfree(ns->syslog);
kfree(ns);
}
--
1.6.1
More information about the lxc-users
mailing list