[lxc-devel] [PATCH 1/2] Modify lxc_attach and lxc_init to use clone instead of fork.
Greg Kurz
gkurz at fr.ibm.com
Wed Aug 24 22:07:06 UTC 2011
On Wed, 2011-08-24 at 14:17 +0400, Vladimir Smirnov wrote:
> Clone allows more flexible control. Currently, if there is any inherited fd,
> lxc-start exits with error. With clone it's possible not to pass open fd's to childs.
>
Hmm... when it comes to file descriptors, you have two flavours:
- CLONE_FILES: the child will share the file descriptor table with the
parent. If any of the two tasks open or close a fd, the other will also
see the change.
- without CLONE_FILES: the child will get a copy of the parent's file
descriptor table. Then, each task may open or close a fd without effect
for the other.
As you see, the child shares or at least inherits a copy of all the fd's
with its parent. Honestly, I don't know any clone flag that would
prevent the child to be passed a fd...
> Signed-off-by: Vladimir Smirnov <civil at yandex-team.ru>
> ---
> src/lxc/lxc_attach.c | 79 +++++++++++++++++++++++++++++---------------------
> src/lxc/lxc_init.c | 46 ++++++++++++++++++-----------
> 2 files changed, 75 insertions(+), 50 deletions(-)
>
> diff --git a/src/lxc/lxc_attach.c b/src/lxc/lxc_attach.c
Why do you need to patch lxc-attach ? AFAIK it doesn't check for
inherited file descriptors...
> index ed3d5a4..930865e 100644
> --- a/src/lxc/lxc_attach.c
> +++ b/src/lxc/lxc_attach.c
> @@ -20,6 +20,7 @@
> * License along with this library; if not, write to the Free Software
> * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
> */
> +#define STACKSIZE 16384
>
> #define _GNU_SOURCE
> #include <unistd.h>
> @@ -29,6 +30,7 @@
> #include <sys/param.h>
> #include <sys/types.h>
> #include <sys/wait.h>
> +#include <sched.h>
>
> #include "commands.h"
> #include "arguments.h"
> @@ -56,14 +58,48 @@ Options :\n\
> .checker = NULL,
> };
>
> +static char *envp_global;
> +
> +int child_main()
> +{
> + struct passwd *passwd;
> + uid_t uid;
> +
> + if (my_args.argc) {
> + execve(my_args.argv[0], my_args.argv, envp_global);
> + SYSERROR("failed to exec '%s'", my_args.argv[0]);
> + return -1;
> + }
> +
> + uid = getuid();
> +
> + passwd = getpwuid(uid);
> + if (!passwd) {
> + SYSERROR("failed to get passwd " \
> + "entry for uid '%d'", uid);
> + return -1;
> + }
> +
> + {
> + char *const args[] = {
> + passwd->pw_shell,
> + NULL,
> + };
> +
> + execve(args[0], args, envp_global);
> + SYSERROR("failed to exec '%s'", args[0]);
> + return -1;
> + }
> +}
> +
> int main(int argc, char *argv[], char *envp[])
> {
> int ret;
> pid_t pid;
> - struct passwd *passwd;
> - uid_t uid;
> char *curdir;
>
> + envp_global = envp;
> +
> ret = lxc_caps_init();
> if (ret)
> return ret;
> @@ -96,7 +132,14 @@ int main(int argc, char *argv[], char *envp[])
>
> free(curdir);
>
> - pid = fork();
> + void **newstack;
> + newstack = (void **) malloc(STACKSIZE);
> + if (!newstack)
> + exit(newstack);
> +
> + newstack = (void **)(STACKSIZE + (char *)newstack);
> + *--newstack = 0;
> + pid = clone(child_main, newstack, CLONE_VM | CLONE_FS | CLONE_SIGHAND, 0);
>
> if (pid < 0) {
> SYSERROR("failed to fork");
> @@ -120,35 +163,5 @@ int main(int argc, char *argv[], char *envp[])
> return -1;
> }
>
> - if (!pid) {
> -
> - if (my_args.argc) {
> - execve(my_args.argv[0], my_args.argv, envp);
> - SYSERROR("failed to exec '%s'", my_args.argv[0]);
> - return -1;
> - }
> -
> - uid = getuid();
> -
> - passwd = getpwuid(uid);
> - if (!passwd) {
> - SYSERROR("failed to get passwd " \
> - "entry for uid '%d'", uid);
> - return -1;
> - }
> -
> - {
> - char *const args[] = {
> - passwd->pw_shell,
> - NULL,
> - };
> -
> - execve(args[0], args, envp);
> - SYSERROR("failed to exec '%s'", args[0]);
> - return -1;
> - }
> -
> - }
> -
> return 0;
> }
> diff --git a/src/lxc/lxc_init.c b/src/lxc/lxc_init.c
> index a534b51..7debd6f 100644
> --- a/src/lxc/lxc_init.c
> +++ b/src/lxc/lxc_init.c
> @@ -20,6 +20,7 @@
> * License along with this library; if not, write to the Free Software
> * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
> */
> +#define STACKSIZE 16384
>
> #include <stdio.h>
> #include <unistd.h>
> @@ -32,6 +33,7 @@
> #include <sys/wait.h>
> #define _GNU_SOURCE
> #include <getopt.h>
> +#include <sched.h>
>
> #include "log.h"
> #include "caps.h"
> @@ -40,6 +42,9 @@
>
> lxc_log_define(lxc_init, lxc);
>
> +static sigset_t mask, omask;
> +static char **aargv;
> +
> static int quiet;
>
> static struct option options[] = {
> @@ -49,6 +54,23 @@ static struct option options[] = {
>
> static int was_interrupted = 0;
>
> +int child_main()
> +{
> + int i;
> + int err = -1;
> + /* restore default signal handlers */
> + for (i = 1; i < NSIG; i++)
> + signal(i, SIG_DFL);
> +
> + sigprocmask(SIG_SETMASK, &omask, NULL);
> +
> + NOTICE("about to exec '%s'", aargv[0]);
> +
> + execvp(aargv[0], aargv);
> + ERROR("failed to exec: '%s' : %m", aargv[0]);
> + exit(err);
> +}
> +
> int main(int argc, char *argv[])
> {
>
> @@ -61,8 +83,6 @@ int main(int argc, char *argv[])
> pid_t pid;
> int nbargs = 0;
> int err = -1;
> - char **aargv;
> - sigset_t mask, omask;
> int i, shutdown = 0;
>
> while (1) {
> @@ -115,25 +135,17 @@ int main(int argc, char *argv[])
> if (lxc_caps_reset())
> exit(err);
>
> - pid = fork();
> -
> - if (pid < 0)
> + void **newstack;
> + newstack = (void **) malloc(STACKSIZE);
> + if (!newstack)
> exit(err);
>
> - if (!pid) {
> -
> - /* restore default signal handlers */
> - for (i = 1; i < NSIG; i++)
> - signal(i, SIG_DFL);
> + newstack = (void **)(STACKSIZE + (char *)newstack);
> + *--newstack = 0;
> + pid = clone(child_main, newstack, CLONE_VM | CLONE_FS | CLONE_SIGHAND, 0);
>
Passing CLONE_VM here means execvp() will flush the memory for both
parent and child... This means the lxc-init code doesn't run anymore and
this isn't acceptable, as it honors the child ripping work in an
application container.
> - sigprocmask(SIG_SETMASK, &omask, NULL);
> -
> - NOTICE("about to exec '%s'", aargv[0]);
> -
> - execvp(aargv[0], aargv);
> - ERROR("failed to exec: '%s' : %m", aargv[0]);
> + if (pid < 0)
> exit(err);
> - }
>
> /* let's process the signals now */
> sigdelset(&omask, SIGALRM);
--
Gregory Kurz gkurz at fr.ibm.com
Software Engineer @ IBM/Meiosys http://www.ibm.com
Tel +33 (0)534 638 479 Fax +33 (0)561 400 420
"Anarchy is about taking complete responsibility for yourself."
Alan Moore.
More information about the lxc-devel
mailing list