[lxc-devel] [PATCH 1/2] Modify lxc_attach and lxc_init to use clone instead of fork.

Smirnov Vladimir civil at yandex-team.ru
Thu Aug 25 08:11:55 UTC 2011



25.08.2011, 02:07, "Greg Kurz" <gkurz at fr.ibm.com>:
> On Wed, 2011-08-24 at 14:17 +0400, Vladimir Smirnov wrote:
>
>>  Clone allows more flexible control. Currently, if there is any inherited fd,
>>  lxc-start exits with error. With clone it's possible not to pass open fd's to childs.
>
> Hmm... when it comes to file descriptors, you have two flavours:
> - CLONE_FILES: the child will share the file descriptor table with the
> parent. If any of the two tasks open or close a fd, the other will also
> see the change.
> - without CLONE_FILES: the child will get a copy of the parent's file
> descriptor table. Then, each task may open or close a fd without effect
> for the other.
>
> As you see, the child shares or at least inherits a copy of all the fd's
> with its parent. Honestly, I don't know any clone flag that would
> prevent the child to be passed a fd...

Yes. I've missunderstood it. Maybe change back to fork, but 1-st action of
child should be closing all fd's except of whitelist? As it was before
rev 80090207defda9adfc922555b359b11acdad1401?

>
>>  Signed-off-by: Vladimir Smirnov <civil at yandex-team.ru>
>>  ---
>>   src/lxc/lxc_attach.c |   79 +++++++++++++++++++++++++++++---------------------
>>   src/lxc/lxc_init.c   |   46 ++++++++++++++++++-----------
>>   2 files changed, 75 insertions(+), 50 deletions(-)
>>
>>  diff --git a/src/lxc/lxc_attach.c b/src/lxc/lxc_attach.c
>
> Why do you need to patch lxc-attach ? AFAIK it doesn't check for
> inherited file descriptors...

No need, yes.

>
>>  index ed3d5a4..930865e 100644
>>  --- a/src/lxc/lxc_attach.c
>>  +++ b/src/lxc/lxc_attach.c
>>  @@ -20,6 +20,7 @@
>>    * License along with this library; if not, write to the Free Software
>>    * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
>>    */
>>  +#define STACKSIZE 16384
>>
>>   #define _GNU_SOURCE
>>   #include <unistd.h>
>>  @@ -29,6 +30,7 @@
>>   #include <sys/param.h>
>>   #include <sys/types.h>
>>   #include <sys/wait.h>
>>  +#include <sched.h>
>>
>>   #include "commands.h"
>>   #include "arguments.h"
>>  @@ -56,14 +58,48 @@ Options :\n\
>>           .checker  = NULL,
>>   };
>>
>>  +static char *envp_global;
>>  +
>>  +int child_main()
>>  +{
>>  + struct passwd *passwd;
>>  + uid_t uid;
>>  +
>>  + if (my_args.argc) {
>>  + execve(my_args.argv[0], my_args.argv, envp_global);
>>  + SYSERROR("failed to exec '%s'", my_args.argv[0]);
>>  + return -1;
>>  + }
>>  +
>>  + uid = getuid();
>>  +
>>  + passwd = getpwuid(uid);
>>  + if (!passwd) {
>>  + SYSERROR("failed to get passwd " \
>>  + "entry for uid '%d'", uid);
>>  + return -1;
>>  + }
>>  +
>>  + {
>>  + char *const args[] = {
>>  + passwd->pw_shell,
>>  + NULL,
>>  + };
>>  +
>>  + execve(args[0], args, envp_global);
>>  + SYSERROR("failed to exec '%s'", args[0]);
>>  + return -1;
>>  + }
>>  +}
>>  +
>>   int main(int argc, char *argv[], char *envp[])
>>   {
>>           int ret;
>>           pid_t pid;
>>  - struct passwd *passwd;
>>  - uid_t uid;
>>           char *curdir;
>>
>>  + envp_global = envp;
>>  +
>>           ret = lxc_caps_init();
>>           if (ret)
>>                   return ret;
>>  @@ -96,7 +132,14 @@ int main(int argc, char *argv[], char *envp[])
>>
>>           free(curdir);
>>
>>  - pid = fork();
>>  + void **newstack;
>>  + newstack = (void **) malloc(STACKSIZE);
>>  + if (!newstack)
>>  + exit(newstack);
>>  +
>>  + newstack = (void **)(STACKSIZE + (char *)newstack);
>>  + *--newstack = 0;
>>  + pid = clone(child_main, newstack, CLONE_VM | CLONE_FS | CLONE_SIGHAND, 0);
>>
>>           if (pid < 0) {
>>                   SYSERROR("failed to fork");
>>  @@ -120,35 +163,5 @@ int main(int argc, char *argv[], char *envp[])
>>                   return -1;
>>           }
>>
>>  - if (!pid) {
>>  -
>>  - if (my_args.argc) {
>>  - execve(my_args.argv[0], my_args.argv, envp);
>>  - SYSERROR("failed to exec '%s'", my_args.argv[0]);
>>  - return -1;
>>  - }
>>  -
>>  - uid = getuid();
>>  -
>>  - passwd = getpwuid(uid);
>>  - if (!passwd) {
>>  - SYSERROR("failed to get passwd " \
>>  - "entry for uid '%d'", uid);
>>  - return -1;
>>  - }
>>  -
>>  - {
>>  - char *const args[] = {
>>  - passwd->pw_shell,
>>  - NULL,
>>  - };
>>  -
>>  - execve(args[0], args, envp);
>>  - SYSERROR("failed to exec '%s'", args[0]);
>>  - return -1;
>>  - }
>>  -
>>  - }
>>  -
>>           return 0;
>>   }
>>  diff --git a/src/lxc/lxc_init.c b/src/lxc/lxc_init.c
>>  index a534b51..7debd6f 100644
>>  --- a/src/lxc/lxc_init.c
>>  +++ b/src/lxc/lxc_init.c
>>  @@ -20,6 +20,7 @@
>>    * License along with this library; if not, write to the Free Software
>>    * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
>>    */
>>  +#define STACKSIZE 16384
>>
>>   #include <stdio.h>
>>   #include <unistd.h>
>>  @@ -32,6 +33,7 @@
>>   #include <sys/wait.h>
>>   #define _GNU_SOURCE
>>   #include <getopt.h>
>>  +#include <sched.h>
>>
>>   #include "log.h"
>>   #include "caps.h"
>>  @@ -40,6 +42,9 @@
>>
>>   lxc_log_define(lxc_init, lxc);
>>
>>  +static sigset_t mask, omask;
>>  +static char **aargv;
>>  +
>>   static int quiet;
>>
>>   static struct option options[] = {
>>  @@ -49,6 +54,23 @@ static struct option options[] = {
>>
>>   static int was_interrupted = 0;
>>
>>  +int child_main()
>>  +{
>>  + int i;
>>  + int err = -1;
>>  + /* restore default signal handlers */
>>  + for (i = 1; i < NSIG; i++)
>>  + signal(i, SIG_DFL);
>>  +
>>  + sigprocmask(SIG_SETMASK, &omask, NULL);
>>  +
>>  + NOTICE("about to exec '%s'", aargv[0]);
>>  +
>>  + execvp(aargv[0], aargv);
>>  + ERROR("failed to exec: '%s' : %m", aargv[0]);
>>  + exit(err);
>>  +}
>>  +
>>   int main(int argc, char *argv[])
>>   {
>>
>>  @@ -61,8 +83,6 @@ int main(int argc, char *argv[])
>>           pid_t pid;
>>           int nbargs = 0;
>>           int err = -1;
>>  - char **aargv;
>>  - sigset_t mask, omask;
>>           int i, shutdown = 0;
>>
>>           while (1) {
>>  @@ -115,25 +135,17 @@ int main(int argc, char *argv[])
>>           if (lxc_caps_reset())
>>                   exit(err);
>>
>>  - pid = fork();
>>  -
>>  - if (pid < 0)
>>  + void **newstack;
>>  + newstack = (void **) malloc(STACKSIZE);
>>  + if (!newstack)
>>                   exit(err);
>>
>>  - if (!pid) {
>>  -
>>  - /* restore default signal handlers */
>>  - for (i = 1; i < NSIG; i++)
>>  - signal(i, SIG_DFL);
>>  + newstack = (void **)(STACKSIZE + (char *)newstack);
>>  + *--newstack = 0;
>>  + pid = clone(child_main, newstack, CLONE_VM | CLONE_FS | CLONE_SIGHAND, 0);
>
> Passing CLONE_VM here means execvp() will flush the memory for both
> parent and child... This means the lxc-init code doesn't run anymore and
> this isn't acceptable, as it honors the child ripping work in an
> application container.

Yes... I understand it now.

>
>>  - sigprocmask(SIG_SETMASK, &omask, NULL);
>>  -
>>  - NOTICE("about to exec '%s'", aargv[0]);
>>  -
>>  - execvp(aargv[0], aargv);
>>  - ERROR("failed to exec: '%s' : %m", aargv[0]);
>>  + if (pid < 0)
>>                   exit(err);
>>  - }
>>
>>           /* let's process the signals now */
>>           sigdelset(&omask, SIGALRM);
> --
> Gregory Kurz                                     gkurz at fr.ibm.com
> Software Engineer @ IBM/Meiosys                  http://www.ibm.com
> Tel +33 (0)534 638 479                           Fax +33 (0)561 400 420
>
> "Anarchy is about taking complete responsibility for yourself."
>        Alan Moore.

-- 
С уважением, Смирнов Владимир.
http://staff.yandex-team.ru/civil




More information about the lxc-devel mailing list