[lxc-devel] [PATCH 1/2] Modify lxc_attach and lxc_init to use clone instead of fork.
Smirnov Vladimir
civil at yandex-team.ru
Thu Aug 25 08:11:55 UTC 2011
25.08.2011, 02:07, "Greg Kurz" <gkurz at fr.ibm.com>:
> On Wed, 2011-08-24 at 14:17 +0400, Vladimir Smirnov wrote:
>
>> Clone allows more flexible control. Currently, if there is any inherited fd,
>> lxc-start exits with error. With clone it's possible not to pass open fd's to childs.
>
> Hmm... when it comes to file descriptors, you have two flavours:
> - CLONE_FILES: the child will share the file descriptor table with the
> parent. If any of the two tasks open or close a fd, the other will also
> see the change.
> - without CLONE_FILES: the child will get a copy of the parent's file
> descriptor table. Then, each task may open or close a fd without effect
> for the other.
>
> As you see, the child shares or at least inherits a copy of all the fd's
> with its parent. Honestly, I don't know any clone flag that would
> prevent the child to be passed a fd...
Yes. I've missunderstood it. Maybe change back to fork, but 1-st action of
child should be closing all fd's except of whitelist? As it was before
rev 80090207defda9adfc922555b359b11acdad1401?
>
>> Signed-off-by: Vladimir Smirnov <civil at yandex-team.ru>
>> ---
>> src/lxc/lxc_attach.c | 79 +++++++++++++++++++++++++++++---------------------
>> src/lxc/lxc_init.c | 46 ++++++++++++++++++-----------
>> 2 files changed, 75 insertions(+), 50 deletions(-)
>>
>> diff --git a/src/lxc/lxc_attach.c b/src/lxc/lxc_attach.c
>
> Why do you need to patch lxc-attach ? AFAIK it doesn't check for
> inherited file descriptors...
No need, yes.
>
>> index ed3d5a4..930865e 100644
>> --- a/src/lxc/lxc_attach.c
>> +++ b/src/lxc/lxc_attach.c
>> @@ -20,6 +20,7 @@
>> * License along with this library; if not, write to the Free Software
>> * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
>> */
>> +#define STACKSIZE 16384
>>
>> #define _GNU_SOURCE
>> #include <unistd.h>
>> @@ -29,6 +30,7 @@
>> #include <sys/param.h>
>> #include <sys/types.h>
>> #include <sys/wait.h>
>> +#include <sched.h>
>>
>> #include "commands.h"
>> #include "arguments.h"
>> @@ -56,14 +58,48 @@ Options :\n\
>> .checker = NULL,
>> };
>>
>> +static char *envp_global;
>> +
>> +int child_main()
>> +{
>> + struct passwd *passwd;
>> + uid_t uid;
>> +
>> + if (my_args.argc) {
>> + execve(my_args.argv[0], my_args.argv, envp_global);
>> + SYSERROR("failed to exec '%s'", my_args.argv[0]);
>> + return -1;
>> + }
>> +
>> + uid = getuid();
>> +
>> + passwd = getpwuid(uid);
>> + if (!passwd) {
>> + SYSERROR("failed to get passwd " \
>> + "entry for uid '%d'", uid);
>> + return -1;
>> + }
>> +
>> + {
>> + char *const args[] = {
>> + passwd->pw_shell,
>> + NULL,
>> + };
>> +
>> + execve(args[0], args, envp_global);
>> + SYSERROR("failed to exec '%s'", args[0]);
>> + return -1;
>> + }
>> +}
>> +
>> int main(int argc, char *argv[], char *envp[])
>> {
>> int ret;
>> pid_t pid;
>> - struct passwd *passwd;
>> - uid_t uid;
>> char *curdir;
>>
>> + envp_global = envp;
>> +
>> ret = lxc_caps_init();
>> if (ret)
>> return ret;
>> @@ -96,7 +132,14 @@ int main(int argc, char *argv[], char *envp[])
>>
>> free(curdir);
>>
>> - pid = fork();
>> + void **newstack;
>> + newstack = (void **) malloc(STACKSIZE);
>> + if (!newstack)
>> + exit(newstack);
>> +
>> + newstack = (void **)(STACKSIZE + (char *)newstack);
>> + *--newstack = 0;
>> + pid = clone(child_main, newstack, CLONE_VM | CLONE_FS | CLONE_SIGHAND, 0);
>>
>> if (pid < 0) {
>> SYSERROR("failed to fork");
>> @@ -120,35 +163,5 @@ int main(int argc, char *argv[], char *envp[])
>> return -1;
>> }
>>
>> - if (!pid) {
>> -
>> - if (my_args.argc) {
>> - execve(my_args.argv[0], my_args.argv, envp);
>> - SYSERROR("failed to exec '%s'", my_args.argv[0]);
>> - return -1;
>> - }
>> -
>> - uid = getuid();
>> -
>> - passwd = getpwuid(uid);
>> - if (!passwd) {
>> - SYSERROR("failed to get passwd " \
>> - "entry for uid '%d'", uid);
>> - return -1;
>> - }
>> -
>> - {
>> - char *const args[] = {
>> - passwd->pw_shell,
>> - NULL,
>> - };
>> -
>> - execve(args[0], args, envp);
>> - SYSERROR("failed to exec '%s'", args[0]);
>> - return -1;
>> - }
>> -
>> - }
>> -
>> return 0;
>> }
>> diff --git a/src/lxc/lxc_init.c b/src/lxc/lxc_init.c
>> index a534b51..7debd6f 100644
>> --- a/src/lxc/lxc_init.c
>> +++ b/src/lxc/lxc_init.c
>> @@ -20,6 +20,7 @@
>> * License along with this library; if not, write to the Free Software
>> * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
>> */
>> +#define STACKSIZE 16384
>>
>> #include <stdio.h>
>> #include <unistd.h>
>> @@ -32,6 +33,7 @@
>> #include <sys/wait.h>
>> #define _GNU_SOURCE
>> #include <getopt.h>
>> +#include <sched.h>
>>
>> #include "log.h"
>> #include "caps.h"
>> @@ -40,6 +42,9 @@
>>
>> lxc_log_define(lxc_init, lxc);
>>
>> +static sigset_t mask, omask;
>> +static char **aargv;
>> +
>> static int quiet;
>>
>> static struct option options[] = {
>> @@ -49,6 +54,23 @@ static struct option options[] = {
>>
>> static int was_interrupted = 0;
>>
>> +int child_main()
>> +{
>> + int i;
>> + int err = -1;
>> + /* restore default signal handlers */
>> + for (i = 1; i < NSIG; i++)
>> + signal(i, SIG_DFL);
>> +
>> + sigprocmask(SIG_SETMASK, &omask, NULL);
>> +
>> + NOTICE("about to exec '%s'", aargv[0]);
>> +
>> + execvp(aargv[0], aargv);
>> + ERROR("failed to exec: '%s' : %m", aargv[0]);
>> + exit(err);
>> +}
>> +
>> int main(int argc, char *argv[])
>> {
>>
>> @@ -61,8 +83,6 @@ int main(int argc, char *argv[])
>> pid_t pid;
>> int nbargs = 0;
>> int err = -1;
>> - char **aargv;
>> - sigset_t mask, omask;
>> int i, shutdown = 0;
>>
>> while (1) {
>> @@ -115,25 +135,17 @@ int main(int argc, char *argv[])
>> if (lxc_caps_reset())
>> exit(err);
>>
>> - pid = fork();
>> -
>> - if (pid < 0)
>> + void **newstack;
>> + newstack = (void **) malloc(STACKSIZE);
>> + if (!newstack)
>> exit(err);
>>
>> - if (!pid) {
>> -
>> - /* restore default signal handlers */
>> - for (i = 1; i < NSIG; i++)
>> - signal(i, SIG_DFL);
>> + newstack = (void **)(STACKSIZE + (char *)newstack);
>> + *--newstack = 0;
>> + pid = clone(child_main, newstack, CLONE_VM | CLONE_FS | CLONE_SIGHAND, 0);
>
> Passing CLONE_VM here means execvp() will flush the memory for both
> parent and child... This means the lxc-init code doesn't run anymore and
> this isn't acceptable, as it honors the child ripping work in an
> application container.
Yes... I understand it now.
>
>> - sigprocmask(SIG_SETMASK, &omask, NULL);
>> -
>> - NOTICE("about to exec '%s'", aargv[0]);
>> -
>> - execvp(aargv[0], aargv);
>> - ERROR("failed to exec: '%s' : %m", aargv[0]);
>> + if (pid < 0)
>> exit(err);
>> - }
>>
>> /* let's process the signals now */
>> sigdelset(&omask, SIGALRM);
> --
> Gregory Kurz gkurz at fr.ibm.com
> Software Engineer @ IBM/Meiosys http://www.ibm.com
> Tel +33 (0)534 638 479 Fax +33 (0)561 400 420
>
> "Anarchy is about taking complete responsibility for yourself."
> Alan Moore.
--
С уважением, Смирнов Владимир.
http://staff.yandex-team.ru/civil
More information about the lxc-devel
mailing list