user-chroot: Add --unshare-pid, --unshare-net, and --mount-proc
To use CLONE_NEWPID we have to actually call clone(), because unshare() does not support that flag. For CLONE_NEWPID to be useful, we also have to allow mounting a new proc filesystem instance rather than only bind-mounting an existing one.
This commit is contained in:
parent
fbb09d71a6
commit
15d23546ff
|
|
@ -37,6 +37,8 @@
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/prctl.h>
|
#include <sys/prctl.h>
|
||||||
#include <sys/mount.h>
|
#include <sys/mount.h>
|
||||||
|
#include <sys/syscall.h>
|
||||||
|
#include <sys/wait.h>
|
||||||
#include <linux/securebits.h>
|
#include <linux/securebits.h>
|
||||||
#include <sched.h>
|
#include <sched.h>
|
||||||
|
|
||||||
|
|
@ -65,24 +67,30 @@ fatal_errno (const char *message)
|
||||||
exit (1);
|
exit (1);
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef struct _BindMount BindMount;
|
typedef enum {
|
||||||
struct _BindMount {
|
MOUNT_SPEC_BIND,
|
||||||
|
MOUNT_SPEC_READONLY,
|
||||||
|
MOUNT_SPEC_PROCFS
|
||||||
|
} MountSpecType;
|
||||||
|
|
||||||
|
typedef struct _MountSpec MountSpec;
|
||||||
|
struct _MountSpec {
|
||||||
|
MountSpecType type;
|
||||||
|
|
||||||
const char *source;
|
const char *source;
|
||||||
const char *dest;
|
const char *dest;
|
||||||
|
|
||||||
unsigned int readonly;
|
MountSpec *next;
|
||||||
|
|
||||||
BindMount *next;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static BindMount *
|
static MountSpec *
|
||||||
reverse_bind_mount_list (BindMount *mount)
|
reverse_mount_list (MountSpec *mount)
|
||||||
{
|
{
|
||||||
BindMount *prev = NULL;
|
MountSpec *prev = NULL;
|
||||||
|
|
||||||
while (mount)
|
while (mount)
|
||||||
{
|
{
|
||||||
BindMount *next = mount->next;
|
MountSpec *next = mount->next;
|
||||||
mount->next = prev;
|
mount->next = prev;
|
||||||
prev = mount;
|
prev = mount;
|
||||||
mount = next;
|
mount = next;
|
||||||
|
|
@ -104,10 +112,14 @@ main (int argc,
|
||||||
unsigned int n_mounts = 0;
|
unsigned int n_mounts = 0;
|
||||||
const unsigned int max_mounts = 50; /* Totally arbitrary... */
|
const unsigned int max_mounts = 50; /* Totally arbitrary... */
|
||||||
char **program_argv;
|
char **program_argv;
|
||||||
BindMount *bind_mounts = NULL;
|
MountSpec *bind_mounts = NULL;
|
||||||
BindMount *bind_mount_iter;
|
MountSpec *bind_mount_iter;
|
||||||
int unshare_ipc = 0;
|
int unshare_ipc = 0;
|
||||||
int unshare_flags = 0;
|
int unshare_net = 0;
|
||||||
|
int unshare_pid = 0;
|
||||||
|
int clone_flags = 0;
|
||||||
|
int child_status = 0;
|
||||||
|
pid_t child;
|
||||||
|
|
||||||
if (argc <= 0)
|
if (argc <= 0)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
@ -123,7 +135,7 @@ main (int argc,
|
||||||
while (after_mount_arg_index < argc)
|
while (after_mount_arg_index < argc)
|
||||||
{
|
{
|
||||||
const char *arg = argv[after_mount_arg_index];
|
const char *arg = argv[after_mount_arg_index];
|
||||||
BindMount *mount = NULL;
|
MountSpec *mount = NULL;
|
||||||
|
|
||||||
if (n_mounts >= max_mounts)
|
if (n_mounts >= max_mounts)
|
||||||
fatal ("Too many mounts (maximum of %u)", n_mounts);
|
fatal ("Too many mounts (maximum of %u)", n_mounts);
|
||||||
|
|
@ -134,10 +146,10 @@ main (int argc,
|
||||||
if ((argc - after_mount_arg_index) < 3)
|
if ((argc - after_mount_arg_index) < 3)
|
||||||
fatal ("--mount-bind takes two arguments");
|
fatal ("--mount-bind takes two arguments");
|
||||||
|
|
||||||
mount = malloc (sizeof (BindMount));
|
mount = malloc (sizeof (MountSpec));
|
||||||
|
mount->type = MOUNT_SPEC_BIND;
|
||||||
mount->source = argv[after_mount_arg_index+1];
|
mount->source = argv[after_mount_arg_index+1];
|
||||||
mount->dest = argv[after_mount_arg_index+2];
|
mount->dest = argv[after_mount_arg_index+2];
|
||||||
mount->readonly = 0;
|
|
||||||
mount->next = bind_mounts;
|
mount->next = bind_mounts;
|
||||||
|
|
||||||
bind_mounts = mount;
|
bind_mounts = mount;
|
||||||
|
|
@ -145,15 +157,31 @@ main (int argc,
|
||||||
}
|
}
|
||||||
else if (strcmp (arg, "--mount-readonly") == 0)
|
else if (strcmp (arg, "--mount-readonly") == 0)
|
||||||
{
|
{
|
||||||
BindMount *mount;
|
MountSpec *mount;
|
||||||
|
|
||||||
if ((argc - after_mount_arg_index) < 2)
|
if ((argc - after_mount_arg_index) < 2)
|
||||||
fatal ("--mount-readonly takes one argument");
|
fatal ("--mount-readonly takes one argument");
|
||||||
|
|
||||||
mount = malloc (sizeof (BindMount));
|
mount = malloc (sizeof (MountSpec));
|
||||||
|
mount->type = MOUNT_SPEC_READONLY;
|
||||||
|
mount->source = NULL;
|
||||||
|
mount->dest = argv[after_mount_arg_index+1];
|
||||||
|
mount->next = bind_mounts;
|
||||||
|
|
||||||
|
bind_mounts = mount;
|
||||||
|
after_mount_arg_index += 2;
|
||||||
|
}
|
||||||
|
else if (strcmp (arg, "--mount-proc") == 0)
|
||||||
|
{
|
||||||
|
MountSpec *mount;
|
||||||
|
|
||||||
|
if ((argc - after_mount_arg_index) < 2)
|
||||||
|
fatal ("--mount-proc takes one argument");
|
||||||
|
|
||||||
|
mount = malloc (sizeof (MountSpec));
|
||||||
|
mount->type = MOUNT_SPEC_PROCFS;
|
||||||
mount->source = NULL;
|
mount->source = NULL;
|
||||||
mount->dest = argv[after_mount_arg_index+1];
|
mount->dest = argv[after_mount_arg_index+1];
|
||||||
mount->readonly = 1;
|
|
||||||
mount->next = bind_mounts;
|
mount->next = bind_mounts;
|
||||||
|
|
||||||
bind_mounts = mount;
|
bind_mounts = mount;
|
||||||
|
|
@ -164,14 +192,24 @@ main (int argc,
|
||||||
unshare_ipc = 1;
|
unshare_ipc = 1;
|
||||||
after_mount_arg_index += 1;
|
after_mount_arg_index += 1;
|
||||||
}
|
}
|
||||||
|
else if (strcmp (arg, "--unshare-pid") == 0)
|
||||||
|
{
|
||||||
|
unshare_pid = 1;
|
||||||
|
after_mount_arg_index += 1;
|
||||||
|
}
|
||||||
|
else if (strcmp (arg, "--unshare-net") == 0)
|
||||||
|
{
|
||||||
|
unshare_net = 1;
|
||||||
|
after_mount_arg_index += 1;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
bind_mounts = reverse_bind_mount_list (bind_mounts);
|
bind_mounts = reverse_mount_list (bind_mounts);
|
||||||
|
|
||||||
if ((argc - after_mount_arg_index) < 2)
|
if ((argc - after_mount_arg_index) < 2)
|
||||||
fatal ("usage: %s [--unshare-ipc] [--unshare-pid] [--mount-readonly DIR] [--mount-bind SOURCE DEST] ROOTDIR PROGRAM ARGS...", argv0);
|
fatal ("usage: %s [--unshare-ipc] [--unshare-pid] [--unshare-net] [--mount-proc DIR] [--mount-readonly DIR] [--mount-bind SOURCE DEST] ROOTDIR PROGRAM ARGS...", argv0);
|
||||||
chroot_dir = argv[after_mount_arg_index];
|
chroot_dir = argv[after_mount_arg_index];
|
||||||
program = argv[after_mount_arg_index+1];
|
program = argv[after_mount_arg_index+1];
|
||||||
program_argv = argv + after_mount_arg_index + 1;
|
program_argv = argv + after_mount_arg_index + 1;
|
||||||
|
|
@ -186,79 +224,117 @@ main (int argc,
|
||||||
if (rgid == 0)
|
if (rgid == 0)
|
||||||
rgid = ruid;
|
rgid = ruid;
|
||||||
|
|
||||||
/* Ensure we can't execute setuid programs. See prctl(2) and
|
/* CLONE_NEWNS makes it so that when we create bind mounts below,
|
||||||
* capabilities(7).
|
* we're only affecting our children, not the entire system. This
|
||||||
*
|
* way it's harmless to bind mount e.g. /proc over an arbitrary
|
||||||
* This closes the main historical reason why only uid 0 can
|
* directory.
|
||||||
* chroot(2) - because unprivileged users can create hard links to
|
|
||||||
* setuid binaries, and possibly confuse them into looking at data
|
|
||||||
* (or loading libraries) that they don't expect, and thus elevating
|
|
||||||
* privileges.
|
|
||||||
*/
|
*/
|
||||||
if (prctl (PR_SET_SECUREBITS,
|
clone_flags = SIGCHLD | CLONE_NEWNS;
|
||||||
SECBIT_NOROOT | SECBIT_NOROOT_LOCKED) < 0)
|
/* CLONE_NEWIPC and CLONE_NEWUTS are avenues of communication that
|
||||||
fatal_errno ("prctl (SECBIT_NOROOT)");
|
* might leak outside the container; any IPC can be done by setting
|
||||||
|
* up a bind mount and using files or sockets there, if desired.
|
||||||
/* This call makes it so that when we create bind mounts, we're only
|
|
||||||
* affecting our children, not the entire system. This way it's
|
|
||||||
* harmless to bind mount e.g. /proc over an arbitrary directory.
|
|
||||||
*/
|
*/
|
||||||
unshare_flags = CLONE_NEWNS;
|
|
||||||
if (unshare_ipc)
|
if (unshare_ipc)
|
||||||
unshare_flags |= CLONE_NEWIPC | CLONE_NEWUTS;
|
clone_flags |= (CLONE_NEWIPC | CLONE_NEWUTS);
|
||||||
if (unshare (unshare_flags) < 0)
|
/* CLONE_NEWPID helps ensure random build or test scripts don't kill
|
||||||
fatal_errno ("unshare");
|
* processes outside of the container.
|
||||||
|
|
||||||
/* This is necessary to undo the damage "sandbox" creates on Fedora
|
|
||||||
* by making / a shared mount instead of private. This isn't
|
|
||||||
* totally correct because the targets for our bind mounts may still
|
|
||||||
* be shared, but really, Fedora's sandbox is broken.
|
|
||||||
*/
|
*/
|
||||||
if (mount ("/", "/", "none", MS_PRIVATE | MS_REC, NULL) < 0)
|
if (unshare_pid)
|
||||||
fatal_errno ("mount(/, MS_PRIVATE | MS_REC)");
|
clone_flags |= CLONE_NEWPID;
|
||||||
|
|
||||||
/* Now let's set up our bind mounts */
|
/* Isolated networking */
|
||||||
for (bind_mount_iter = bind_mounts; bind_mount_iter; bind_mount_iter = bind_mount_iter->next)
|
if (unshare_net)
|
||||||
|
clone_flags |= CLONE_NEWNET;
|
||||||
|
|
||||||
|
if ((child = syscall (__NR_clone, clone_flags, NULL)) < 0)
|
||||||
|
perror ("clone");
|
||||||
|
|
||||||
|
if (child == 0)
|
||||||
{
|
{
|
||||||
char *dest;
|
/* Ensure we can't execute setuid programs. See prctl(2) and
|
||||||
|
* capabilities(7).
|
||||||
|
*
|
||||||
|
* This closes the main historical reason why only uid 0 can
|
||||||
|
* chroot(2) - because unprivileged users can create hard links to
|
||||||
|
* setuid binaries, and possibly confuse them into looking at data
|
||||||
|
* (or loading libraries) that they don't expect, and thus elevating
|
||||||
|
* privileges.
|
||||||
|
*/
|
||||||
|
if (prctl (PR_SET_SECUREBITS,
|
||||||
|
SECBIT_NOROOT | SECBIT_NOROOT_LOCKED) < 0)
|
||||||
|
fatal_errno ("prctl (SECBIT_NOROOT)");
|
||||||
|
|
||||||
asprintf (&dest, "%s%s", chroot_dir, bind_mount_iter->dest);
|
/* This is necessary to undo the damage "sandbox" creates on Fedora
|
||||||
|
* by making / a shared mount instead of private. This isn't
|
||||||
|
* totally correct because the targets for our bind mounts may still
|
||||||
|
* be shared, but really, Fedora's sandbox is broken.
|
||||||
|
*/
|
||||||
|
if (mount ("/", "/", "none", MS_PRIVATE | MS_REC, NULL) < 0)
|
||||||
|
fatal_errno ("mount(/, MS_PRIVATE | MS_REC)");
|
||||||
|
|
||||||
if (bind_mount_iter->readonly)
|
/* Now let's set up our bind mounts */
|
||||||
|
for (bind_mount_iter = bind_mounts; bind_mount_iter; bind_mount_iter = bind_mount_iter->next)
|
||||||
{
|
{
|
||||||
if (mount (dest, dest,
|
char *dest;
|
||||||
NULL, MS_BIND | MS_PRIVATE, NULL) < 0)
|
|
||||||
fatal_errno ("mount (MS_BIND)");
|
asprintf (&dest, "%s%s", chroot_dir, bind_mount_iter->dest);
|
||||||
if (mount (dest, dest,
|
|
||||||
NULL, MS_BIND | MS_PRIVATE | MS_REMOUNT | MS_RDONLY, NULL) < 0)
|
if (bind_mount_iter->type == MOUNT_SPEC_READONLY)
|
||||||
fatal_errno ("mount (MS_BIND | MS_RDONLY)");
|
{
|
||||||
|
if (mount (dest, dest,
|
||||||
|
NULL, MS_BIND | MS_PRIVATE, NULL) < 0)
|
||||||
|
fatal_errno ("mount (MS_BIND)");
|
||||||
|
if (mount (dest, dest,
|
||||||
|
NULL, MS_BIND | MS_PRIVATE | MS_REMOUNT | MS_RDONLY, NULL) < 0)
|
||||||
|
fatal_errno ("mount (MS_BIND | MS_RDONLY)");
|
||||||
|
}
|
||||||
|
else if (bind_mount_iter->type == MOUNT_SPEC_BIND)
|
||||||
|
{
|
||||||
|
if (mount (bind_mount_iter->source, dest,
|
||||||
|
NULL, MS_BIND | MS_PRIVATE, NULL) < 0)
|
||||||
|
fatal_errno ("mount (MS_BIND)");
|
||||||
|
}
|
||||||
|
else if (bind_mount_iter->type == MOUNT_SPEC_PROCFS)
|
||||||
|
{
|
||||||
|
if (mount ("proc", dest,
|
||||||
|
"proc", MS_MGC_VAL | MS_PRIVATE, NULL) < 0)
|
||||||
|
fatal_errno ("mount (\"proc\")");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
assert (0);
|
||||||
|
free (dest);
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
/* Actually perform the chroot. */
|
||||||
|
if (chroot (chroot_dir) < 0)
|
||||||
|
fatal_errno ("chroot");
|
||||||
|
if (chdir ("/") < 0)
|
||||||
|
fatal_errno ("chdir");
|
||||||
|
|
||||||
if (mount (bind_mount_iter->source, dest,
|
/* Switch back to the uid of our invoking process. These calls are
|
||||||
NULL, MS_BIND | MS_PRIVATE, NULL) < 0)
|
* irrevocable - see setuid(2) */
|
||||||
fatal_errno ("mount (MS_BIND)");
|
if (setgid (rgid) < 0)
|
||||||
}
|
fatal_errno ("setgid");
|
||||||
free (dest);
|
if (setuid (ruid) < 0)
|
||||||
|
fatal_errno ("setuid");
|
||||||
|
|
||||||
|
if (execv (program, program_argv) < 0)
|
||||||
|
fatal_errno ("execv");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Actually perform the chroot. */
|
/* Let's also setuid back in the parent - there's no reason to stay uid 0, and
|
||||||
if (chroot (chroot_dir) < 0)
|
* it's just better to drop privileges. */
|
||||||
fatal_errno ("chroot");
|
|
||||||
if (chdir ("/") < 0)
|
|
||||||
fatal_errno ("chdir");
|
|
||||||
|
|
||||||
/* Switch back to the uid of our invoking process. These calls are
|
|
||||||
* irrevocable - see setuid(2) */
|
|
||||||
if (setgid (rgid) < 0)
|
if (setgid (rgid) < 0)
|
||||||
fatal_errno ("setgid");
|
fatal_errno ("setgid");
|
||||||
if (setuid (ruid) < 0)
|
if (setuid (ruid) < 0)
|
||||||
fatal_errno ("setuid");
|
fatal_errno ("setuid");
|
||||||
|
|
||||||
/* Finally, run the given child program. */
|
/* Kind of lame to sit around blocked in waitpid, but oh well. */
|
||||||
if (execv (program, program_argv) < 0)
|
if (waitpid (child, &child_status, 0) < 0)
|
||||||
fatal_errno ("execv");
|
fatal_errno ("waitpid");
|
||||||
|
|
||||||
return 1;
|
if (WIFEXITED (child_status))
|
||||||
|
return WEXITSTATUS (child_status);
|
||||||
|
else
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue