From e0e64be8de3d220a12612b3a2e4aee428277d865 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludovic=20Court=C3=A8s?= Date: Mon, 13 Oct 2025 10:39:21 +0200 Subject: [PATCH] linux-container: Remove #:lock-mounts? and related code. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commits 437bb9ece55f37d4b5a62cafc98c0c3b848a53ce and a57ed987ffd1452ba5a4d70feb54893e99b8e076, which were reported in guix/guix#1169 to occasionally cause errors like: guix shell: error: unshare : 268566528: Invalid argument --- gnu/build/linux-container.scm | 111 +++++++++++---------------------- gnu/system/linux-container.scm | 4 -- tests/containers.scm | 33 ++-------- 3 files changed, 41 insertions(+), 107 deletions(-) diff --git a/gnu/build/linux-container.scm b/gnu/build/linux-container.scm index b6f8563f7d0..0df51c390b5 100644 --- a/gnu/build/linux-container.scm +++ b/gnu/build/linux-container.scm @@ -190,10 +190,7 @@ for the process." (remount-read-only "/")))) (define* (initialize-user-namespace pid host-uids - #:key - (host-uid (getuid)) - (host-gid (getgid)) - (guest-uid 0) (guest-gid 0)) + #:key (guest-uid 0) (guest-gid 0)) "Configure the user namespace for PID. HOST-UIDS specifies the number of host user identifiers to map into the user namespace. GUEST-UID and GUEST-GID specify the first UID (respectively GID) that host UIDs (respectively GIDs) @@ -204,21 +201,24 @@ map to in the namespace." (define (scope file) (string-append proc-dir file)) - ;; Only root can write to the gid map without first disabling the - ;; setgroups syscall. - (unless (and (zero? host-uid) (zero? host-gid)) - (call-with-output-file (scope "/setgroups") - (lambda (port) - (display "deny" port)))) + (let ((uid (getuid)) + (gid (getgid))) - ;; Map the user/group that created the container to the root user - ;; within the container. - (call-with-output-file (scope "/uid_map") - (lambda (port) - (format port "~d ~d ~d" guest-uid host-uid host-uids))) - (call-with-output-file (scope "/gid_map") - (lambda (port) - (format port "~d ~d ~d" guest-gid host-gid host-uids)))) + ;; Only root can write to the gid map without first disabling the + ;; setgroups syscall. + (unless (and (zero? uid) (zero? gid)) + (call-with-output-file (scope "/setgroups") + (lambda (port) + (display "deny" port)))) + + ;; Map the user/group that created the container to the root user + ;; within the container. + (call-with-output-file (scope "/uid_map") + (lambda (port) + (format port "~d ~d ~d" guest-uid uid host-uids))) + (call-with-output-file (scope "/gid_map") + (lambda (port) + (format port "~d ~d ~d" guest-gid gid host-uids))))) (define (namespaces->bit-mask namespaces) "Return the number suitable for the 'flags' argument of 'clone' that @@ -239,14 +239,12 @@ corresponds to the symbols in NAMESPACES." #:key (guest-uid 0) (guest-gid 0) (populate-file-system (const #t)) (loopback-network? #t) - (lock-mounts? #t) writable-root?) "Run THUNK in a new container process and return its PID. ROOT specifies the root directory for the container. MOUNTS is a list of objects that specify file systems to mount inside the container. NAMESPACES is a list of symbols that correspond to the possible Linux namespaces: mnt, -ipc, uts, user, and net. When LOCK-MOUNTS? is true, arrange so that none of -MOUNTS can be unmounted or remounted individually from within THUNK. +ipc, uts, user, and net. When LOOPBACK-NETWORK? is true and 'net is amount NAMESPACES, set up the loopback device (\"lo\") and a minimal /etc/hosts. @@ -306,28 +304,6 @@ that host UIDs (respectively GIDs) map to in the namespace." ;; cannot be 'read' so they shouldn't be written as is. (write args child) (primitive-exit 3)))) - - (when (and lock-mounts? - (memq 'mnt namespaces) - (memq 'user namespaces)) - ;; Create a new mount namespace owned by a new user - ;; namespace to "lock" together previous mounts, such that - ;; they cannot be unmounted or remounted separately--see - ;; mount_namespaces(7). - ;; - ;; Note: at this point, the process is single-threaded (no - ;; GC mark threads, no finalization thread, etc.) which is - ;; why unshare(CLONE_NEWUSER) can be used. - (let ((uid (getuid)) (gid (getgid))) - (unshare (logior CLONE_NEWUSER CLONE_NEWNS)) - (when (file-exists? "/proc/self") - (initialize-user-namespace (getpid) - host-uids - #:host-uid uid - #:host-gid gid - #:guest-uid guest-uid - #:guest-gid guest-gid)))) - ;; TODO: Manage capabilities. (write 'ready child) (close-port child) @@ -400,7 +376,6 @@ if there are no child processes left." (define* (call-with-container mounts thunk #:key (namespaces %namespaces) (host-uids 1) (guest-uid 0) (guest-gid 0) - (lock-mounts? #t) (relayed-signals (list SIGINT SIGTERM)) (child-is-pid1? #t) (populate-file-system (const #t)) @@ -485,7 +460,6 @@ load path must be adjusted as needed." (call-with-temporary-directory (lambda (root) (let ((pid (run-container root mounts namespaces host-uids thunk* - #:lock-mounts? lock-mounts? #:guest-uid guest-uid #:guest-gid guest-gid #:populate-file-system populate-file-system @@ -506,35 +480,24 @@ return the exit status, an integer as returned by 'waitpid'." (0 (call-with-clean-exit (lambda () - ;; First, determine the user namespace that owns the pid namespace and - ;; join that user namespace (the assumption is that it also owns all - ;; the other namespaces). It's important that the user namespace is - ;; joined first, so that the user will have the privileges to join the - ;; other namespaces. - (let* ((pid-ns (open-fdes (namespace-file pid "pid") - (logior O_CLOEXEC O_RDONLY))) - (user-ns (get-user-ns pid-ns))) - (close-fdes pid-ns) - (unless (equal? (stat user-ns) - (stat (namespace-file (getpid) "user"))) - (setns user-ns 0)) - (close-fdes user-ns) - - ;; Then join all the remaining namespaces. - (for-each (lambda (ns) - (let ((source (namespace-file (getpid) ns)) - (target (namespace-file pid ns))) - ;; Joining the namespace that the process already - ;; belongs to would throw an error so avoid that. - ;; XXX: This /proc interface leads to TOCTTOU. - (unless (string=? (readlink source) (readlink target)) - (call-with-input-file target - (lambda (new-ns-port) - (setns (fileno new-ns-port) 0)))))) - ;; It's important that the mount namespace is joined last, - ;; otherwise the /proc mount point would no longer be - ;; accessible. - '("ipc" "uts" "net" "pid" "mnt"))) + (for-each (lambda (ns) + (let ((source (namespace-file (getpid) ns)) + (target (namespace-file pid ns))) + ;; Joining the namespace that the process already + ;; belongs to would throw an error so avoid that. + ;; XXX: This /proc interface leads to TOCTTOU. + (unless (string=? (readlink source) (readlink target)) + (call-with-input-file source + (lambda (current-ns-port) + (call-with-input-file target + (lambda (new-ns-port) + (setns (fileno new-ns-port) 0)))))))) + ;; It's important that the user namespace is joined first, + ;; so that the user will have the privileges to join the + ;; other namespaces. Furthermore, it's important that the + ;; mount namespace is joined last, otherwise the /proc mount + ;; point would no longer be accessible. + '("user" "ipc" "uts" "net" "pid" "mnt")) (purify-environment) (chdir "/") diff --git a/gnu/system/linux-container.scm b/gnu/system/linux-container.scm index d16d1e78b56..9bcdf24a7e0 100644 --- a/gnu/system/linux-container.scm +++ b/gnu/system/linux-container.scm @@ -317,10 +317,6 @@ Run the container with the given options.")) #:namespaces (if #$shared-network? (delq 'net %namespaces) %namespaces) - - ;; XXX: Work around . - #:lock-mounts? #f - #:writable-root? #t #:process-spawned-hook explain))))) diff --git a/tests/containers.scm b/tests/containers.scm index 6edea9631dc..1e915d517e8 100644 --- a/tests/containers.scm +++ b/tests/containers.scm @@ -1,6 +1,6 @@ ;;; GNU Guix --- Functional package management for GNU ;;; Copyright © 2015 David Thompson -;;; Copyright © 2016-2017, 2019, 2023, 2025 Ludovic Courtès +;;; Copyright © 2016, 2017, 2019, 2023 Ludovic Courtès ;;; ;;; This file is part of GNU Guix. ;;; @@ -110,26 +110,6 @@ (assert-exit (file-exists? "/testing"))) #:namespaces '(user mnt)))) -(skip-if-unsupported) -(test-equal "call-with-container, mnt namespace, locked mounts" - EINVAL - ;; umount(2) fails with EINVAL when targeting a mount point that is - ;; "locked". - (status:exit-val - (call-with-container (list (file-system - (device "none") - (mount-point "/testing") - (type "tmpfs") - (check? #f))) - (lambda () - (primitive-exit (catch 'system-error - (lambda () - (umount "/testing") - 0) - (lambda args - (system-error-errno args))))) - #:namespaces '(user mnt)))) - (skip-if-unsupported) (test-equal "call-with-container, mnt namespace, wrong bind mount" `(system-error ,ENOENT) @@ -189,8 +169,7 @@ #:namespaces '(user mnt)))) (skip-if-unsupported) -(test-equal "container-excursion" - 0 +(test-assert "container-excursion" (call-with-temporary-directory (lambda (root) ;; Two pipes: One for the container to signal that the test can begin, @@ -214,11 +193,7 @@ (readlink (string-append "/proc/" pid "/ns/" ns))) '("user" "ipc" "uts" "net" "pid" "mnt")))) - (let* ((pid (run-container root '() %namespaces 1 container - ;; Do not lock mounts so the user namespace - ;; appears to be the same seen from inside - ;; and from outside. - #:lock-mounts? #f)) + (let* ((pid (run-container root '() %namespaces 1 container)) (container-namespaces (namespaces pid)) (result (begin @@ -238,7 +213,7 @@ (write 'done end-out) (close end-out) (waitpid pid) - result)))))) + (zero? result))))))) (skip-if-unsupported) (test-equal "container-excursion, same namespaces"