From 2c1fe0df11ae0f66392b8abb6f62430d79305538 Mon Sep 17 00:00:00 2001 From: Maxim Cournoyer Date: Fri, 24 Oct 2025 16:06:12 +0900 Subject: [PATCH] Use mmap for the elf parser, reducing memory usage. The `file->bytevector' new procedure uses a memory mapped bytevector, so parsing the ELF file reads only the sections needed, not the whole file. * guix/scripts/pack.scm (wrapped-package): Use file->bytevector. * guix/build/gremlin.scm (file-dynamic-info): Likewise. (validate-needed-in-runpath): Likewise. (strip-runpath): Likewise, and write to bytevector directly, avoiding a port. (set-file-runpath): Likewise. * tests/gremlin.scm (read-elf): Delete procedure. ("elf-dynamic-info-needed, executable"): Use file-dynamic-info. ("strip-runpath"): Likewise. ("elf-dynamic-info-soname"): Likewise. guix/build/debug-link.scm (set-debuglink-crc): Use file->bytevector. * tests/debug-link.scm (read-elf): Delete procedure. ("elf-debuglink"): Rename to... ("elf-debuglink, no .gnu_debuglink section"): ... this. ("elf-debuglink", "set-debuglink-crc"): Use external store, and adjust to use file->bytevector. * gnu/packages/gnuzilla.scm (icecat-minimal) [#:phases] {build-sandbox-whitelist}: Use `file-runpath'. * gnu/packages/librewolf.scm (librewolf): Likewise. Fixes: Fixes: #1262 Change-Id: I43b77ed0cdc38994ea89d3d401e0d136aa6b187a --- gnu/packages/gnuzilla.scm | 8 +- gnu/packages/librewolf.scm | 6 +- guix/build/debug-link.scm | 17 ++-- guix/build/gremlin.scm | 62 ++++++------ guix/scripts/pack.scm | 13 +-- tests/debug-link.scm | 187 +++++++++++++++++++------------------ tests/gremlin.scm | 18 +--- 7 files changed, 147 insertions(+), 164 deletions(-) diff --git a/gnu/packages/gnuzilla.scm b/gnu/packages/gnuzilla.scm index 259f9a6fc6e..d24797b85a0 100644 --- a/gnu/packages/gnuzilla.scm +++ b/gnu/packages/gnuzilla.scm @@ -996,16 +996,10 @@ preferences/advanced-scripts.dtd" (search-input-file inputs "lib/libavcodec.so"))))) (add-after 'fix-ffmpeg-runtime-linker 'build-sandbox-whitelist (lambda* (#:key inputs #:allow-other-keys) - (define (runpath-of lib) - (call-with-input-file lib - (compose elf-dynamic-info-runpath - elf-dynamic-info - parse-elf - get-bytevector-all))) (define (runpaths-of-input label) (let* ((dir (string-append (assoc-ref inputs label) "/lib")) (libs (find-files dir "\\.so$"))) - (append-map runpath-of libs))) + (append-map file-runpath libs))) ;; Populate the sandbox read-path whitelist as needed by ffmpeg. (let* ((whitelist (map (cut string-append <> "/") diff --git a/gnu/packages/librewolf.scm b/gnu/packages/librewolf.scm index f8800b19252..6c852d7f1c2 100644 --- a/gnu/packages/librewolf.scm +++ b/gnu/packages/librewolf.scm @@ -530,15 +530,11 @@ ;; The following two functions are from Guix's icecat package in ;; (gnu packages gnuzilla). See commit ;; b7a0935420ee630a29b7e5ac73a32ba1eb24f00b. - (define (runpath-of lib) - (call-with-input-file lib - (compose elf-dynamic-info-runpath elf-dynamic-info - parse-elf get-bytevector-all))) (define (runpaths-of-input label) (let* ((dir (string-append (assoc-ref inputs label) "/lib")) (libs (find-files dir "\\.so$"))) - (append-map runpath-of libs))) + (append-map file-runpath libs))) (let* ((out (assoc-ref outputs "out")) (lib (string-append out "/lib")) (libs (map diff --git a/guix/build/debug-link.scm b/guix/build/debug-link.scm index 80941df2fc7..7a74e6001b7 100644 --- a/guix/build/debug-link.scm +++ b/guix/build/debug-link.scm @@ -1,5 +1,6 @@ ;;; GNU Guix --- Functional package management for GNU ;;; Copyright © 2018, 2023 Ludovic Courtès +;;; Copyright © 2025 Maxim Cournoyer ;;; ;;; This file is part of GNU Guix. ;;; @@ -18,6 +19,7 @@ (define-module (guix build debug-link) #:use-module (guix elf) + #:use-module (guix build io) #:use-module ((guix build utils) #:select (find-files elf-file? make-file-writable)) #:use-module (rnrs bytevectors) @@ -147,16 +149,13 @@ Return #f for both if ELF lacks a '.gnu_debuglink' section." (define (set-debuglink-crc file debug-file) "Compute the CRC of DEBUG-FILE and set it as the '.gnu_debuglink' CRC in FILE." - (let* ((elf (parse-elf (call-with-input-file file get-bytevector-all))) + (let* ((bv (file->bytevector file #:protection (logior PROT_READ PROT_WRITE))) + (elf (parse-elf bv)) (offset (elf-debuglink-crc-offset elf))) - (and offset - (let* ((crc (call-with-input-file debug-file debuglink-crc32)) - (bv (make-bytevector 4))) - (bytevector-u32-set! bv 0 crc (elf-byte-order elf)) - (let ((port (open file O_RDWR))) - (set-port-position! port offset) - (put-bytevector port bv) - (close-port port)))))) + (when offset + (let ((crc (call-with-input-file debug-file debuglink-crc32))) + (bytevector-u32-set! bv offset crc (elf-byte-order elf)) + (munmap bv))))) ;;; diff --git a/guix/build/gremlin.scm b/guix/build/gremlin.scm index 2a74d51dd91..2392a74a359 100644 --- a/guix/build/gremlin.scm +++ b/guix/build/gremlin.scm @@ -1,5 +1,6 @@ ;;; GNU Guix --- Functional package management for GNU ;;; Copyright © 2015, 2018, 2020 Ludovic Courtès +;;; Copyright © 2025 Maxim Cournoyer ;;; ;;; This file is part of GNU Guix. ;;; @@ -18,6 +19,7 @@ (define-module (guix build gremlin) #:use-module (guix elf) + #:use-module (guix build io) #:use-module ((guix build utils) #:select (store-file-name?)) #:use-module (ice-9 match) #:use-module (srfi srfi-1) @@ -248,9 +250,7 @@ string table if the type is a string." (define (file-dynamic-info file) "Return the record of FILE, or #f if FILE lacks dynamic info." - (call-with-input-file file - (lambda (port) - (elf-dynamic-info (parse-elf (get-bytevector-all port)))))) + (elf-dynamic-info (parse-elf (file->bytevector file)))) (define (file-runpath file) "Return the DT_RUNPATH dynamic entry of FILE as a list of strings, or #f if @@ -362,8 +362,7 @@ exceeds total size~%" (elf-segment-type segment)) #f))) - (let* ((elf (call-with-input-file file - (compose parse-elf get-bytevector-all))) + (let* ((elf (parse-elf (file->bytevector file))) (expand (cute expand-origin <> (dirname file))) (dyninfo (elf-dynamic-info elf))) (when dyninfo @@ -402,12 +401,13 @@ according to DT_NEEDED." needed))) runpath)) - (define port - (open-file file "r+b")) + (define bv (file->bytevector file #:protection + (logior PROT_READ PROT_WRITE))) - (catch #t + (dynamic-wind + (const #t) (lambda () - (let* ((elf (parse-elf (get-bytevector-all port))) + (let* ((elf (parse-elf bv)) (entries (dynamic-entries elf (dynamic-link-segment elf))) (needed (filter-map (lambda (entry) (and (= (dynamic-entry-type entry) @@ -425,15 +425,14 @@ according to DT_NEEDED." "~a: stripping RUNPATH to ~s (removed ~s)~%" file new (lset-difference string=? old new)) - (seek port (dynamic-entry-offset runpath) SEEK_SET) - (put-bytevector port (string->utf8 (string-join new ":"))) - (put-u8 port 0)) - (close-port port) + ;; Write to bytevector directly. + (let ((src (string->utf8 (string-append (string-join new ":") + "\0")))) + (bytevector-copy! src 0 bv (dynamic-entry-offset runpath) + (bytevector-length src)))) new)) - (lambda (key . args) - (false-if-exception (close-port port)) - (apply throw key args)))) - + (lambda () + (munmap bv)))) (define-condition-type &missing-runpath-error &elf-error missing-runpath-error? @@ -447,20 +446,18 @@ according to DT_NEEDED." "Set the value of the DT_RUNPATH dynamic entry of FILE, which must name an ELF file, to PATH, a list of strings. Raise a &missing-runpath-error or &runpath-too-long-error when appropriate." - (define (call-with-input+output-file file proc) - (let ((port (open-file file "r+b"))) - (guard (c (#t (close-port port) (raise c))) - (proc port) - (close-port port)))) - - (call-with-input+output-file file - (lambda (port) - (let* ((elf (parse-elf (get-bytevector-all port))) + (define bv (file->bytevector file #:protection + (logior PROT_READ PROT_WRITE))) + (dynamic-wind + (const #t) + (lambda () + (let* ((elf (parse-elf bv)) (entries (dynamic-entries elf (dynamic-link-segment elf))) (runpath (find (lambda (entry) (= DT_RUNPATH (dynamic-entry-type entry))) entries)) - (path (string->utf8 (string-join path ":")))) + (path (string->utf8 (string-append (string-join path ":") + "\0")))) (unless runpath (raise (condition (&missing-runpath-error (elf elf) (file file))))) @@ -473,10 +470,7 @@ ELF file, to PATH, a list of strings. Raise a &missing-runpath-error or (raise (condition (&runpath-too-long-error (elf #f #;elf) (file file))))) - (seek port (dynamic-entry-offset runpath) SEEK_SET) - (put-bytevector port path) - (put-u8 port 0))))) - -;;; Local Variables: -;;; eval: (put 'call-with-input+output-file 'scheme-indent-function 1) -;;; End: + (bytevector-copy! path 0 bv (dynamic-entry-offset runpath) + (bytevector-length path)))) + (lambda () + (munmap bv)))) diff --git a/guix/scripts/pack.scm b/guix/scripts/pack.scm index a6a7babf595..432e846bf4f 100644 --- a/guix/scripts/pack.scm +++ b/guix/scripts/pack.scm @@ -5,7 +5,7 @@ ;;; Copyright © 2018 Chris Marusich ;;; Copyright © 2018 Efraim Flashner ;;; Copyright © 2020 Tobias Geerinckx-Rice -;;; Copyright © 2020, 2021, 2022, 2023 Maxim Cournoyer +;;; Copyright © 2020-2023, 2025 Maxim Cournoyer ;;; Copyright © 2020 Eric Bavier ;;; Copyright © 2022 Alex Griffin ;;; Copyright © 2023 Graham James Addis @@ -1221,12 +1221,14 @@ libfakechroot.so and related ld.so machinery as a fallback." (define build (with-imported-modules (source-module-closure - '((guix build utils) + '((guix build io) + (guix build utils) (guix build union) (guix build gremlin) (guix elf))) #~(begin - (use-modules (guix build utils) + (use-modules (guix build io) + (guix build utils) ((guix build union) #:select (symlink-relative)) (guix elf) (guix build gremlin) @@ -1260,7 +1262,7 @@ libfakechroot.so and related ld.so machinery as a fallback." (match (find (lambda (segment) (= (elf-segment-type segment) PT_INTERP)) (elf-segments elf)) - (#f #f) ;maybe a .so + (#f #f) ;maybe a .so (segment (let ((bv (make-bytevector (- (elf-segment-memsz segment) 1)))) (bytevector-copy! (elf-bytes elf) @@ -1280,8 +1282,7 @@ libfakechroot.so and related ld.so machinery as a fallback." #$(if fakechroot? ;; TODO: Handle scripts by wrapping their interpreter. #~(if (elf-file? program) - (let* ((bv (call-with-input-file program - get-bytevector-all)) + (let* ((bv (file->bytevector program)) (elf (parse-elf bv)) (interp (elf-interpreter elf)) (gconv (and interp diff --git a/tests/debug-link.scm b/tests/debug-link.scm index a1ae4f141c0..7ccc054a5d9 100644 --- a/tests/debug-link.scm +++ b/tests/debug-link.scm @@ -1,5 +1,6 @@ ;;; GNU Guix --- Functional package management for GNU ;;; Copyright © 2018 Ludovic Courtès +;;; Copyright © 2025 Maxim Cournoyer ;;; ;;; This file is part of GNU Guix. ;;; @@ -20,12 +21,15 @@ #:use-module (guix elf) #:use-module (guix build utils) #:use-module (guix build debug-link) + #:use-module (guix build io) #:use-module (guix gexp) + #:use-module (guix modules) #:use-module (guix store) #:use-module (guix tests) #:use-module (guix monads) #:use-module (guix derivations) #:use-module (gnu packages bootstrap) + #:use-module ((gnu packages guile) #:select (guile-3.0)) #:use-module (srfi srfi-1) #:use-module (srfi srfi-26) #:use-module (srfi srfi-64) @@ -40,15 +44,12 @@ (_ #f))) -(define read-elf - (compose parse-elf get-bytevector-all)) - (test-begin "debug-link") (unless %guile-executable (test-skip 1)) -(test-assert "elf-debuglink" - (let ((elf (call-with-input-file %guile-executable read-elf))) +(test-assert "elf-debuglink, no .gnu_debuglink section" + (let ((elf (parse-elf (file->bytevector %guile-executable)))) (match (call-with-values (lambda () (elf-debuglink elf)) list) ((#f #f) ;no '.gnu_debuglink' section (pk 'no-debuglink #t)) @@ -56,95 +57,101 @@ (string-suffix? ".debug" file))))) ;; Since we need %BOOTSTRAP-GCC and co., we have to skip the following tests -;; when networking is unreachable because we'd fail to download it. -(unless (network-reachable?) (test-skip 1)) -(test-assertm "elf-debuglink" - ;; Check whether we can compute the CRC just like objcopy, and whether we - ;; can retrieve it. - (let* ((code (plain-file "test.c" "int main () { return 42; }")) - (exp (with-imported-modules '((guix build utils) - (guix build debug-link) - (guix elf)) - #~(begin - (use-modules (guix build utils) - (guix build debug-link) - (guix elf) - (rnrs io ports)) +;; when networking is unreachable because we'd fail to download it. Since +;; using mmap to load ELF more efficiently, we also need the regular Guile +;; package, as guile-bootstrap cannot resolve dynamic symbols. +(with-external-store store + (unless (and (network-reachable?) store) (test-skip 1)) + (test-assertm "elf-debuglink" + ;; Check whether we can compute the CRC just like objcopy, and whether we + ;; can retrieve it. + (let* ((code (plain-file "test.c" "int main () { return 42; }")) + (exp (with-imported-modules (source-module-closure + '((guix build io) + (guix build utils) + (guix build debug-link) + (guix elf))) + #~(begin + (use-modules (guix build io) + (guix build utils) + (guix build debug-link) + (guix elf) + (rnrs io ports)) - (define read-elf - (compose parse-elf get-bytevector-all)) + (define read-elf + (compose parse-elf file->bytevector)) - (setenv "PATH" (string-join '(#$%bootstrap-gcc - #$%bootstrap-binutils) - "/bin:" 'suffix)) - (invoke "gcc" "-O0" "-g" #$code "-o" "exe") - (copy-file "exe" "exe.debug") - (invoke "strip" "--only-keep-debug" "exe.debug") - (invoke "strip" "--strip-debug" "exe") - (invoke "objcopy" "--add-gnu-debuglink=exe.debug" - "exe") - (call-with-values (lambda () - (elf-debuglink - (call-with-input-file "exe" - read-elf))) - (lambda (file crc) - (call-with-output-file #$output - (lambda (port) - (let ((expected (call-with-input-file "exe.debug" - debuglink-crc32))) - (write (list file (= crc expected)) - port)))))))))) - (mlet* %store-monad ((drv (gexp->derivation "debuglink" exp)) - (x (built-derivations (list drv)))) - (call-with-input-file (derivation->output-path drv) - (lambda (port) - (return (match (read port) - (("exe.debug" #t) #t) - (x (pk 'fail x #f))))))))) + (setenv "PATH" (string-join '(#$%bootstrap-gcc + #$%bootstrap-binutils) + "/bin:" 'suffix)) + (invoke "gcc" "-O0" "-g" #$code "-o" "exe") + (copy-file "exe" "exe.debug") + (invoke "strip" "--only-keep-debug" "exe.debug") + (invoke "strip" "--strip-debug" "exe") + (invoke "objcopy" "--add-gnu-debuglink=exe.debug" + "exe") + (call-with-values (lambda () + (elf-debuglink (read-elf "exe"))) + (lambda (file crc) + (call-with-output-file #$output + (lambda (port) + (let ((expected (call-with-input-file "exe.debug" + debuglink-crc32))) + (write (list file (= crc expected)) + port)))))))))) + (mlet* %store-monad ((drv (gexp->derivation "debuglink" exp)) + (x (built-derivations (list drv)))) + (call-with-input-file (derivation->output-path drv) + (lambda (port) + (return (match (read port) + (("exe.debug" #t) #t) + (x (pk 'fail x #f))))))))) -(unless (network-reachable?) (test-skip 1)) -(test-assertm "set-debuglink-crc" - ;; Check whether 'set-debuglink-crc' successfully updates the CRC. - (let* ((code (plain-file "test.c" "int main () { return 42; }")) - (debug (plain-file "exe.debug" "a")) - (exp (with-imported-modules '((guix build utils) - (guix build debug-link) - (guix elf)) - #~(begin - (use-modules (guix build utils) - (guix build debug-link) - (guix elf) - (rnrs io ports)) + (unless (and (network-reachable?) store) (test-skip 1)) + (test-assertm "set-debuglink-crc" + ;; Check whether 'set-debuglink-crc' successfully updates the CRC. + (let* ((code (plain-file "test.c" "int main () { return 42; }")) + (debug (plain-file "exe.debug" "a")) + (exp (with-imported-modules (source-module-closure + '((guix build io) + (guix build utils) + (guix build debug-link) + (guix elf))) + #~(begin + (use-modules (guix build io) + (guix build utils) + (guix build debug-link) + (guix elf) + (rnrs io ports)) - (define read-elf - (compose parse-elf get-bytevector-all)) + (define read-elf + (compose parse-elf file->bytevector)) - (setenv "PATH" (string-join '(#$%bootstrap-gcc - #$%bootstrap-binutils) - "/bin:" 'suffix)) - (invoke "gcc" "-O0" "-g" #$code "-o" "exe") - (copy-file "exe" "exe.debug") - (invoke "strip" "--only-keep-debug" "exe.debug") - (invoke "strip" "--strip-debug" "exe") - (invoke "objcopy" "--add-gnu-debuglink=exe.debug" - "exe") - (set-debuglink-crc "exe" #$debug) - (call-with-values (lambda () - (elf-debuglink - (call-with-input-file "exe" - read-elf))) - (lambda (file crc) - (call-with-output-file #$output - (lambda (port) - (write (list file crc) port))))))))) - (mlet* %store-monad ((drv (gexp->derivation "debuglink" exp)) - (x (built-derivations (list drv)))) - (call-with-input-file (derivation->output-path drv) - (lambda (port) - (return (match (read port) - (("exe.debug" crc) - (= crc (debuglink-crc32 (open-input-string "a")))) - (x - (pk 'fail x #f))))))))) + (setenv "PATH" (string-join '(#$%bootstrap-gcc + #$%bootstrap-binutils) + "/bin:" 'suffix)) + (invoke "gcc" "-O0" "-g" #$code "-o" "exe") + (copy-file "exe" "exe.debug") + (invoke "strip" "--only-keep-debug" "exe.debug") + (invoke "strip" "--strip-debug" "exe") + (invoke "objcopy" "--add-gnu-debuglink=exe.debug" + "exe") + (set-debuglink-crc "exe" #$debug) + (call-with-values (lambda () + (elf-debuglink + (read-elf "exe"))) + (lambda (file crc) + (call-with-output-file #$output + (lambda (port) + (write (list file crc) port))))))))) + (mlet* %store-monad ((drv (gexp->derivation "debuglink" exp)) + (x (built-derivations (list drv)))) + (call-with-input-file (derivation->output-path drv) + (lambda (port) + (return (match (read port) + (("exe.debug" crc) + (= crc (debuglink-crc32 (open-input-string "a")))) + (x + (pk 'fail x #f)))))))))) (test-end "debug-link") diff --git a/tests/gremlin.scm b/tests/gremlin.scm index 280b1d88192..44237e2ad37 100644 --- a/tests/gremlin.scm +++ b/tests/gremlin.scm @@ -23,6 +23,7 @@ #:use-module (guix tests) #:use-module ((guix utils) #:select (call-with-temporary-directory target-aarch64?)) + #:use-module (guix build io) #:use-module (guix build utils) #:use-module (guix build gremlin) #:use-module (gnu packages bootstrap) @@ -44,9 +45,6 @@ (_ #f))) -(define read-elf - (compose parse-elf get-bytevector-all)) - (define c-compiler (or (which "gcc") (which "cc") (which "g++"))) @@ -55,8 +53,7 @@ (unless %guile-executable (test-skip 1)) (test-assert "elf-dynamic-info-needed, executable" - (let* ((elf (call-with-input-file %guile-executable read-elf)) - (dyninfo (elf-dynamic-info elf))) + (let ((dyninfo (file-dynamic-info %guile-executable))) (or (not dyninfo) ;static executable (lset<= string=? (list (string-append "libguile-" (effective-version)) @@ -140,9 +137,7 @@ (display "int main () { puts(\"hello\"); }" port))) (invoke c-compiler "t.c" "-Wl,--enable-new-dtags" "-Wl,-rpath=/foo" "-Wl,-rpath=/bar") - (let* ((dyninfo (elf-dynamic-info - (parse-elf (call-with-input-file "a.out" - get-bytevector-all)))) + (let* ((dyninfo (file-dynamic-info "a.out")) (old (elf-dynamic-info-runpath dyninfo)) (new (strip-runpath "a.out")) (new* (strip-runpath "a.out"))) @@ -196,10 +191,7 @@ (display "// empty file" port))) (invoke c-compiler "t.c" "-shared" "-Wl,-soname,libfoo.so.2") - (let* ((dyninfo (elf-dynamic-info - (parse-elf (call-with-input-file "a.out" - get-bytevector-all)))) - (soname (elf-dynamic-info-soname dyninfo))) - soname))))) + (let ((dyninfo (file-dynamic-info "a.out"))) + (elf-dynamic-info-soname dyninfo)))))) (test-end "gremlin")