From ad87b718eda20924b378fcf2d1ea2888a146caa6 Mon Sep 17 00:00:00 2001 From: Ricardo Wurmus Date: Fri, 14 Nov 2025 22:26:20 +0100 Subject: [PATCH] import/cran: Reduce false positives in extracting imports. * tests/import/cran.scm: Add tests for extract-imports. * guix/import/cran.scm (extract-imports): New procedure, extracted from... (needed-test-inputs-in-directory): ...this procedure, which now uses it. (import-pattern): Update regex pattern. Change-Id: I07ac3f685ff08a0fa7da3c25cf1f63fbca18b95f --- guix/import/cran.scm | 36 ++++++++++++++++++++++++------------ tests/import/cran.scm | 19 +++++++++++++++++-- 2 files changed, 41 insertions(+), 14 deletions(-) diff --git a/guix/import/cran.scm b/guix/import/cran.scm index a3d008b7cfc..b24a732af97 100644 --- a/guix/import/cran.scm +++ b/guix/import/cran.scm @@ -59,6 +59,7 @@ %bioconductor-version download fetch-description + extract-imports cran->guix-package bioconductor->guix-package @@ -573,9 +574,27 @@ referenced in build system files." ;; Or perhaps... "|" ;; ...direct namespace access. - " *([A-Za-z0-9]+):::?" + " *([A-Za-z0-9._]+):::?" ")"))) +(define* (extract-imports line + #:key (initial-set (set)) (ignored-names (list))) + "Return a set of strings corresponding to R libraries that are directly +referenced by namespace on LINE." + (fold (lambda (match acc) + (let ((imported (or (match:substring match 4) + (match:substring match 5)))) + (if (or (not imported) + ;; Likely inside a string. + (odd? (string-count (match:prefix match) #\")) + ;; Part of a bigger expression. + (string-suffix? ":" (match:prefix match)) + (member imported ignored-names)) + acc + (set-insert imported acc)))) + initial-set + (list-matches import-pattern line))) + (define (needed-test-inputs-in-directory dir) "Return a set of R package names that are found in library import statements in files in the directory DIR." @@ -598,17 +617,10 @@ statements in files in the directory DIR." (cond ((eof-object? line) packages) (else - (loop - (fold (lambda (match acc) - (let ((imported (or (match:substring match 4) - (match:substring match 5)))) - (if (or (not imported) - (string=? imported package-directory-name) - (member imported default-r-packages)) - acc - (set-insert imported acc)))) - packages - (list-matches import-pattern line)))))))))) + (loop (extract-imports line + #:initial-set packages + #:ignored-names (cons package-directory-name + default-r-packages)))))))))) (set) (append-map (lambda (directory) (find-files directory "\\.(R|Rmd)")) diff --git a/tests/import/cran.scm b/tests/import/cran.scm index 5c820b1ab32..6b1bcc297dd 100644 --- a/tests/import/cran.scm +++ b/tests/import/cran.scm @@ -1,5 +1,5 @@ ;;; GNU Guix --- Functional package management for GNU -;;; Copyright © 2015 Ricardo Wurmus +;;; Copyright © 2015, 2025 Ricardo Wurmus ;;; Copyright © 2017 Mathieu Othacehe ;;; ;;; This file is part of GNU Guix. @@ -20,6 +20,7 @@ (define-module (test-cran) #:use-module (gnu packages statistics) #:use-module (guix import cran) + #:use-module (guix sets) #:use-module (guix tests) #:use-module (srfi srfi-1) #:use-module (srfi srfi-64) @@ -88,7 +89,21 @@ Date/Publication: 2015-07-14 14:15:16 '() ((@@ (guix import cran) listify) simple-alist "BadList")) -(test-equal "r-mininal is not a cran package" +(test-equal "extract-imports: finds data.table" + (list "data.table") + (set->list ((@ (guix import cran) extract-imports) "abc + data.table::some_procedure()"))) + +(test-equal "extract-imports: ignores text inside strings" + (list) + (set->list ((@ (guix import cran) extract-imports) + "\"hello::world\", \"this is not data.table::some_procedure(), actually\""))) + +(test-equal "extract-imports: ignores other colon separated things" + (list) + (set->list ((@ (guix import cran) extract-imports) + "this:is:not::a:procedure"))) + +(test-equal "r-minimal is not a cran package" #f ((@@ (guix import cran) cran-package?) r-minimal))