From 4aef912f826f8fb773d9f5ba3ff4d1bed2ca337f Mon Sep 17 00:00:00 2001
From: pinoaffe
Date: Thu, 28 Aug 2025 15:26:23 +0200
Subject: [PATCH] gnu: Add pdf-tocgen.

* gnu/packages/pdf.scm (pdf-tocgen): New variable.

Change-Id: Icf78f177e87d35de00d5c4a50224dcc1a7a9273a
Signed-off-by: Liliana Marie Prikler
---
 gnu/packages/pdf.scm | 53 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/gnu/packages/pdf.scm b/gnu/packages/pdf.scm
index 64e77c270ae..431b57021e5 100644
--- a/gnu/packages/pdf.scm
+++ b/gnu/packages/pdf.scm
@@ -101,6 +101,7 @@
   #:use-module (gnu packages nss)
   #:use-module (gnu packages ocaml)
   #:use-module (gnu packages ocr)
+  #:use-module (gnu packages package-management)
   #:use-module (gnu packages pcre)
   #:use-module (gnu packages perl)
   #:use-module (gnu packages photo)
@@ -1144,6 +1145,58 @@ Open XML Paper Specification}, @acronym{CBZ, Comic Book ZI}P, @acronym{EPUB,
 Electronic Publication} and @acronym{FB2, Fiction Book 2} (e-books) format.")
     (license license:agpl3)))
 
+(define-public pdf-tocgen
+  (package
+    (name "pdf-tocgen")
+    (version "1.3.4")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/Krasjet/pdf.tocgen")
+             (commit (string-append "v" version))))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32 "0sqqnrw6zw2awcf9g6v66fm9nnrv2xm3l5ql1liq9skilkjkgkx3"))))
+    (build-system pyproject-build-system)
+    (arguments
+     (list
+      ;; A required file is missing, seemingly causing the test suite to fail,
+      ;; see https://github.com/Krasjet/pdf.tocgen/issues/41
+      #:tests? #f
+      #:phases #~(modify-phases %standard-phases
+                   (add-after 'unpack 'patch-tests
+                     ;; The Makefile runs every command through 'poetry run';
+                     ;; that is superfluous and causes errors, so strip it.
+                     (lambda _
+                       (substitute* "Makefile"
+                         (("@poetry run ") ""))))
+                   (replace 'check
+                     (lambda* (#:key tests? #:allow-other-keys)
+                       (when tests?
+                         (invoke "make" "test")))))))
+    (native-inputs (list poetry
+                         python-poetry-core
+                         python-mamba
+                         python-pylint))
+    (propagated-inputs (list
+                        python-chardet
+                        python-jedi
+                        python-pymupdf
+                        python-toml))
+    (home-page "https://krasjet.com/voice/pdf.tocgen/")
+    (synopsis "Automatically generate table of contents for PDF files")
+    (description "pdf.tocgen is a set of command-line tools for automatically
+extracting and generating the table of contents of a PDF file.  It uses
+the embedded font attributes and position of headings to deduce the basic
+outline of a PDF file.
+
+It works best for PDF files produced from a TeX document, but it's designed to
+work with any software-generated PDF files (i.e.@: you shouldn't expect it to
+work with scanned PDFs).  Some examples include troff/groff, Adobe InDesign,
+LibreOffice Writer, and probably more.")
+    (license license:gpl3+)))
+
 (define-public qpdf
   (package
     (name "qpdf")