From cb1183121b2c8e54118cfa0a3ee13ef59eeb1e5d Mon Sep 17 00:00:00 2001
From: pinoaffe
Date: Thu, 28 Aug 2025 15:22:40 +0200
Subject: [PATCH] gnu: Add emacs-doc-toc.

* gnu/packages/emacs-xyz.scm (emacs-doc-toc): New variable.

Change-Id: I157a92ecb1b2df1dba22046cc21d0383bea5f65d
Signed-off-by: Liliana Marie Prikler
---
 gnu/local.mk                                  |   1 +
 gnu/packages/emacs-xyz.scm                    |  50 ++++
 .../emacs-doc-toc-shell-commands.patch        | 252 ++++++++++++++++++
 3 files changed, 303 insertions(+)
 create mode 100644 gnu/packages/patches/emacs-doc-toc-shell-commands.patch

diff --git a/gnu/local.mk b/gnu/local.mk
index e604f37d242..b273dae61c1 100644
--- a/gnu/local.mk
+++ b/gnu/local.mk
@@ -1217,6 +1217,7 @@ dist_patch_DATA = \
   %D%/packages/patches/emacs-all-the-icons-remove-duplicate-rs.patch \
   %D%/packages/patches/emacs-deferred-fix-number-of-arguments.patch \
   %D%/packages/patches/emacs-disable-jit-compilation.patch \
+  %D%/packages/patches/emacs-doc-toc-shell-commands.patch \
   %D%/packages/patches/emacs-elisp-autofmt-fix-region-send.patch \
   %D%/packages/patches/emacs-exec-path.patch \
   %D%/packages/patches/emacs-fix-scheme-indent-function.patch \
diff --git a/gnu/packages/emacs-xyz.scm b/gnu/packages/emacs-xyz.scm
index 1eb844ee709..343ff79c517 100644
--- a/gnu/packages/emacs-xyz.scm
+++ b/gnu/packages/emacs-xyz.scm
@@ -233,6 +233,7 @@
   #:use-module (gnu packages julia-xyz)
   #:use-module (gnu packages ncurses)
   #:use-module (gnu packages networking)
+  #:use-module (gnu packages ocr)
   #:use-module (gnu packages python)
   #:use-module (gnu packages python-xyz)
   #:use-module (gnu packages python-check)
@@ -7752,6 +7753,55 @@ with more precise location control.")
       (home-page "https://github.com/fuxialexander/org-pdftools/")
       (license license:gpl3+))))
 
+(define-public emacs-doc-toc
+  (package
+    (name "emacs-doc-toc")
+    (version "1.02")
+    (source (origin
+              (method git-fetch)
+              (uri (git-reference
+                     (url "https://github.com/dalanicolai/doc-tools-toc")
+                     (commit "4a179fbacd7bc9efbd6cfcdc8772d42935e6de29")))
+              (sha256
+               (base32
+                "10w0gybhmx9g7qs5kmg7gsz156kndwzzpfhyb7l0p1cghgqii7l4"))
+              (patches
+               (search-patches "emacs-doc-toc-shell-commands.patch"))))
+    (build-system emacs-build-system)
+    (arguments
+     (list
+      #:tests? #f                       ; no tests
+      #:phases
+      #~(modify-phases %standard-phases
+          (add-after 'unpack 'patch-exec-paths
+            (lambda* (#:key inputs #:allow-other-keys)
+              (emacs-substitute-variables "doc-toc.el"
+                ("doc-toc--ddjvu-program" (search-input-file inputs "/bin/ddjvu"))
+                ("doc-toc--djvused-program" (search-input-file inputs "/bin/djvused"))
+                ("doc-toc--djvutxt-program" (search-input-file inputs "/bin/djvutxt"))
+                ("doc-toc--mutool-program" (search-input-file inputs "/bin/mutool"))
+                ("doc-toc--pdfoutline-program" (search-input-file inputs "/bin/pdfoutline"))
+                ("doc-toc--pdftocgen-program" (search-input-file inputs "/bin/pdftocgen"))
+                ("doc-toc--pdftocio-program" (search-input-file inputs "/bin/pdftocio"))
+                ("doc-toc--pdftotext-program" (search-input-file inputs "/bin/pdftotext"))
+                ("doc-toc--pdfxmeta-program" (search-input-file inputs "/bin/pdfxmeta"))
+                ("doc-toc--tesseract-program" (search-input-file inputs "/bin/tesseract"))))))))
+    (inputs
+     (list djvulibre
+           fntsample ; for pdfoutline
+           mupdf
+           pdf-tocgen
+           poppler
+           tesseract-ocr))
+    (home-page "https://github.com/dalanicolai/doc-tools-toc")
+    (synopsis "Manage outlines/table of contents of pdf and djvu documents")
+    (description "This package provides a multistep process to infer or
+manually enter an outline for a given pdf or djvu document and to add it to
+said document, or to edit a pre-existing outline. Outlines can be created
+manually, automatically generated from a textual table of contents, or
+generated from typesetting metadata.")
+    (license license:gpl3+)))
+
 (define-public emacs-sage-shell-mode
   (let ((commit "4291700e981a2105d55fa56382ba25046d3d268d")
         (revision "1"))
diff --git a/gnu/packages/patches/emacs-doc-toc-shell-commands.patch b/gnu/packages/patches/emacs-doc-toc-shell-commands.patch
new file mode 100644
index 00000000000..0c10db7e253
--- /dev/null
+++ b/gnu/packages/patches/emacs-doc-toc-shell-commands.patch
@@ -0,0 +1,252 @@
+Upstream-status: https://github.com/dalanicolai/doc-tools-toc/pull/4
+
+diff --git a/doc-toc.el b/doc-toc.el
+index 8b7057c..64eca03 100644
+--- a/doc-toc.el
++++ b/doc-toc.el
+@@ -216,6 +216,7 @@
+ (defvar pdf-filename)
+ 
+ (declare-function pdf-cache-get-image "pdf-cache")
++(declare-function pdf-view-active-region-text "pdf-view")
+ (declare-function pdf-view-goto-page "pdf-view")
+ (declare-function pdf-view-next-page "pdf-view")
+ (declare-function pdf-view-previous-page "pdf-view")
+@@ -262,39 +263,60 @@ URL`http://handyoutlinerfo.sourceforge.net/'."
+ String (i.e. surround with double quotes)."
+   :type 'file)
+ 
++(defvar doc-toc--ddjvu-program (executable-find "ddjvu"))
++(defvar doc-toc--djvused-program (executable-find "djvused"))
++(defvar doc-toc--djvutxt-program (executable-find "djvutxt"))
++(defvar doc-toc--mutool-program (executable-find "mutool"))
++(defvar doc-toc--pdfoutline-program (executable-find "pdfoutline"))
++(defvar doc-toc--pdftocgen-program (executable-find "pdftocgen"))
++(defvar doc-toc--pdftocio-program (executable-find "pdftocio"))
++(defvar doc-toc--pdftotext-program (executable-find "pdftotext"))
++(defvar doc-toc--pdfxmeta-program (executable-find "pdfxmeta"))
++(defvar doc-toc--tesseract-program (executable-find "tesseract"))
++
++(defun doc-toc--process-to-string (program &rest args)
++  "Return the output of running PROGRAM with ARGS.
++
++Like `shell-command-to-string', but PROGRAM and ARGS are provided as
++separate strings rather than a single space-separated, escaped string."
++  (with-temp-buffer
++    (apply #'call-process
++           program
++           nil
++           (list (current-buffer) nil)
++           nil
++           args)
++    (buffer-string)))
++
+ ;;;; pdf.tocgen
+ ;;;###autoload
+ (defun doc-toc-gen-set-level (level)
+-  "Define the text properties of the heading level.
+-In a pdf-view buffer select a single word in the headline of a
+-certain level. Then run `doc-toc-gen-set-level' to write the text
++  "Define the text properties of the heading level.
++In a pdf-view buffer select a single word in the headline of a
++certain level. Then run `doc-toc-gen-set-level' to write the text
+ properties to the recipe.toml file that is created in the
+-document's directory. You will be prompted to enter the LEVEL
+-number. The highest level should have number 1, the next level
++document's directory. You will be prompted to enter the LEVEL
++number. The highest level should have number 1, the next level
+ number 2 etc."
+-  (interactive "nWhich level you are setting (number): ")
++  (interactive "nWhich level you are setting (number): ")
+   (let* ((page (pdf-view-current-page))
+          (filename (url-filename (url-generic-parse-url buffer-file-name)))
+-         (pdfxmeta-result (shell-command
+-                           (format "pdfxmeta --auto %s --page %s %s \"%s\" >> recipe.toml"
+-                                   level
+-                                   page
+-                                   (shell-quote-argument filename)
+-                                   (car (pdf-view-active-region-text))))))
+-         ;; (pdfxmeta-result (call-process "pdfxmeta" nil "recipe.toml" nil
+-         ;;                                "--auto" (number-to-string level)
+-         ;;                                "--page" (number-to-string page)
+-         ;;                                (shell-quote-argument filename)
+-         ;;                                (concat "\"" (car (pdf-view-active-region-text)) "\""))))
++         (word (car (pdf-view-active-region-text))) ; read before leaving the PDF buffer
++         (pdfxmeta-result
++          (with-temp-buffer ; a string DESTINATION would name a buffer, not the file
++            (prog1 (call-process doc-toc--pdfxmeta-program nil '(t nil) nil
++                                 "--auto" (number-to-string level)
++                                 "--page" (number-to-string page)
++                                 filename word) ; no shell involved, so no quoting
++              (append-to-file (point-min) (point-max) "recipe.toml"))))) ; like `>>'
+     (when (eq pdfxmeta-result 1)
+-      (let ((page-text (shell-command-to-string
+-                        (format "mutool draw -F text %s %s"
+-                                (shell-quote-argument filename)
+-                                page
+-                                ))))
++      (let ((page-text (doc-toc--process-to-string
++                        doc-toc--mutool-program
++                        "draw" "-F" "text" ; -F selects the output format
++                        filename (number-to-string page)))) ; args must be strings
+         (pop-to-buffer "page-text")
+         (insert
+-         "COULD NOT SET HEADING LEVEL. MUPDF EXTRACTED FOLLOWING PAGE TEXT FROM PAGE:\n")
++         "COULD NOT SET HEADING LEVEL. MUPDF EXTRACTED FOLLOWING PAGE TEXT FROM PAGE:\n")
+         (add-text-properties 1 (point) '(face font-lock-warning-face))
+         (let ((beg (point)))
+           (insert "(try to select partial word)\n\n")
+@@ -312,8 +334,11 @@ be used after the headline text properties have been defined with
+ the function `doc-toc-gen-set-level'"
+   (interactive)
+   (let ((filename buffer-file-name)
+-        (toc (shell-command-to-string
+-              (format "pdftocgen %s < recipe.toml" (shell-quote-argument buffer-file-name)))))
++        (toc (doc-toc--process-to-string
++              doc-toc--pdftocgen-program
++              buffer-file-name
++              "-r"
++              "recipe.toml")))
+     (switch-to-buffer "toc")
+     (doc-toc-pdftocgen-mode) ;; required before setting local variable
+     (when (fboundp 'flyspell-mode)
+@@ -328,7 +353,8 @@ named output.pdf and opened in a new buffer. Don't forget to
+ rename this new file."
+   (interactive)
+   (let* ((output-buf (get-buffer-create "*pdftocio-output*")))
+-    (call-process-region (point-min) (point-max) "pdftocio"
++    (call-process-region (point-min) (point-max)
++                         doc-toc--pdftocio-program
+                          nil output-buf nil pdf-filename)
+     (kill-buffer-if-not-modified (find-file pdf-filename))
+     (when (file-exists-p (concat (file-name-base pdf-filename) "_out.pdf"))
+@@ -483,14 +509,19 @@ ARG (\\[universal-argument]) to enter different separators."
+          (default-process-coding-system
+           (cond ((string= ".pdf" ext)'(windows-1252-unix . utf-8-unix))
+                 ((string= ".djvu" ext) '(utf-8-unix . utf-8-unix))))
+-         (shell-command (cond ((string= ".pdf" ext) "pdftotext -f %s -l %s -layout %s -")
+-                              ((string= ".djvu" ext) "djvutxt --page=%s-%s %s")
+-                              (t (error "Buffer-filename does not have pdf or djvu extension"))))
+-         (text (shell-command-to-string
+-                (format shell-command
+-                        startpage
+-                        endpage
+-                        (shell-quote-argument buffer-file-name))))
++         (text (cond ((string= ".pdf" ext)
++                      (doc-toc--process-to-string
++                       doc-toc--pdftotext-program
++                       "-f" (format "%s" startpage) ; accept numbers as the old %s did
++                       "-l" (format "%s" endpage)
++                       "-layout" buffer-file-name
++                       "-"))
++                     ((string= ".djvu" ext)
++                      (doc-toc--process-to-string
++                       doc-toc--djvutxt-program
++                       (format "--page=%s-%s" startpage endpage)
++                       buffer-file-name))
++                     (t (error "Buffer-filename does not have pdf or djvu extension"))))
+          (buffer (get-buffer-create (file-name-sans-extension (buffer-name)))))
+     (switch-to-buffer buffer)
+     (doc-toc-cleanup-mode) ;; required before setting local variable
+@@ -522,7 +553,8 @@ For use in `doc-toc-ocr-languages'."
+   (let ((print-length nil))
+     (message (format "%s" (seq-subseq
+                            (split-string
+-                            (shell-command-to-string "tesseract --list-langs"))
++                            (doc-toc--process-to-string doc-toc--tesseract-program
++                                                        "--list-langs"))
+                            5)))))
+ 
+ ;;;###autoload
+@@ -557,7 +589,8 @@ unprocessed text."
+                ((string= ".djvu" ext)
+                 ;; new code for djvu3
+                 (let ((outfile (format "/tmp/pageimagep%s" page)))
+-                  (shell-command (format "ddjvu -page=%s '%s' %s"
++                  (shell-command (format "%s -page=%s '%s' %s"
++                                         doc-toc--ddjvu-program
+                                          page
+                                          buffer-file-name
+                                          outfile))
+@@ -569,8 +602,12 @@ unprocessed text."
+           ;;                  (number-to-string page)
+           ;;                  (image-property djvu-doc-image :data))))))
+           (apply #'call-process
+-                 (append (list "tesseract" nil (list buffer nil) nil file)
+-                         args))
++                 doc-toc--tesseract-program
++                 nil
++                 (list buffer nil)
++                 nil
++                 file
++                 args)
+           (setq page (1+ page))))
+     (switch-to-buffer buffer)
+     (doc-toc-cleanup-mode) ;; required before setting local variable
+@@ -587,14 +624,20 @@ unprocessed text."
+   (interactive)
+   (let* ((source-buffer (current-buffer))
+          (ext (url-file-extension (buffer-file-name (current-buffer))))
+-         (shell-command (cond ((string= ".pdf" ext) (if (executable-find "mutool")
+-                                                        "mutool show %s outline"
+-                                                      "mutool command is not found"))
+-                              ((string= ".djvu" ext) "djvused -e 'print-outline' %s")
+-                              (t (error "Buffer-filename does not have pdf or djvu extension"))))
+-         (text (shell-command-to-string
+-                (format shell-command
+-                        (shell-quote-argument buffer-file-name))))
++
++         (text (cond ((string= ".pdf" ext)
++                      (unless doc-toc--mutool-program
++                        (error "Command mutool is not found"))
++                      (doc-toc--process-to-string doc-toc--mutool-program
++                                                  "show"
++                                                  buffer-file-name
++                                                  "outline"))
++                     ((string= ".djvu" ext)
++                      (doc-toc--process-to-string doc-toc--djvused-program
++                                                  "-e"
++                                                  "print-outline"
++                                                  buffer-file-name))
++                     (t (error "Buffer-filename does not have pdf or djvu extension"))))
+          (buffer (get-buffer-create (concat (file-name-sans-extension (buffer-name)) ".txt"))))
+     (switch-to-buffer buffer)
+     (setq-local doc-buffer source-buffer)
+@@ -628,7 +671,7 @@ Prompt for startpage and endpage and print OCR output to new buffer."
+                                                nil
+                                                (number-to-string page)
+                                                (image-property djvu-doc-image :data))))))
+-            (apply #'call-process "tesseract" nil (list buffer nil) nil
++            (apply #'call-process doc-toc--tesseract-program nil (list buffer nil) nil
+                    file args)
+             (setq page (1+ page))))
+         (switch-to-buffer buffer)))))
+@@ -992,7 +1035,7 @@ to `pdfoutline' shell command."
+ This command uses the shell program `pdfoutline'."
+   (interactive)
+   (save-buffer)
+-  (call-process "pdfoutline" nil "*pdfoutline*" nil
++  (call-process doc-toc--pdfoutline-program nil "*pdfoutline*" nil
+                 (concat (file-name-sans-extension (buffer-name)) ".pdf")
+                 (buffer-name)
+                 (if doc-toc-replace-original-file
+@@ -1009,12 +1052,13 @@ This command uses the shell program `djvused'."
+            (buffer-name)
+            (shell-quote-argument
+             (concat (file-name-sans-extension (buffer-name)) ".djvu"))))
+-  (shell-command-to-string
+-   (format
+-    "djvused -s -e \"set-outline '%s'\" %s"
+-    (buffer-name)
+-    (shell-quote-argument
+-     (concat (file-name-sans-extension (buffer-name)) ".djvu")))))
++  (doc-toc--process-to-string
++   doc-toc--djvused-program
++   "-s"
++   "-e"
++   (format "set-outline '%s'"
++           (buffer-name)) ;; TODO: maybe escape?
++   (concat (file-name-sans-extension (buffer-name)) ".djvu")))
+ 
+ 
+ (defun doc-toc--add-to-doc ()