From be0d907de78c4689708573c03105059e04d2419f Mon Sep 17 00:00:00 2001 From: Kévin Le Gouguec Date: Mon, 28 Sep 2020 22:10:56 +0200 Subject: Factor some code out To make it easier to add a pre-processing step for Org files. --- repo/www/generate-index.py | 21 ++++++++++----------- repo/www/generate-page.py | 9 ++++++--- repo/www/helpers.py | 44 +++++++++++++++++++++++++++----------------- 3 files changed, 43 insertions(+), 31 deletions(-) (limited to 'repo/www') diff --git a/repo/www/generate-index.py b/repo/www/generate-index.py index 16d1874..3fe1212 100755 --- a/repo/www/generate-index.py +++ b/repo/www/generate-index.py @@ -10,7 +10,7 @@ from tempfile import NamedTemporaryFile from git import Repo -from helpers import deserialize_directories, generate_crumbs, pandoc +from helpers import deserialize_directories, generate_crumbs, PandocRunner def parse_arguments(): @@ -103,6 +103,11 @@ def main(arguments): if arguments.site_title is not None: metadata['sitetitle'] = arguments.site_title + pandoc = PandocRunner( + arguments.output, arguments.template, arguments.filters, + stylesheets, variables + ) + if readme is not None: repo_top = Repo(search_parent_directories=True).working_dir readme_path = Path(repo_top, target, readme) @@ -117,11 +122,8 @@ def main(arguments): toc.write(html_toc) toc.flush() - pandoc( - readme_path, arguments.output, - arguments.template, arguments.filters, stylesheets, - include_after=(toc.name,), - variables=variables, metadata=metadata + pandoc.run( + readme_path, include_after=(toc.name,), metadata=metadata ) return @@ -133,11 +135,8 @@ def main(arguments): metadata['pagetitle'] = toc_title metadata['title'] = 'Index' - pandoc( - dummy_readme.name, arguments.output, - arguments.template, arguments.filters, stylesheets, - include_after=(toc.name,), - variables=variables, metadata=metadata + pandoc.run( + dummy_readme.name, include_after=(toc.name,), metadata=metadata ) diff --git a/repo/www/generate-page.py b/repo/www/generate-page.py 
index cb2317b..bbe1288 100755 --- a/repo/www/generate-page.py +++ b/repo/www/generate-page.py @@ -6,7 +6,7 @@ from pathlib import Path from git import Repo -from helpers import generate_crumbs, pandoc +from helpers import generate_crumbs, PandocRunner def parse_arguments(): @@ -44,13 +44,16 @@ def main(arguments): page_path = Path(arguments.page).resolve().relative_to(repo_top) - pandoc( - arguments.page, + pandoc = PandocRunner( arguments.output, arguments.template, arguments.filters, stylesheets, variables={'crumbs': generate_crumbs(page_path)}, + ) + + pandoc.run( + arguments.page, metadata={'pagetitle': arguments.title, 'sitetitle': arguments.site_title} ) diff --git a/repo/www/helpers.py b/repo/www/helpers.py index 48ebccf..dbab622 100644 --- a/repo/www/helpers.py +++ b/repo/www/helpers.py @@ -2,8 +2,9 @@ from collections import defaultdict from dataclasses import dataclass, field from itertools import chain from os import environ, path +from pathlib import Path from subprocess import run -from typing import Iterator +from typing import Dict, Iterator, Union @dataclass @@ -56,26 +57,35 @@ def deserialize_directories(directories): } -def pandoc(page, output, template, filters, stylesheets, include_after=(), - variables=None, metadata=None): - cmd = ( - 'pandoc', '-s', page, '-o', output, '--template', template, - *chain(*(('--lua-filter', f) for f in filters)), - *chain(*(('--css', s) for s in stylesheets)), - *chain(*(('--include-after-body', f) for f in include_after)) - ) +_PathArg = Union[Path, str, bytes] + +@dataclass +class PandocRunner: + output: _PathArg + template: _PathArg + filters: Iterator[_PathArg] + stylesheets: Iterator[_PathArg] + variables: Dict[str, str] = field(default_factory=dict) + + def run(self, page, include_after=(), metadata=None): + cmd = ( + 'pandoc', '-s', page, '-o', self.output, + '--template', self.template, + *chain(*(('--lua-filter', f) for f in self.filters)), + *chain(*(('--css', s) for s in self.stylesheets)), + 
*chain(*(('--include-after-body', f) for f in include_after)) + ) - if variables is not None: - cmd += tuple(chain( - *(('-V', f'{k}={v}') for k, v in variables.items()) - )) - if metadata is not None: cmd += tuple(chain( - *(('-M', f'{k}={v}') for k, v in metadata.items()) + *(('-V', f'{k}={v}') for k, v in self.variables.items()) )) + if metadata is not None: + cmd += tuple(chain( + *(('-M', f'{k}={v}') for k, v in metadata.items()) + )) - environ['LUA_PATH'] = '.cache/?.lua;;' - run(cmd, check=True) + environ['LUA_PATH'] = '.cache/?.lua;;' + run(cmd, check=True) def generate_crumbs(target): -- cgit v1.2.3 From 2e7ffb635601d35d331ef92d72bf589bd054b6e6 Mon Sep 17 00:00:00 2001 From: Kévin Le Gouguec Date: Mon, 28 Sep 2020 22:19:23 +0200 Subject: Move branches around The no-readme case is more straightforward, thus more digestible as an extra-indented special-case. --- repo/www/generate-index.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'repo/www') diff --git a/repo/www/generate-index.py b/repo/www/generate-index.py index 3fe1212..ab5b2a4 100755 --- a/repo/www/generate-index.py +++ b/repo/www/generate-index.py @@ -108,35 +108,35 @@ def main(arguments): stylesheets, variables ) - if readme is not None: - repo_top = Repo(search_parent_directories=True).working_dir - readme_path = Path(repo_top, target, readme) - - # If the README doesn't have a title, give a default to pandoc - # out-of-band. - if not has_title(readme_path): - metadata['pagetitle'] = target or 'README' - - with NamedTemporaryFile(mode='w+') as toc: - toc.write(f'

{toc_title}

\n') + if readme is None: + with NamedTemporaryFile(suffix='.md') as dummy_readme, \ + NamedTemporaryFile(mode='w+') as toc: toc.write(html_toc) toc.flush() + metadata['pagetitle'] = toc_title + metadata['title'] = 'Index' + pandoc.run( - readme_path, include_after=(toc.name,), metadata=metadata + dummy_readme.name, include_after=(toc.name,), metadata=metadata ) - return + return - with NamedTemporaryFile(suffix='.md') as dummy_readme, \ - NamedTemporaryFile(mode='w+') as toc: + repo_top = Repo(search_parent_directories=True).working_dir + readme_path = Path(repo_top, target, readme) + + # If the README doesn't have a title, give a default to pandoc + # out-of-band. + if not has_title(readme_path): + metadata['pagetitle'] = target or 'README' + + with NamedTemporaryFile(mode='w+') as toc: + toc.write(f'

{toc_title}

\n') toc.write(html_toc) toc.flush() - metadata['pagetitle'] = toc_title - metadata['title'] = 'Index' - pandoc.run( - dummy_readme.name, include_after=(toc.name,), metadata=metadata + readme_path, include_after=(toc.name,), metadata=metadata ) -- cgit v1.2.3 From 3e230d40ab1255aec292df17d7b127b681a55710 Mon Sep 17 00:00:00 2001 From: Kévin Le Gouguec Date: Thu, 1 Oct 2020 22:40:43 +0200 Subject: DAMMIT ox-md does not syntax-highlight source blocks, and trips over definition lists. --- repo/www/helpers.py | 50 ++++++++++++++++++++++++++++++++++++++++++---- repo/www/preprocess-org.el | 20 +++++++++++++++++++ 2 files changed, 66 insertions(+), 4 deletions(-) create mode 100644 repo/www/preprocess-org.el (limited to 'repo/www') diff --git a/repo/www/helpers.py b/repo/www/helpers.py index dbab622..34f274f 100644 --- a/repo/www/helpers.py +++ b/repo/www/helpers.py @@ -3,7 +3,8 @@ from dataclasses import dataclass, field from itertools import chain from os import environ, path from pathlib import Path -from subprocess import run +from subprocess import CalledProcessError, run +from tempfile import NamedTemporaryFile from typing import Dict, Iterator, Union @@ -57,6 +58,42 @@ def deserialize_directories(directories): } +class _NullPreprocessor: + def __init__(self, source_path): + self._source_path = source_path + + def __enter__(self): + self.output = self._source_path + return self + + def __exit__(self, *args): + pass + +class _OrgPreprocessor: + def __init__(self, source_path): + self._source_path = source_path + + def __enter__(self): + self._output = NamedTemporaryFile(mode='w+', suffix='.md') + try: + run(( + 'emacs', '-Q', '--batch', '--load', 'preprocess-org.el', + '--eval', f'(preprocess-org "{self._source_path}")' + ), check=True, stdout=self._output) + except CalledProcessError: + self._output.close() + raise + + self.output = self._output.name + return self + + def __exit__(self, *args): + self._output.close() + +_PREPROCESSORS = defaultdict(lambda: 
_NullPreprocessor, + (('org', _OrgPreprocessor),)) + + _PathArg = Union[Path, str, bytes] @dataclass @@ -69,8 +106,7 @@ class PandocRunner: def run(self, page, include_after=(), metadata=None): cmd = ( - 'pandoc', '-s', page, '-o', self.output, - '--template', self.template, + 'pandoc', '-s', '-o', self.output, '--template', self.template, *chain(*(('--lua-filter', f) for f in self.filters)), *chain(*(('--css', s) for s in self.stylesheets)), *chain(*(('--include-after-body', f) for f in include_after)) @@ -85,7 +121,13 @@ class PandocRunner: )) environ['LUA_PATH'] = '.cache/?.lua;;' - run(cmd, check=True) + + _, ext = path.splitext(page) + preprocessor = _PREPROCESSORS[ext[1:]] + + with preprocessor(page) as preproc: + cmd = cmd + (preproc.output,) + run(cmd, check=True) def generate_crumbs(target): diff --git a/repo/www/preprocess-org.el b/repo/www/preprocess-org.el new file mode 100644 index 0000000..f7be936 --- /dev/null +++ b/repo/www/preprocess-org.el @@ -0,0 +1,20 @@ +(defun preprocess-org (input) + (with-temp-buffer + (insert-file-contents input) + (org-mode) + (while (re-search-forward org-heading-regexp nil t) + (save-excursion + (save-match-data + (when-let ((tags (org-get-tags (point)))) + (insert "\n#+begin_tags\n") + (dolist (tag tags) + (insert "- " tag "\n")) + (insert "#+end_tags\n"))))) + (let ((org-export-with-properties t) + (org-export-with-section-numbers nil) + (org-export-with-sub-superscripts '{}) + (org-export-with-tags nil) + (org-export-with-title nil) + (org-export-with-toc nil)) + (org-md-export-as-markdown)) + (princ (buffer-string)))) -- cgit v1.2.3 From 348ac65f367ec3b0ce4a517a281810e5c82bd135 Mon Sep 17 00:00:00 2001 From: Kévin Le Gouguec Date: Tue, 6 Oct 2020 10:30:42 +0200 Subject: Bang on Org export some more --- repo/www/helpers.py | 2 +- repo/www/preprocess-org.el | 48 ++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 41 insertions(+), 9 deletions(-) (limited to 'repo/www') diff --git a/repo/www/helpers.py 
b/repo/www/helpers.py index 34f274f..12d9a41 100644 --- a/repo/www/helpers.py +++ b/repo/www/helpers.py @@ -74,7 +74,7 @@ class _OrgPreprocessor: self._source_path = source_path def __enter__(self): - self._output = NamedTemporaryFile(mode='w+', suffix='.md') + self._output = NamedTemporaryFile(mode='w+', suffix='.org') try: run(( 'emacs', '-Q', '--batch', '--load', 'preprocess-org.el', diff --git a/repo/www/preprocess-org.el b/repo/www/preprocess-org.el index f7be936..bad9f90 100644 --- a/repo/www/preprocess-org.el +++ b/repo/www/preprocess-org.el @@ -1,5 +1,41 @@ +;; How I Convert Org Files To HTML. +;; ================================ +;; +;; Or: Why We Can't Have Nice Things: Exhibit #42. +;; ------------------------------------------- +;; +;; Or: I Got Way Too Much Time On My Hands, Apparently. +;; ------------------------------------------------ +;; +;; I see two straightforward ways to export Org files to HTML: +;; +;; 1. ox-html.el, Org's HTML backend: even with all the settings and +;; filters available, there are still a few things that annoy me: +;; lots of extra
<div>s, unstable section IDs… +;; +;; Also, I want to squeeze pandoc somewhere in the pipeline, to run +;; my Lua filters. +;; +;; 2. pandoc: does not cover all of Org's features. Org is so crammed +;; with constructs that don't exist in other markup formats +;; (agendas, logbooks, spreadsheets, properties…) and so many knobs +;; can be tweaked on a per-file basis (link abbreviations, tags, +;; TODO cycles) that Elisp remains the least painful way to process +;; these files, IMO. +;; +;; A less-straightforward, but still reasonably simple way to go would +;; be to use Org's markdown backend, then run pandoc on the result. +;; Unfortunately, AFAICT ox-md.el does not implement definition lists, +;; nor syntax-highlighting in fenced code blocks. +;; +;; So here's where I'm at: using Elisp, I'll preprocess Org files to +;; add a bunch of #+OPTIONS pandoc recognizes, "dumb down" the stuff +;; pandoc does not recognize, format some other stuff arbitrarily, +;; *then* I'll run pandoc on the result. 
+ (defun preprocess-org (input) (with-temp-buffer + (insert "#+OPTIONS: ^:{} tags:nil\n") (insert-file-contents input) (org-mode) (while (re-search-forward org-heading-regexp nil t) @@ -10,11 +46,7 @@ (dolist (tag tags) (insert "- " tag "\n")) (insert "#+end_tags\n"))))) - (let ((org-export-with-properties t) - (org-export-with-section-numbers nil) - (org-export-with-sub-superscripts '{}) - (org-export-with-tags nil) - (org-export-with-title nil) - (org-export-with-toc nil)) - (org-md-export-as-markdown)) - (princ (buffer-string)))) + ;; TODO: dump properties + ;; TODO: fontify TODO keywords + ;; TODO: expand #+LINK abbreviations + (princ (buffer-string)))) -- cgit v1.2.3 From 9dceda02d10adcabec2504da8e39cefa2e5b0336 Mon Sep 17 00:00:00 2001 From: Kévin Le Gouguec Date: Tue, 6 Oct 2020 11:00:24 +0200 Subject: Handle link abbreviations --- repo/www/preprocess-org.el | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'repo/www') diff --git a/repo/www/preprocess-org.el b/repo/www/preprocess-org.el index bad9f90..fda4476 100644 --- a/repo/www/preprocess-org.el +++ b/repo/www/preprocess-org.el @@ -1,3 +1,5 @@ +;; -*- lexical-binding: t -*- + ;; How I Convert Org Files To HTML. ;; ================================ ;; @@ -48,5 +50,14 @@ (insert "#+end_tags\n"))))) ;; TODO: dump properties ;; TODO: fontify TODO keywords - ;; TODO: expand #+LINK abbreviations + (pcase-dolist (`(,key . 
,expansion) org-link-abbrev-alist-local) + (goto-char (point-min)) + (let ((link-re (rx "[[" (group (literal key) ":" + (group (+ (not "]")))))) + (replacement (if (string-match-p "%s" expansion) + (lambda (tag) (format expansion tag)) + (lambda (tag) (concat expansion tag))))) + (while (re-search-forward link-re nil t) + (let ((full-link (funcall replacement (match-string 2)))) + (replace-match full-link t t nil 1))))) (princ (buffer-string)))) -- cgit v1.2.3 From 64141744239182e60e2655e2ed24664461397e26 Mon Sep 17 00:00:00 2001 From: Kévin Le Gouguec Date: Tue, 6 Oct 2020 11:54:26 +0200 Subject: Split preprocessor into smaller functions --- repo/www/preprocess-org.el | 43 +++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 18 deletions(-) (limited to 'repo/www') diff --git a/repo/www/preprocess-org.el b/repo/www/preprocess-org.el index fda4476..4963326 100644 --- a/repo/www/preprocess-org.el +++ b/repo/www/preprocess-org.el @@ -35,29 +35,36 @@ ;; pandoc does not recognize, format some other stuff arbitrarily, ;; *then* I'll run pandoc on the result. +(defun pp-org/list-tags () + (goto-char (point-min)) + (while (re-search-forward org-heading-regexp nil t) + (save-excursion + (save-match-data + (when-let ((tags (org-get-tags (point)))) + (insert "\n#+begin_tags\n") + (dolist (tag tags) + (insert "- " tag "\n")) + (insert "#+end_tags\n")))))) + +(defun pp-org/expand-links () + (pcase-dolist (`(,key . 
,expansion) org-link-abbrev-alist-local) + (goto-char (point-min)) + (let ((link-re (rx "[[" (group (literal key) ":" + (group (+ (not "]")))))) + (replacement (if (string-match-p "%s" expansion) + (lambda (tag) (format expansion tag)) + (lambda (tag) (concat expansion tag))))) + (while (re-search-forward link-re nil t) + (let ((full-link (funcall replacement (match-string 2)))) + (replace-match full-link t t nil 1)))))) + (defun preprocess-org (input) (with-temp-buffer (insert "#+OPTIONS: ^:{} tags:nil\n") (insert-file-contents input) (org-mode) - (while (re-search-forward org-heading-regexp nil t) - (save-excursion - (save-match-data - (when-let ((tags (org-get-tags (point)))) - (insert "\n#+begin_tags\n") - (dolist (tag tags) - (insert "- " tag "\n")) - (insert "#+end_tags\n"))))) ;; TODO: dump properties ;; TODO: fontify TODO keywords - (pcase-dolist (`(,key . ,expansion) org-link-abbrev-alist-local) - (goto-char (point-min)) - (let ((link-re (rx "[[" (group (literal key) ":" - (group (+ (not "]")))))) - (replacement (if (string-match-p "%s" expansion) - (lambda (tag) (format expansion tag)) - (lambda (tag) (concat expansion tag))))) - (while (re-search-forward link-re nil t) - (let ((full-link (funcall replacement (match-string 2)))) - (replace-match full-link t t nil 1))))) + (pp-org/list-tags) + (pp-org/expand-links) (princ (buffer-string)))) -- cgit v1.2.3 From f36996c463a814bb8ddff0a0462f5638adbd1d6a Mon Sep 17 00:00:00 2001 From: Kévin Le Gouguec Date: Tue, 6 Oct 2020 11:49:15 +0200 Subject: Add default description for abbreviated links It's something I'd like Org to do out-of-the-box someday[1]; until then, this kludge will do. 
[1] https://orgmode.org/list/87mu3ze52c.fsf@gmail.com/t/#u --- repo/www/TODO | 14 ++++++-------- repo/www/preprocess-org.el | 25 +++++++++++++++++++------ 2 files changed, 25 insertions(+), 14 deletions(-) (limited to 'repo/www') diff --git a/repo/www/TODO b/repo/www/TODO index 7eac4fe..1799529 100644 --- a/repo/www/TODO +++ b/repo/www/TODO @@ -1,15 +1,13 @@ -- preprocess Org files - Org's HTML backend adds a lot of stuff I don't like (intermediate - divs, unstable section IDs); I'll use the markdown backend, then - feed that to pandoc - - change description of custom +LINKs - - convert properties - - convert tags +- org preprocessing: + - dump properties + - fontify TODO keywords - compute "leak count" on toplevel index - get stylin' - pandoc template - tufte css? at least sidenotes rather than footnotes -- use tags somehow (eg to fill in the "keywords" metadata in pandoc template) +- use tags somehow, eg + - fill in the "keywords" metadata in pandoc template + - index files/sections by tags - add author - add creation & last update dates - link to history diff --git a/repo/www/preprocess-org.el b/repo/www/preprocess-org.el index 4963326..01c5e0c 100644 --- a/repo/www/preprocess-org.el +++ b/repo/www/preprocess-org.el @@ -47,24 +47,37 @@ (insert "#+end_tags\n")))))) (defun pp-org/expand-links () + ;; Expand #+LINK abbreviations, since pandoc does not grok them. + ;; Also, use the abbreviation as default description for links that + ;; lack one. (pcase-dolist (`(,key . ,expansion) org-link-abbrev-alist-local) (goto-char (point-min)) (let ((link-re (rx "[[" (group (literal key) ":" - (group (+ (not "]")))))) - (replacement (if (string-match-p "%s" expansion) + (group (+ (not "]")))) + "]" (? 
(group "[" + (group (+ (not "]"))) + "]")) + "]")) + (expand-link (if (string-match-p "%s" expansion) (lambda (tag) (format expansion tag)) (lambda (tag) (concat expansion tag))))) (while (re-search-forward link-re nil t) - (let ((full-link (funcall replacement (match-string 2)))) - (replace-match full-link t t nil 1)))))) + (let ((link-beg (match-beginning 0)) + (link-abbrev (match-string 1)) + (link-tag (match-string 2)) + (description (match-string 4))) + (replace-match (funcall expand-link link-tag) t t nil 1) + (unless description + (save-excursion + (goto-char (1+ link-beg)) + (forward-sexp) + (insert (format "[%s]" link-abbrev))))))))) (defun preprocess-org (input) (with-temp-buffer (insert "#+OPTIONS: ^:{} tags:nil\n") (insert-file-contents input) (org-mode) - ;; TODO: dump properties - ;; TODO: fontify TODO keywords (pp-org/list-tags) (pp-org/expand-links) (princ (buffer-string)))) -- cgit v1.2.3 From 5fbd9a9e13332a8867eef3d2f408df24b19a34ef Mon Sep 17 00:00:00 2001 From: Kévin Le Gouguec Date: Tue, 6 Oct 2020 12:06:30 +0200 Subject: Use all HTML heading levels available --- repo/www/preprocess-org.el | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'repo/www') diff --git a/repo/www/preprocess-org.el b/repo/www/preprocess-org.el index 01c5e0c..fe63962 100644 --- a/repo/www/preprocess-org.el +++ b/repo/www/preprocess-org.el @@ -75,7 +75,7 @@ (defun preprocess-org (input) (with-temp-buffer - (insert "#+OPTIONS: ^:{} tags:nil\n") + (insert "#+OPTIONS: ^:{} tags:nil H:6\n") (insert-file-contents input) (org-mode) (pp-org/list-tags) -- cgit v1.2.3