summary refs log tree commit diff
diff options
context:
space:
mode:
author Kévin Le Gouguec <kevin.legouguec@gmail.com> 2020-11-25 19:33:59 +0100
committer Kévin Le Gouguec <kevin.legouguec@gmail.com> 2020-11-25 19:33:59 +0100
commit e1a80a5596dddc9582969e0a95fa8c09882085a9 (patch)
tree cd60ce5f2ea2192a89d205e0f5950edb8d6b6486
parent e72881f68cc3f2ddfbbd3f51449e0251042473ef (diff)
parent 5fbd9a9e13332a8867eef3d2f408df24b19a34ef (diff)
download memory-leaks-e1a80a5596dddc9582969e0a95fa8c09882085a9.tar.xz
Merge branch 'preprocess-org'
-rw-r--r-- repo/www/TODO 14
-rwxr-xr-x repo/www/generate-index.py 53
-rwxr-xr-x repo/www/generate-page.py 9
-rw-r--r-- repo/www/helpers.py 88
-rw-r--r-- repo/www/preprocess-org.el 83
5 files changed, 191 insertions, 56 deletions
diff --git a/repo/www/TODO b/repo/www/TODO
index 7eac4fe..1799529 100644
--- a/repo/www/TODO
+++ b/repo/www/TODO
@@ -1,15 +1,13 @@
-- preprocess Org files
- Org's HTML backend adds a lot of stuff I don't like (intermediate
- divs, unstable section IDs); I'll use the markdown backend, then
- feed that to pandoc
- - change description of custom +LINKs
- - convert properties
- - convert tags
+- org preprocessing:
+ - dump properties
+ - fontify TODO keywords
- compute "leak count" on toplevel index
- get stylin'
- pandoc template
- tufte css? at least sidenotes rather than footnotes
-- use tags somehow (eg to fill in the "keywords" metadata in pandoc template)
+- use tags somehow, eg
+ - fill in the "keywords" metadata in pandoc template
+ - index files/sections by tags
- add author
- add creation & last update dates
- link to history
diff --git a/repo/www/generate-index.py b/repo/www/generate-index.py
index 16d1874..ab5b2a4 100755
--- a/repo/www/generate-index.py
+++ b/repo/www/generate-index.py
@@ -10,7 +10,7 @@ from tempfile import NamedTemporaryFile
from git import Repo
-from helpers import deserialize_directories, generate_crumbs, pandoc
+from helpers import deserialize_directories, generate_crumbs, PandocRunner
def parse_arguments():
@@ -103,41 +103,40 @@ def main(arguments):
if arguments.site_title is not None:
metadata['sitetitle'] = arguments.site_title
- if readme is not None:
- repo_top = Repo(search_parent_directories=True).working_dir
- readme_path = Path(repo_top, target, readme)
-
- # If the README doesn't have a title, give a default to pandoc
- # out-of-band.
- if not has_title(readme_path):
- metadata['pagetitle'] = target or 'README'
+ pandoc = PandocRunner(
+ arguments.output, arguments.template, arguments.filters,
+ stylesheets, variables
+ )
- with NamedTemporaryFile(mode='w+') as toc:
- toc.write(f'<h1>{toc_title}</h1>\n')
+ if readme is None:
+ with NamedTemporaryFile(suffix='.md') as dummy_readme, \
+ NamedTemporaryFile(mode='w+') as toc:
toc.write(html_toc)
toc.flush()
- pandoc(
- readme_path, arguments.output,
- arguments.template, arguments.filters, stylesheets,
- include_after=(toc.name,),
- variables=variables, metadata=metadata
+ metadata['pagetitle'] = toc_title
+ metadata['title'] = 'Index'
+
+ pandoc.run(
+ dummy_readme.name, include_after=(toc.name,), metadata=metadata
)
- return
+ return
- with NamedTemporaryFile(suffix='.md') as dummy_readme, \
- NamedTemporaryFile(mode='w+') as toc:
+ repo_top = Repo(search_parent_directories=True).working_dir
+ readme_path = Path(repo_top, target, readme)
+
+ # If the README doesn't have a title, give a default to pandoc
+ # out-of-band.
+ if not has_title(readme_path):
+ metadata['pagetitle'] = target or 'README'
+
+ with NamedTemporaryFile(mode='w+') as toc:
+ toc.write(f'<h1>{toc_title}</h1>\n')
toc.write(html_toc)
toc.flush()
- metadata['pagetitle'] = toc_title
- metadata['title'] = 'Index'
-
- pandoc(
- dummy_readme.name, arguments.output,
- arguments.template, arguments.filters, stylesheets,
- include_after=(toc.name,),
- variables=variables, metadata=metadata
+ pandoc.run(
+ readme_path, include_after=(toc.name,), metadata=metadata
)
diff --git a/repo/www/generate-page.py b/repo/www/generate-page.py
index cb2317b..bbe1288 100755
--- a/repo/www/generate-page.py
+++ b/repo/www/generate-page.py
@@ -6,7 +6,7 @@ from pathlib import Path
from git import Repo
-from helpers import generate_crumbs, pandoc
+from helpers import generate_crumbs, PandocRunner
def parse_arguments():
@@ -44,13 +44,16 @@ def main(arguments):
page_path = Path(arguments.page).resolve().relative_to(repo_top)
- pandoc(
- arguments.page,
+ pandoc = PandocRunner(
arguments.output,
arguments.template,
arguments.filters,
stylesheets,
variables={'crumbs': generate_crumbs(page_path)},
+ )
+
+ pandoc.run(
+ arguments.page,
metadata={'pagetitle': arguments.title,
'sitetitle': arguments.site_title}
)
diff --git a/repo/www/helpers.py b/repo/www/helpers.py
index 48ebccf..12d9a41 100644
--- a/repo/www/helpers.py
+++ b/repo/www/helpers.py
@@ -2,8 +2,10 @@ from collections import defaultdict
from dataclasses import dataclass, field
from itertools import chain
from os import environ, path
-from subprocess import run
-from typing import Iterator
+from pathlib import Path
+from subprocess import CalledProcessError, run
+from tempfile import NamedTemporaryFile
+from typing import Dict, Iterator, Union
@dataclass
@@ -56,26 +58,76 @@ def deserialize_directories(directories):
}
-def pandoc(page, output, template, filters, stylesheets, include_after=(),
- variables=None, metadata=None):
- cmd = (
- 'pandoc', '-s', page, '-o', output, '--template', template,
- *chain(*(('--lua-filter', f) for f in filters)),
- *chain(*(('--css', s) for s in stylesheets)),
- *chain(*(('--include-after-body', f) for f in include_after))
- )
+class _NullPreprocessor:
+ def __init__(self, source_path):
+ self._source_path = source_path
+
+ def __enter__(self):
+ self.output = self._source_path
+ return self
+
+ def __exit__(self, *args):
+ pass
+
+class _OrgPreprocessor:
+ def __init__(self, source_path):
+ self._source_path = source_path
+
+ def __enter__(self):
+ self._output = NamedTemporaryFile(mode='w+', suffix='.org')
+ try:
+ run((
+ 'emacs', '-Q', '--batch', '--load', 'preprocess-org.el',
+ '--eval', f'(preprocess-org "{self._source_path}")'
+ ), check=True, stdout=self._output)
+ except CalledProcessError:
+ self._output.close()
+ raise
+
+ self.output = self._output.name
+ return self
+
+ def __exit__(self, *args):
+ self._output.close()
+
+_PREPROCESSORS = defaultdict(lambda: _NullPreprocessor,
+ (('org', _OrgPreprocessor),))
+
+
+_PathArg = Union[Path, str, bytes]
+
+@dataclass
+class PandocRunner:
+ output: _PathArg
+ template: _PathArg
+ filters: Iterator[_PathArg]
+ stylesheets: Iterator[_PathArg]
+ variables: Dict[str, str] = field(default_factory=dict)
+
+ def run(self, page, include_after=(), metadata=None):
+ cmd = (
+ 'pandoc', '-s', '-o', self.output, '--template', self.template,
+ *chain(*(('--lua-filter', f) for f in self.filters)),
+ *chain(*(('--css', s) for s in self.stylesheets)),
+ *chain(*(('--include-after-body', f) for f in include_after))
+ )
- if variables is not None:
- cmd += tuple(chain(
- *(('-V', f'{k}={v}') for k, v in variables.items())
- ))
- if metadata is not None:
cmd += tuple(chain(
- *(('-M', f'{k}={v}') for k, v in metadata.items())
+ *(('-V', f'{k}={v}') for k, v in self.variables.items())
))
+ if metadata is not None:
+ cmd += tuple(chain(
+ *(('-M', f'{k}={v}') for k, v in metadata.items())
+ ))
+
+ environ['LUA_PATH'] = '.cache/?.lua;;'
+
+ _, ext = path.splitext(page)
+ preprocessor = _PREPROCESSORS[ext[1:]]
- environ['LUA_PATH'] = '.cache/?.lua;;'
- run(cmd, check=True)
+ with preprocessor(page) as preproc:
+ cmd = cmd + (preproc.output,)
+ run(cmd, check=True)
def generate_crumbs(target):
diff --git a/repo/www/preprocess-org.el b/repo/www/preprocess-org.el
new file mode 100644
index 0000000..fe63962
--- /dev/null
+++ b/repo/www/preprocess-org.el
@@ -0,0 +1,83 @@
+;; -*- lexical-binding: t -*-
+
+;; How I Convert Org Files To HTML.
+;; ================================
+;;
+;; Or: Why We Can't Have Nice Things: Exhibit #42.
+;; -------------------------------------------
+;;
+;; Or: I Got Way Too Much Time On My Hands, Apparently.
+;; ------------------------------------------------
+;;
+;; I see two straightforward ways to export Org files to HTML:
+;;
+;; 1. ox-html.el, Org's HTML backend: even with all the settings and
+;; filters available, there are still a few things that annoy me:
+;; lots of extra <div>s, unstable section IDs…
+;;
+;; Also, I want to squeeze pandoc somewhere in the pipeline, to run
+;; my Lua filters.
+;;
+;; 2. pandoc: does not cover all of Org's features. Org is so crammed
+;; with constructs that don't exist in other markup formats
+;; (agendas, logbooks, spreadsheets, properties…) and so many knobs
+;; can be tweaked on a per-file basis (link abbreviations, tags,
+;; TODO cycles) that Elisp remains the least painful way to process
+;; these files, IMO.
+;;
+;; A less-straightforward, but still reasonably simple way to go would
+;; be to use Org's markdown backend, then run pandoc on the result.
+;; Unfortunately, AFAICT ox-md.el does not implement definition lists,
+;; nor syntax-highlighting in fenced code blocks.
+;;
+;; So here's where I'm at: using Elisp, I'll preprocess Org files to
+;; add a bunch of #+OPTIONS pandoc recognizes, "dumb down" the stuff
+;; pandoc does not recognize, format some other stuff arbitrarily,
+;; *then* I'll run pandoc on the result.
+
+(defun pp-org/list-tags ()
+ (goto-char (point-min))
+ (while (re-search-forward org-heading-regexp nil t)
+ (save-excursion
+ (save-match-data
+ (when-let ((tags (org-get-tags (point))))
+ (insert "\n#+begin_tags\n")
+ (dolist (tag tags)
+ (insert "- " tag "\n"))
+ (insert "#+end_tags\n"))))))
+
+(defun pp-org/expand-links ()
+ ;; Expand #+LINK abbreviations, since pandoc does not grok them.
+ ;; Also, use the abbreviation as default description for links that
+ ;; lack one.
+ (pcase-dolist (`(,key . ,expansion) org-link-abbrev-alist-local)
+ (goto-char (point-min))
+ (let ((link-re (rx "[[" (group (literal key) ":"
+ (group (+ (not "]"))))
+ "]" (? (group "["
+ (group (+ (not "]")))
+ "]"))
+ "]"))
+ (expand-link (if (string-match-p "%s" expansion)
+ (lambda (tag) (format expansion tag))
+ (lambda (tag) (concat expansion tag)))))
+ (while (re-search-forward link-re nil t)
+ (let ((link-beg (match-beginning 0))
+ (link-abbrev (match-string 1))
+ (link-tag (match-string 2))
+ (description (match-string 4)))
+ (replace-match (funcall expand-link link-tag) t t nil 1)
+ (unless description
+ (save-excursion
+ (goto-char (1+ link-beg))
+ (forward-sexp)
+ (insert (format "[%s]" link-abbrev)))))))))
+
+(defun preprocess-org (input)
+ (with-temp-buffer
+ (insert "#+OPTIONS: ^:{} tags:nil H:6\n")
+ (insert-file-contents input)
+ (org-mode)
+ (pp-org/list-tags)
+ (pp-org/expand-links)
+ (princ (buffer-string))))