#!/usr/bin/env python3
"""Build an RSS 2.0 feed announcing concerts.

Usage: build-feed.py CONCERTS_SRC FEED_DST CONCERTS_PUBDATES DOMAIN

Provenance: this is the content of build-feed.py (mode 100755, blob
5fe5c04) as it stood before being deleted by commit
66b5366b7b377f9e595ee9b97261b65a0064aa5b ("Add script to update feeds",
Kévin Le Gouguec, Wed, 23 Feb 2022 10:26:02 +0100), recovered from the
cgit rendering of that commit.
"""

from datetime import datetime
import json
import re
from sys import argv
from urllib.parse import urljoin

from lxml.builder import E
from lxml.etree import CDATA, XML, indent, tostring

from helpers import (
    DATE_FORMATTERS,
    guess_language,
    read_concerts,
    tmplocale,
    touchup_plaintext,
)


# TODO: handle timezones correctly.
# Places to disambiguate:
#
# - concerts.in:
#   either add the zone explicitly, or deduce it from the place,
#   assuming all times in concerts.in are local times.
#
# - concerts-pubdates.json:
#   just add the zone explicitly.
#
# Until then, assume all these "naive times" describe the same timezone
# (CET/CEST).


TIMEZONE = datetime.now().astimezone().tzinfo
NOW = datetime.now(tz=TIMEZONE)
DATE_FORMAT = '%-d %b %Y %H:%M %z'

# TODO: add item pubDate
# (generate_concert already emits one when concerts-pubdates.json holds
# a non-null value for the concert; items with a null value get none.)


# Channel-level strings, keyed by language code.
LOCALIZED_TEXT = {
    'en': {
        'title': 'Bellefeuille Quartet',
        'indexpath': 'en/',
        'description': 'News from the Bellefeuille quartet',
    },
    'fr': {
        'title': 'Quatuor Bellefeuille',
        'indexpath': '/',
        'description': 'Des nouvelles du quatuor Bellefeuille',
    },
}

# Item-level formatters, keyed by language code.  Each callable takes a
# concert (an object with .time and .place) and returns a string.
LOCALIZED_FORMATS = {
    'en': {
        'title': lambda c: f'{c.time.strftime("%B %-d %Y")} in {c.place}',
    },
    'fr': {
        'title': lambda c: f'{c.time.strftime("%-d %B %Y")} à {c.place}',
    },
}


def join(sequence, joiner_factory):
    """Return the items of SEQUENCE as a list, interleaved with joiners.

    JOINER_FACTORY is called once per gap, so that each joiner is a
    distinct object (an lxml element can only have one parent).

    An empty SEQUENCE yields an empty list; the previous implementation
    fell off the end of its loop and returned None in that case, which
    made ``E.p(*join(...))`` raise TypeError for an empty address.
    """
    # There's got to be a standard itertools/functools thingy to do that…
    result = []

    for item in sequence:
        if result:
            result.append(joiner_factory())

        result.append(item)

    return result


CDATA_INDENT = 8*' '


def cdata_concert(concert, lang):
    """Return a CDATA section holding the HTML description of CONCERT.

    The description is made of, in order: the optional warning, the
    date, the time, the address (one line per <br/>-separated row), the
    list of pieces, and one paragraph per line of instructions.

    LANG selects the entry of DATE_FORMATTERS used to render the date
    and time, and is passed to tmplocale while they are rendered.
    """
    formatters = DATE_FORMATTERS[lang]

    blocks = []

    if concert.warning is not None:
        blocks.append(E.p(concert.warning))

    with tmplocale(lang):
        blocks.extend((
            E.p(formatters['date'](concert.time)),
            E.p(formatters['time'](concert.time)),
        ))

    blocks.extend((
        E.p(*join(concert.address.splitlines(), E.br)),
        E.ol(
            # Parsed with XML() rather than built with E.li(), so any
            # markup returned by touchup_plaintext ends up as elements
            # instead of escaped text.
            # NOTE(review): the <li> tags were reconstructed; the cgit
            # rendering this was recovered from showed them as bullets.
            *(XML(f'<li>{touchup_plaintext(p)}</li>')
              for p in concert.pieces.splitlines())
        ),
        *(E.p(line) for line in concert.instructions.splitlines()),
    ))

    for b in blocks:
        indent(b)

    html_blocks = (tostring(b, encoding='utf-8').decode() for b in blocks)

    # Surround with newlines, then prefix every line start (including
    # the one right after the final newline) with CDATA_INDENT.
    cdata = '\n' + '\n'.join(html_blocks) + '\n'
    cdata = re.sub('^', CDATA_INDENT, cdata, flags=re.MULTILINE)

    return CDATA(cdata)


def generate_concert(concert, concerts_url, pubdates, lang):
    """Return the RSS <item> element for CONCERT.

    CONCERTS_URL is the page the item links to; the link's fragment is
    ``concert-`` followed by the concert's date.

    PUBDATES maps ``concert.time.isoformat(timespec='minutes')`` to an
    ISO 8601 publication date string, or to None when there is none.  A
    concert missing from PUBDATES raises KeyError.
    """
    formatters = LOCALIZED_FORMATS[lang]

    with tmplocale(lang):
        title = formatters['title'](concert)

    anchor = f'concert-{concert.time.strftime("%F")}'

    item = E.item(
        E.title(title),
        E.link(f'{concerts_url}#{anchor}'),
        E.description(cdata_concert(concert, lang)),
    )

    pubdate_str = pubdates[concert.time.isoformat(timespec='minutes')]

    if pubdate_str is not None:
        pubdate = datetime.fromisoformat(pubdate_str)

        if pubdate.tzinfo is None:
            # Naive timestamps are taken to be local time.  astimezone()
            # attaches the UTC offset in effect on *that* date, whereas
            # TIMEZONE holds today's offset and would mislabel dates on
            # the other side of a DST change.  Timestamps that already
            # carry a zone are left alone.
            pubdate = pubdate.astimezone()

        item.append(E.pubDate(pubdate.strftime(DATE_FORMAT)))

    return item


def generate_concerts(concerts_src, concerts_url, concerts_pubdates, lang):
    """Return a tuple of RSS <item> elements, one per concert.

    CONCERTS_SRC is handed to read_concerts; CONCERTS_PUBDATES is the
    path of a JSON file holding the mapping generate_concert expects.
    """
    with open(concerts_pubdates, encoding='utf-8') as pubdates_file:
        pubdates = json.load(pubdates_file)

    return tuple(
        generate_concert(c, concerts_url, pubdates, lang)
        for c in read_concerts(concerts_src)
    )


def main(concerts_src, feed_dst, concerts_pubdates, domain):
    """Write to FEED_DST the RSS feed for the concerts in CONCERTS_SRC.

    The feed language is whatever guess_language returns for
    CONCERTS_SRC; it must be a key of LOCALIZED_TEXT.  DOMAIN is the
    host name used to build every absolute URL in the feed.
    """
    lang = guess_language(concerts_src)
    text = LOCALIZED_TEXT[lang]

    url = f'https://{domain}'
    index_url = urljoin(url, text['indexpath'])
    concerts_url = urljoin(index_url, 'concerts.html')

    now_formatted = NOW.strftime(DATE_FORMAT)

    concerts = generate_concerts(
        concerts_src, concerts_url, concerts_pubdates, lang
    )

    rss = E.rss(
        E.channel(
            E.title(text['title']),
            E.link(index_url),
            E.description(text['description']),
            E.image(
                E.url(urljoin(url, 'images/logo.svg')),
                E.link(concerts_url),
            ),
            E.lastBuildDate(now_formatted),
            E.pubDate(now_formatted),
            E.language(lang),
            *concerts,
        ),
        version='2.0',
    )

    indent(rss)

    with open(feed_dst, 'wb') as feed:
        feed.write(tostring(rss, encoding='utf-8', xml_declaration=True))


if __name__ == '__main__':
    main(*argv[1:])