diff options
Diffstat (limited to 'admin/stats.py')
| -rwxr-xr-x[-rw-r--r--] | admin/stats.py | 70 |
1 file changed, 40 insertions, 30 deletions
diff --git a/admin/stats.py b/admin/stats.py index f69390b..e3d46cc 100644..100755 --- a/admin/stats.py +++ b/admin/stats.py @@ -1,7 +1,10 @@ +#!/usr/bin/env python3 + from collections import Counter, defaultdict from dataclasses import dataclass from datetime import datetime, timedelta import re +from os import remove from subprocess import run from sys import argv @@ -38,9 +41,17 @@ class Access: if resource == '/': resource = '/index.html' + referer = re.sub( + r'https://(?:www\.)?quatuorbellefeuille\.(?:fr|com)(/[^?]*)(?:\?.+)?', + r'\1', + info['referer'] + ) + if referer == '/': + referer = '/index.html' + return cls( info['address'], user_agents.parse(info['useragent']), - info['referer'], datetime.strptime(info['date'], DATE_FMT), + referer, datetime.strptime(info['date'], DATE_FMT), resource ) @@ -96,16 +107,33 @@ def order(grouped_visits): return visits -def normalizeref(referer): - if referer == '-': - return 'inconnu' - referer = re.sub( - r'https://(?:www\.)?quatuorbellefeuille\.(?:fr|com)(/[^?]*)(?:\?.+)?', - r'\1', referer - ) - if referer == '/': - return '/index.html' - return referer +def visit_graph(accesses): + edges = (f' "{a.referer}" -> "{a.resource}";' + for a in accesses) + return '\n'.join((f'digraph visit {{', *edges, '}')) + +def graph(visits): + date = visits[0][0].time.strftime('%F') + + tempfiles = { + f'{date}-{i}.pdf': visit for i, visit in enumerate(visits) + } + + for tempfile, visit in tempfiles.items(): + vgraph = visit_graph(visit) + + with open(tempfile, 'wb') as vfile: + vfile.write( + run(('dot', '-Tpdf'), text=False, check=True, + capture_output=True, input=vgraph.encode()) + .stdout + ) + + run(('qpdf', '--empty', '--pages', *tempfiles, '--', f'{date}.pdf'), + check=True) + + for f in tempfiles: + remove(f) def analyze(logs_path): accesses = parse(logs_path) @@ -119,25 +147,7 @@ def analyze(logs_path): for page, hits in pagehits.most_common(): print(hits, page) - date = accesses[0].time.strftime('%F') - - for i, visit in 
enumerate(visits_by_time.values()): - edges = (f' "{normalizeref(a.referer)}" -- "{a.resource}";' - for a in visit) - graph = '\n'.join(('graph trip {', - *edges, - '}')) - dot = run( - ('dot', '-Tpdf'), text=False, capture_output=True, check=True, - input=graph.encode() - ) - with open(f'{date}-{i}.pdf', 'wb') as visitgraph: - visitgraph.write(dot.stdout) - - pages = (f'{date}-{i}.pdf' for i in range(len(visits_by_time.values()))) - run(('qpdf', '--empty', '--pages', *pages, '--', f'{date}.pdf'), - text=False, check=True) - + graph(tuple(visits_by_time.values())) if __name__ == '__main__': analyze(argv[1]) |
