diff options
Diffstat (limited to 'admin/stats/dump.py')
| -rwxr-xr-x | admin/stats/dump.py | 46 |
1 files changed, 43 insertions, 3 deletions
diff --git a/admin/stats/dump.py b/admin/stats/dump.py
index d0bdda3..ac412c9 100755
--- a/admin/stats/dump.py
+++ b/admin/stats/dump.py
@@ -10,7 +10,7 @@ from pathlib import Path
 import re
 from statistics import mean, median, stdev
 from sys import argv
-from urllib.parse import urlparse
+from urllib.parse import unquote, urlparse
 from typing import Dict, List, Tuple
 
 import user_agents
@@ -41,7 +41,7 @@ DOMAINS = {
 def normalize_path(p):
     if p == '/':
         return '/index.html'
-    return p
+    return unquote(p)
 
 
 @dataclass
@@ -165,6 +165,16 @@ def find_days(visits):
     }
 
 
+def find_pages(visitors):
+    return sorted({
+        access.resource
+        for v in visitors
+        for visit in v.visits
+        for access in visit
+        if v.useragent.is_human()
+    })
+
+
 def daily_visitors(visitors, output_path):
     days: Dict[datetime, Counter] = defaultdict(Counter)
     columns = ('mobile', 'tablet', 'pc', 'bot', 'n/a')
@@ -248,12 +258,42 @@ def daily_pages_per_visit(visitors, output_path):
             print(*values[:4], *(f'{v:.2f}' for v in values[4:]), sep='\t')
 
 
+def daily_page_hits(visitors, output_path):
+    days: Dict[datetime, Counter] = defaultdict(Counter)
+    columns = find_pages(visitors.values())
+
+    print('Page hits:')
+
+    for v in visitors.values():
+        if not v.useragent.is_human():
+            continue
+
+        for visit in v.visits:
+            day = datetime_day(visit[0].time)
+            for access in visit:
+                days[day][access.resource] += 1
+
+    with open(output_path, 'w') as f:
+        out = csv.writer(f)
+        out.writerow(('day', *columns))
+
+        for day in sorted(days):
+            page_hits = days[day]
+            values = (day.strftime('%F'),
+                      *(page_hits[page] for page in columns))
+            out.writerow(values)
+
+            print(day.strftime('%F'))
+            for page, hits in page_hits.most_common(5):
+                print(hits, page, sep='\t')
+
+
 def daily_stats(visitors, output_dir):
     output_dir = Path(output_dir)
 
     daily_visitors(visitors, output_dir.joinpath('dailyvisitors.csv'))
     daily_visits(visitors, output_dir.joinpath('dailyvisits.csv'))
     daily_pages_per_visit(visitors,
                           output_dir.joinpath('dailypagespervisit.csv'))
-    # daily_page_hits(visitors, output_dir.joinpath('dailypagehits.csv'))
+    daily_page_hits(visitors, output_dir.joinpath('dailypagehits.csv'))
     # daily_referrers(visitors, output_dir.joinpath('dailyreferrers.csv'))
