diff options
Diffstat (limited to 'admin/stats/dump.py')
| -rwxr-xr-x | admin/stats/dump.py | 100 |
1 files changed, 79 insertions, 21 deletions
diff --git a/admin/stats/dump.py b/admin/stats/dump.py index ce7d5b2..d60d226 100755 --- a/admin/stats/dump.py +++ b/admin/stats/dump.py @@ -11,7 +11,7 @@ import re from statistics import mean, median, stdev from sys import argv from urllib.parse import unquote, urlparse -from typing import Dict, List, Set, Tuple +from typing import Dict, List, Tuple import user_agents @@ -195,7 +195,7 @@ def find_referrers(visitors): }) -def daily_visitors(visitors, output_path): +def daily_visitors(visitors, output_dir): days: Dict[datetime, Counter] = defaultdict(Counter) columns = ('mobile', 'tablet', 'pc', 'bot', 'n/a') @@ -205,7 +205,7 @@ def daily_visitors(visitors, output_path): for day in find_days(v.visits): days[day][v.useragent.value] += 1 - with open(output_path, 'w') as f: + with open(Path(output_dir).joinpath('dailyvisitors.csv'), 'w') as f: out = csv.writer(f) out.writerow(('day', 'total', *columns)) print('day', 'total', *columns, sep='\t') @@ -218,8 +218,10 @@ def daily_visitors(visitors, output_path): out.writerow(values) print(*values, sep='\t') + return days -def daily_visits(visitors, output_path): + +def daily_visits(visitors, output_dir): days: Dict[datetime, Counter] = defaultdict(Counter) columns = ('mobile', 'tablet', 'pc', 'bot', 'n/a') @@ -230,7 +232,7 @@ def daily_visits(visitors, output_path): day = datetime_day(visit[0].time) days[day][v.useragent.value] += 1 - with open(output_path, 'w') as f: + with open(Path(output_dir, 'dailyvisits.csv'), 'w') as f: out = csv.writer(f) out.writerow(('day', 'total', *columns)) print('day', 'total', *columns, sep='\t') @@ -243,8 +245,10 @@ def daily_visits(visitors, output_path): out.writerow(values) print(*values, sep='\t') + return days + -def daily_pages_per_visit(visitors, output_path): +def daily_pages_per_visit(visitors, output_dir): days: Dict[datetime, list] = defaultdict(list) columns = ('min', 'max', 'med', 'avg', 'dev') @@ -258,7 +262,7 @@ def daily_pages_per_visit(visitors, output_path): day = datetime_day(visit[0].time) days[day].append(len(visit)) - with open(output_path, 'w') as f: + with open(Path(output_dir, 'dailypagespervisit.csv'), 'w') as f: out = csv.writer(f) out.writerow(('day', *columns)) print('day', *columns, sep='\t') @@ -277,8 +281,10 @@ def daily_pages_per_visit(visitors, output_path): out.writerow(values) print(*values[:4], *(f'{v:.2f}' for v in values[4:]), sep='\t') + return days + -def daily_page_hits(visitors, output_path): +def daily_page_hits(visitors, output_dir): days: Dict[datetime, Counter] = defaultdict(Counter) columns = find_pages(visitors.values()) @@ -293,7 +299,7 @@ def daily_page_hits(visitors, output_path): for access in visit: days[day][access.resource] += 1 - with open(output_path, 'w') as f: + with open(Path(output_dir, 'dailypagehits.csv'), 'w') as f: out = csv.writer(f) out.writerow(('day', *columns)) @@ -307,8 +313,10 @@ def daily_page_hits(visitors, output_path): for page, hits in page_hits.most_common(5): print(hits, page, sep='\t') + return days -def daily_referrers(visitors, output_path): + +def daily_referrers(visitors, output_dir): days: Dict[datetime, Counter] = defaultdict(Counter) columns = find_referrers(visitors.values()) @@ -323,7 +331,7 @@ def daily_referrers(visitors, output_path): continue days[day][simplify_referrer(access.referrer)] += 1 - with open(output_path, 'w') as f: + with open(Path(output_dir, 'dailyreferrers.csv'), 'w') as f: out = csv.writer(f) out.writerow(('day', *columns)) print('day', *columns, sep='\t') @@ -335,25 +343,75 @@ def daily_referrers(visitors, output_path): out.writerow(values) print(*values, sep='\t') + return days + -def daily_stats(visitors, output_dir): +def dump_stats(visitors, output_dir): output_dir = Path(output_dir) - daily_visitors(visitors, output_dir.joinpath('dailyvisitors.csv')) - daily_visits(visitors, output_dir.joinpath('dailyvisits.csv')) - daily_pages_per_visit(visitors, output_dir.joinpath('dailypagespervisit.csv')) - daily_page_hits(visitors, output_dir.joinpath('dailypagehits.csv')) - daily_referrers(visitors, output_dir.joinpath('dailyreferrers.csv')) + daily_visitors(visitors, output_dir) + visits_pday = daily_visits(visitors, output_dir) + pagespervisit_pday = daily_pages_per_visit(visitors, output_dir) + pagehits_pday = daily_page_hits(visitors, output_dir) + referrers_pday = daily_referrers(visitors, output_dir) + + ua_values = tuple(ua.value for ua in UserAgentKind) + + nb_visitors = { + ua.value: sum(1 for v in visitors.values() if v.useragent == ua) + for ua in UserAgentKind + } + nb_visits = { + ua: sum(visits_pday[day][ua] for day in visits_pday) + for ua in ua_values + } + pages_per_visit = tuple( + nb for day in pagespervisit_pday for nb in pagespervisit_pday[day] + ) + hits_per_page = { + page: sum(pagehits_pday[day][page] for day in pagehits_pday) + for page in find_pages(visitors.values()) + } + referrers = { + ref: sum(referrers_pday[day][ref] for day in referrers_pday) + for ref in find_referrers(visitors.values()) + } + + with open(Path(output_dir, 'global.csv'), 'w') as f: + out = csv.writer(f) + out.writerow(('#visitors',)) + out.writerows( + (ua, nb_visitors[ua]) for ua in ua_values + ) + out.writerow(('total', sum(nb_visitors.values()))) + + out.writerow(('#visits',)) + out.writerows( + (ua, nb_visits[ua]) for ua in ua_values + ) + out.writerow(('total', sum(nb_visits.values()))) + + out.writerow(('#pages/visit',)) + out.writerows(( + ('min', min(pages_per_visit)), + ('max', max(pages_per_visit)), + ('med', median(pages_per_visit)), + ('avg', mean(pages_per_visit)), + ('dev', stdev(pages_per_visit)) + )) + out.writerow(('#views/page',)) + lines = reversed(sorted(hits_per_page.items(), key=lambda kv: kv[1])) + out.writerows(lines) -def global_stats(visitors, output_dir): - pass + out.writerow(('#referrers',)) + lines = reversed(sorted(referrers.items(), key=lambda kv: kv[1])) + out.writerows(lines) def main(logs_paths, output_dir): accesses = parse(logs_paths) visitors = sort_visits(accesses) - daily_stats(visitors, output_dir) - global_stats(visitors, output_dir) + dump_stats(visitors, output_dir) if __name__ == '__main__': |
