summary | refs | log | tree | commit | diff
path: root/admin/stats
diff options
context:
space:
mode:
Diffstat (limited to 'admin/stats')
-rwxr-xr-x  admin/stats/convertcsv.sh  6
-rwxr-xr-x  admin/stats/dump.py  100
2 files changed, 85 insertions, 21 deletions
diff --git a/admin/stats/convertcsv.sh b/admin/stats/convertcsv.sh
new file mode 100755
index 0000000..e86fc60
--- /dev/null
+++ b/admin/stats/convertcsv.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+for f in "$@"
+do
+ libreoffice --infilter="CSV:44,34,UTF-8" --convert-to xlsx "$f"
+done
diff --git a/admin/stats/dump.py b/admin/stats/dump.py
index ce7d5b2..d60d226 100755
--- a/admin/stats/dump.py
+++ b/admin/stats/dump.py
@@ -11,7 +11,7 @@ import re
from statistics import mean, median, stdev
from sys import argv
from urllib.parse import unquote, urlparse
-from typing import Dict, List, Set, Tuple
+from typing import Dict, List, Tuple
import user_agents
@@ -195,7 +195,7 @@ def find_referrers(visitors):
})
-def daily_visitors(visitors, output_path):
+def daily_visitors(visitors, output_dir):
days: Dict[datetime, Counter] = defaultdict(Counter)
columns = ('mobile', 'tablet', 'pc', 'bot', 'n/a')
@@ -205,7 +205,7 @@ def daily_visitors(visitors, output_path):
for day in find_days(v.visits):
days[day][v.useragent.value] += 1
- with open(output_path, 'w') as f:
+ with open(Path(output_dir).joinpath('dailyvisitors.csv'), 'w') as f:
out = csv.writer(f)
out.writerow(('day', 'total', *columns))
print('day', 'total', *columns, sep='\t')
@@ -218,8 +218,10 @@ def daily_visitors(visitors, output_path):
out.writerow(values)
print(*values, sep='\t')
+ return days
-def daily_visits(visitors, output_path):
+
+def daily_visits(visitors, output_dir):
days: Dict[datetime, Counter] = defaultdict(Counter)
columns = ('mobile', 'tablet', 'pc', 'bot', 'n/a')
@@ -230,7 +232,7 @@ def daily_visits(visitors, output_path):
day = datetime_day(visit[0].time)
days[day][v.useragent.value] += 1
- with open(output_path, 'w') as f:
+ with open(Path(output_dir, 'dailyvisits.csv'), 'w') as f:
out = csv.writer(f)
out.writerow(('day', 'total', *columns))
print('day', 'total', *columns, sep='\t')
@@ -243,8 +245,10 @@ def daily_visits(visitors, output_path):
out.writerow(values)
print(*values, sep='\t')
+ return days
+
-def daily_pages_per_visit(visitors, output_path):
+def daily_pages_per_visit(visitors, output_dir):
days: Dict[datetime, list] = defaultdict(list)
columns = ('min', 'max', 'med', 'avg', 'dev')
@@ -258,7 +262,7 @@ def daily_pages_per_visit(visitors, output_path):
day = datetime_day(visit[0].time)
days[day].append(len(visit))
- with open(output_path, 'w') as f:
+ with open(Path(output_dir, 'dailypagespervisit.csv'), 'w') as f:
out = csv.writer(f)
out.writerow(('day', *columns))
print('day', *columns, sep='\t')
@@ -277,8 +281,10 @@ def daily_pages_per_visit(visitors, output_path):
out.writerow(values)
print(*values[:4], *(f'{v:.2f}' for v in values[4:]), sep='\t')
+ return days
+
-def daily_page_hits(visitors, output_path):
+def daily_page_hits(visitors, output_dir):
days: Dict[datetime, Counter] = defaultdict(Counter)
columns = find_pages(visitors.values())
@@ -293,7 +299,7 @@ def daily_page_hits(visitors, output_path):
for access in visit:
days[day][access.resource] += 1
- with open(output_path, 'w') as f:
+ with open(Path(output_dir, 'dailypagehits.csv'), 'w') as f:
out = csv.writer(f)
out.writerow(('day', *columns))
@@ -307,8 +313,10 @@ def daily_page_hits(visitors, output_path):
for page, hits in page_hits.most_common(5):
print(hits, page, sep='\t')
+ return days
-def daily_referrers(visitors, output_path):
+
+def daily_referrers(visitors, output_dir):
days: Dict[datetime, Counter] = defaultdict(Counter)
columns = find_referrers(visitors.values())
@@ -323,7 +331,7 @@ def daily_referrers(visitors, output_path):
continue
days[day][simplify_referrer(access.referrer)] += 1
- with open(output_path, 'w') as f:
+ with open(Path(output_dir, 'dailyreferrers.csv'), 'w') as f:
out = csv.writer(f)
out.writerow(('day', *columns))
print('day', *columns, sep='\t')
@@ -335,25 +343,75 @@ def daily_referrers(visitors, output_path):
out.writerow(values)
print(*values, sep='\t')
+ return days
+
-def daily_stats(visitors, output_dir):
+def dump_stats(visitors, output_dir):
output_dir = Path(output_dir)
- daily_visitors(visitors, output_dir.joinpath('dailyvisitors.csv'))
- daily_visits(visitors, output_dir.joinpath('dailyvisits.csv'))
- daily_pages_per_visit(visitors, output_dir.joinpath('dailypagespervisit.csv'))
- daily_page_hits(visitors, output_dir.joinpath('dailypagehits.csv'))
- daily_referrers(visitors, output_dir.joinpath('dailyreferrers.csv'))
+ daily_visitors(visitors, output_dir)
+ visits_pday = daily_visits(visitors, output_dir)
+ pagespervisit_pday = daily_pages_per_visit(visitors, output_dir)
+ pagehits_pday = daily_page_hits(visitors, output_dir)
+ referrers_pday = daily_referrers(visitors, output_dir)
+
+ ua_values = tuple(ua.value for ua in UserAgentKind)
+
+ nb_visitors = {
+ ua.value: sum(1 for v in visitors.values() if v.useragent == ua)
+ for ua in UserAgentKind
+ }
+ nb_visits = {
+ ua: sum(visits_pday[day][ua] for day in visits_pday)
+ for ua in ua_values
+ }
+ pages_per_visit = tuple(
+ nb for day in pagespervisit_pday for nb in pagespervisit_pday[day]
+ )
+ hits_per_page = {
+ page: sum(pagehits_pday[day][page] for day in pagehits_pday)
+ for page in find_pages(visitors.values())
+ }
+ referrers = {
+ ref: sum(referrers_pday[day][ref] for day in referrers_pday)
+ for ref in find_referrers(visitors.values())
+ }
+
+ with open(Path(output_dir, 'global.csv'), 'w') as f:
+ out = csv.writer(f)
+ out.writerow(('#visitors',))
+ out.writerows(
+ (ua, nb_visitors[ua]) for ua in ua_values
+ )
+ out.writerow(('total', sum(nb_visitors.values())))
+
+ out.writerow(('#visits',))
+ out.writerows(
+ (ua, nb_visits[ua]) for ua in ua_values
+ )
+ out.writerow(('total', sum(nb_visits.values())))
+
+ out.writerow(('#pages/visit',))
+ out.writerows((
+ ('min', min(pages_per_visit)),
+ ('max', max(pages_per_visit)),
+ ('med', median(pages_per_visit)),
+ ('avg', mean(pages_per_visit)),
+ ('dev', stdev(pages_per_visit))
+ ))
+ out.writerow(('#views/page',))
+ lines = reversed(sorted(hits_per_page.items(), key=lambda kv: kv[1]))
+ out.writerows(lines)
-def global_stats(visitors, output_dir):
- pass
+ out.writerow(('#referrers',))
+ lines = reversed(sorted(referrers.items(), key=lambda kv: kv[1]))
+ out.writerows(lines)
def main(logs_paths, output_dir):
accesses = parse(logs_paths)
visitors = sort_visits(accesses)
- daily_stats(visitors, output_dir)
- global_stats(visitors, output_dir)
+ dump_stats(visitors, output_dir)
if __name__ == '__main__':