from collections import defaultdict
from dataclasses import dataclass, field
from os import path
from typing import Iterable, Iterator  # Iterator kept for backward compatibility


@dataclass
class Directory:
    """Contents of one directory: file names and immediate subfolder names."""

    # Freshly built instances hold a list here; file names are appended in
    # the order they are discovered.
    files: Iterable[str] = field(default_factory=list)
    # Freshly built instances hold a set here; after ``deserialize`` this is
    # whatever collection the input mapping carried (a sorted list when the
    # mapping came from ``serialize``).
    subfolders: Iterable[str] = field(default_factory=set)

    def serialize(self):
        """Return a plain dict with sorted ``files`` and ``subfolders`` lists."""
        return {
            'files': sorted(self.files),
            'subfolders': sorted(self.subfolders),
        }

    @classmethod
    def deserialize(cls, d):
        """Build a ``Directory`` from a mapping such as ``serialize`` returns.

        The mapping's entries are passed straight through as keyword
        arguments, so the stored collections keep the types (and ordering)
        found in ``d``.
        """
        return cls(**d)


def _find_files(extensions, repository):
    """List tracked files whose path matches one of ``extensions``.

    ``repository`` must expose ``git.ls_files(*args)`` returning the
    NUL-separated output of ``git ls-files -z`` as a string (presumably a
    GitPython ``Repo`` -- confirm against the caller).

    Returns a list of paths; empty when nothing matches.
    """
    patterns = (f'**.{ext}' for ext in extensions)
    zero = '\x00'
    output = repository.git.ls_files('-z', *patterns)
    # ``''.split(zero)`` yields ``['']``; drop empty entries so that empty
    # output does not turn into a bogus file with an empty name.
    return [name for name in output.strip(zero).split(zero) if name]


def _fill_directories(files, top_dir):
    """Group ``files`` by directory and link every directory to its parent.

    Returns a ``defaultdict`` mapping a directory path (``''`` for the top
    level of relative paths) to its ``Directory``. ``top_dir`` is accepted
    for interface compatibility and is not used.
    """
    directories = defaultdict(Directory)
    for f in files:
        fdir, fname = path.split(f)
        directories[fdir].files.append(fname)
        # Walk up the tree, registering each directory in its parent.
        while fdir:
            parent, child = path.split(fdir)
            if parent == fdir:
                # Reached a filesystem root (e.g. '/'), which splits to
                # itself; stop here instead of looping forever.
                break
            directories[parent].subfolders.add(child)
            fdir = parent
    return directories


def compute_directories(extensions, repository):
    """Return the directory tree of tracked files matching ``extensions``.

    ``repository`` must provide ``git.ls_files`` and ``working_dir``.
    The result maps each directory path to a ``Directory``.
    """
    files = _find_files(extensions, repository)
    top_dir = path.relpath(repository.working_dir, path.curdir)
    return _fill_directories(files, top_dir)


def deserialize_directories(directories):
    """Turn a mapping of serialized directories back into ``Directory`` objects."""
    return {
        k: Directory.deserialize(v)
        for k, v in directories.items()
    }