# (extraction artifact removed: "Download smile.zip — 3.16 MB" download-widget text)

# Tabulate the per-image metadata rows for the report.
# NOTE(review): depends on `img_info` built by the image-stats loop; in this
# scrambled file that loop appears later (L7) — confirm intended execution order.
out['image_stats'] = pd.DataFrame(img_info)

# Attach the CSV summaries to the report.
# NOTE(review): `csv_summaries` is not defined anywhere in the visible source —
# presumably produced by a CSV-summary step lost in extraction; verify it exists.
out['csv_summaries'] = csv_summaries

# 3. Image stats (if any).
# For every .jpg/.png under ROOT, record path, dimensions, color mode, and
# file size; on failure (corrupt/unreadable image) record the error instead
# so one bad file does not abort the audit.
img_info = []
# rglob() returns generators, which do not support `+`; materialize each
# before concatenating.
for p in list(ROOT.rglob('*.jpg')) + list(ROOT.rglob('*.png')):
    try:
        with Image.open(p) as im:
            img_info.append({
                'path': str(p.relative_to(ROOT)),
                'width': im.width,
                'height': im.height,
                'mode': im.mode,
                'size_bytes': p.stat().st_size,
            })
    except Exception as e:
        # Best-effort: keep the failure visible in the report rather than crash.
        img_info.append({'path': str(p), 'error': str(e)})

# 1. File type counts: frequency of each lowercased file extension under ROOT.
# (The trailing truncated fragment `out['image_stats'] = pd` from the original
# line was a scrambling artifact duplicating the DataFrame assignment elsewhere
# in the file; dropped here.)
ext_counts = Counter(p.suffix.lower() for p in ROOT.rglob('*') if p.is_file())
out['ext_counts'] = ext_counts

# Report duplicate files: any SHA-256 digest in `hashes` that maps to more
# than one path. (The original line's f-strings had lost their braces —
# f"h:" / f" - p" would print literal text, not the values — and `duplicates`
# was never derived from `hashes` anywhere in the visible source.)
duplicates = {h: paths for h, paths in hashes.items() if len(paths) > 1}
print("\n=== Duplicate files (SHA‑256) ===")
for h, paths in duplicates.items():
    print(f"{h}:")
    for p in paths:
        print(f"  - {p}")

# 2. Group every regular file under ROOT by its SHA-256 digest so that
# identical contents (duplicates) share one entry in `hashes`.
hashes = {}
for path in ROOT.rglob('*'):
    if not path.is_file():
        continue
    digest = hashlib.sha256()
    # Stream in 8 KiB chunks so large files never load fully into memory.
    with path.open('rb') as fh:
        while block := fh.read(8192):
            digest.update(block)
    hashes.setdefault(digest.hexdigest(), []).append(path.relative_to(ROOT))

# Save everything for the paper.
# NOTE(review): `out` contains non-JSON types — pathlib.Path values in the
# hash map and a pandas DataFrame under 'image_stats' — so a plain
# json.dump(out, f) raises TypeError. `default=str` stringifies them so the
# dump always succeeds; confirm a stringified DataFrame is acceptable
# downstream (Counter serializes fine as a dict).
with open('audit_report.json', 'w', encoding='utf-8') as f:
    json.dump(out, f, indent=2, default=str)