#!/usr/bin/env python3
import csv
import sys
from pathlib import Path

# Files to compare. Each is read relative to the current working directory.
files = [
    'SA-MASTER-LIST-400.csv',
    'sa-expansion-round2.csv',
    'sa-new-facilities.csv',
    'south-africa-packhouses.csv',
]

# The file every other file is compared against.
MASTER_FILE = 'SA-MASTER-LIST-400.csv'

# Header names that may hold the company name, in order of preference.
NAME_COLUMNS = ('Company Name', 'Company', 'Name')

# Maximum number of missing names listed per file in the report.
PREVIEW_LIMIT = 10


def load_company_names(path):
    """Return the set of normalized company names found in the CSV at *path*.

    For each row the first column in ``NAME_COLUMNS`` holding a non-blank
    value is used. Names are stripped of surrounding whitespace and
    lower-cased so that the same company spelled with different casing or
    padding compares equal across files. Rows with no usable name are
    ignored.
    """
    names = set()
    # 'utf-8-sig' drops a leading byte-order mark (otherwise the first header
    # would read '\ufeffCompany Name' and never match); newline='' lets the
    # csv module handle line endings inside quoted fields itself.
    with open(path, 'r', encoding='utf-8-sig', newline='') as f:
        for row in csv.DictReader(f):
            for column in NAME_COLUMNS:
                # Short rows yield None for missing cells, hence the `or ''`.
                value = (row.get(column) or '').strip()
                if value:
                    names.add(value.lower())
                    break
    return names


def first_names(names, limit=PREVIEW_LIMIT):
    """Return the alphabetically first *limit* entries of *names* as a list."""
    # Sort BEFORE slicing: slicing a set first would pick arbitrary members.
    return sorted(names)[:limit]


all_companies = {}

for csv_path in files:
    if not Path(csv_path).exists():
        # Missing inputs are tolerated, but say so instead of skipping silently.
        print(f"{csv_path}: not found, skipping", file=sys.stderr)
        continue

    companies = load_company_names(csv_path)
    all_companies[csv_path] = companies
    print(f"{csv_path}: {len(companies)} companies")

print("\n--- Unique Companies ---")

# Check what's in the smaller files but not in the master list.
if MASTER_FILE not in all_companies:
    print(
        f"warning: {MASTER_FILE} was not loaded; "
        "every company below is reported as missing from it",
        file=sys.stderr,
    )
master = all_companies.get(MASTER_FILE, set())

for csv_path, companies in all_companies.items():
    if csv_path == MASTER_FILE:
        continue

    unique = companies - master
    if unique:
        print(f"\n{csv_path} has {len(unique)} companies NOT in SA-MASTER-LIST-400:")
        for comp in first_names(unique):
            print(f"  - {comp}")
        if len(unique) > PREVIEW_LIMIT:
            print(f"  ... and {len(unique) - PREVIEW_LIMIT} more")
