import csv
import os

# Combine all batch files
batches = [
    'pht_top_398_apple_pear_citrus.csv',
    'new_companies_batch1.csv',
    'new_companies_batch2.csv',
    'new_companies_batch3.csv',
    'new_companies_batch4.csv',
]


def load_unique_companies(batch_files, key_field='Company'):
    """Merge CSV batch files, keeping the first row seen for each company.

    Companies are compared case-insensitively with surrounding whitespace
    ignored. Files that do not exist are reported and skipped. Rows whose
    company cell is empty or absent (a short row) are skipped and reported,
    because they cannot be de-duplicated by name.

    Args:
        batch_files: Iterable of CSV paths, read in order.
        key_field: Name of the column that holds the company name.

    Returns:
        A 4-tuple ``(header, rows, seen_keys, source_counts)``:
        ``header`` is the field-name list of the first file that has one
        (``None`` if no file does); ``rows`` is the list of unique row
        dicts; ``seen_keys`` is the set of normalized company names;
        ``source_counts`` is a list of ``(path, record_count)`` pairs for
        every file that was read, counting parsed CSV records (duplicates
        included, header excluded).

    Raises:
        KeyError: If a file with data rows has no ``key_field`` column.
    """
    first_header = None
    rows = []
    seen_keys = set()
    source_counts = []

    for batch_file in batch_files:
        try:
            # newline='' lets the csv module do its own line-ending handling,
            # so quoted fields that contain newlines stay one record.
            handle = open(batch_file, 'r', newline='')
        except FileNotFoundError:
            print(f"Skipping {batch_file} - not found")
            continue

        record_count = 0
        unnamed_count = 0
        with handle:
            reader = csv.DictReader(handle)
            if first_header is None:
                first_header = reader.fieldnames

            for row in reader:
                record_count += 1
                try:
                    raw_name = row[key_field]
                except KeyError:
                    raise KeyError(
                        f"{batch_file} has no {key_field!r} column"
                    ) from None

                # DictReader fills cells missing from a short row with None.
                company_key = raw_name.lower().strip() if raw_name else ''
                if not company_key:
                    unnamed_count += 1
                    continue
                if company_key not in seen_keys:
                    seen_keys.add(company_key)
                    rows.append(row)

        if unnamed_count:
            print(f"  {batch_file}: skipped {unnamed_count} row(s) "
                  f"with no company name")
        source_counts.append((batch_file, record_count))

    return first_header, rows, seen_keys, source_counts


header, all_companies, seen, source_counts = load_unique_companies(batches)

# Verified facilities companies (the 10 new ones).
# NOTE(review): these names are only declared here; nothing in this section
# merges them into all_companies.
verified_new = [
    "Washington Fruit & Produce Co.",
    "Kershaw Fruit & Cold Storage Co.",
    "McDougall & Sons",
    "Boyer Nurseries and Orchards Inc",
    "Rio Grande Juice Company",
    "Monson Fruit Company",
    "Apple House Inc",
    "Blue Bird Inc",
    "Brownfield Orchard",
    "Niagara Fresh Fruit Co.",
]

print(f"Total companies compiled: {len(all_companies)}")
print(f"Unique companies: {len(seen)}")
print("\nCompanies by source:")
# Counts come from the records parsed during loading rather than from raw
# line counts, so multi-line quoted fields, blank lines and empty files are
# all reported correctly.
for batch, count in source_counts:
    print(f"  {batch}: {count}")

