#!/usr/bin/env python3
import csv
from collections import Counter

# Load every row of the facilities CSV into memory as a list of dicts.
# csv.DictReader takes the column names from the first row of the file.
# newline='' is what the csv module requires of files it reads: without it,
# line breaks embedded inside quoted fields are not interpreted correctly.
with open('verified-scored-facilities.csv', 'r', encoding='utf-8', newline='') as f:
    reader = csv.DictReader(f)
    facilities = list(reader)

# Partition the loaded rows by their 'Confidence Level' column.
def _with_confidence(rows, level):
    """Return the rows whose 'Confidence Level' equals *level* exactly."""
    return [row for row in rows if row['Confidence Level'] == level]


confirmed = _with_confidence(facilities, 'Confirmed')
estimated = _with_confidence(facilities, 'Estimated')

# Headline counts, then a separator rule followed by a blank line.
print(f"CONFIRMED FACILITIES: {len(confirmed)}")
print(f"ESTIMATED FACILITIES: {len(estimated)}")
print(f"{'='*80}\n")

# Profile the confirmed facilities by which optional columns are filled in.
# A column counts as filled when its value is present, non-empty and not one
# of the placeholder strings below.
_PLACEHOLDER_VALUES = ['', 'Unknown', 'N/A']


def _is_filled(row, column):
    """Return True when row[column] holds a real (non-placeholder) value."""
    value = row.get(column)
    return bool(value) and value not in _PLACEHOLDER_VALUES


conf_with_rooms = [row for row in confirmed if _is_filled(row, 'Total Rooms')]
conf_with_sqft = [row for row in confirmed if _is_filled(row, 'Square Footage')]
conf_with_produce = [row for row in confirmed if _is_filled(row, 'Primary Produce')]
# The website check is the inverse: keep rows where the column is NOT filled.
conf_without_website = [row for row in confirmed if not _is_filled(row, 'Website')]

print("CONFIRMED - Data Analysis:")
print(f"  With room count: {len(conf_with_rooms)}")
print(f"  With square footage: {len(conf_with_sqft)}")
print(f"  With produce info: {len(conf_with_produce)}")
print(f"  Without website: {len(conf_without_website)}")
print()

# Group by state
def get_state(region):
    """Extract the state portion of a free-form region string.

    Rules, applied in order:
      * a missing, empty or whitespace-only region gives 'Unknown';
      * if the region contains a comma, the text after the last comma is used;
      * otherwise, if it contains ' - ', the text before the first ' - ' is used;
      * otherwise the whole region is used.

    The result is always stripped of surrounding whitespace so that values
    differing only in padding are counted under the same key, and an empty
    result (e.g. from a trailing comma) is reported as 'Unknown'.

    Args:
        region: The region text, or None.

    Returns:
        The extracted state string, or 'Unknown' when none can be derived.
    """
    if not region or not region.strip():
        return 'Unknown'
    if ',' in region:
        state = region.rsplit(',', 1)[-1]
    elif ' - ' in region:
        state = region.split(' - ', 1)[0]
    else:
        state = region
    # Strip in every branch, including the fallback, and never return ''.
    return state.strip() or 'Unknown'

# Tally facilities per state (as derived by get_state) for each confidence group.
confirmed_by_state = Counter(map(get_state, (row['Region'] for row in confirmed)))
estimated_by_state = Counter(map(get_state, (row['Region'] for row in estimated)))

# Print the ten most common states of each group, with one blank line
# between the two sections.
_state_sections = (
    ("CONFIRMED by State (top 10):", confirmed_by_state),
    ("ESTIMATED by State (top 10):", estimated_by_state),
)
for position, (heading, tally) in enumerate(_state_sections):
    if position:
        print()
    print(heading)
    for state, count in tally.most_common(10):
        print(f"  {state}: {count}")

print()
print("="*80)
# List a capped sample of the estimated facilities. The listing is limited to
# 20 rows, so the heading reports how many are shown out of the total rather
# than describing the output as complete.
estimated_preview = estimated[:20]
print(f"ESTIMATED facilities (first {len(estimated_preview)} of {len(estimated)}):")
for facility in estimated_preview:
    print(f"  {facility['Company']} - {facility['Region']}")
