#!/usr/bin/env python3
"""
MARATHON PART 11 - Final Push to 100% Verified
Systematic verification of all remaining Confirmed and Estimated facilities
"""

import csv
import json
import time
from datetime import datetime

# Load every facility row from the scored CSV.
# newline='' is what the csv module asks for when it is handed a file object:
# it lets the reader do its own newline handling, so quoted fields that
# contain line breaks are parsed as a single value instead of being mangled.
with open('verified-scored-facilities.csv', 'r', encoding='utf-8', newline='') as f:
    facilities = list(csv.DictReader(f))

print(f"Total facilities: {len(facilities)}")

# Split the rows by their 'Confidence Level' column. Rows carrying any other
# value end up in none of the three lists. A missing column raises KeyError
# here on purpose: continuing with an unrecognised file would silently report
# zero facilities in every bucket.
verified = [row for row in facilities if row['Confidence Level'] == 'Verified']
confirmed = [row for row in facilities if row['Confidence Level'] == 'Confirmed']
estimated = [row for row in facilities if row['Confidence Level'] == 'Estimated']

print(f"Verified: {len(verified)}")
print(f"Confirmed: {len(confirmed)} - need upgrade")
print(f"Estimated: {len(estimated)} - need full research")
print(f"Total to process: {len(confirmed) + len(estimated)}")
print()

# Group remaining by state/region for systematic processing
def get_state(facility):
    """Return the state key used to group a facility.

    The key is derived from ``facility['Region']``:

    * With a comma in the region, the text after the last comma is the
      candidate (surrounding whitespace removed). A candidate of three
      characters or fewer is returned as is. A longer, multi-word candidate
      whose last word has exactly two characters yields that last word
      (``"California CA"`` -> ``"CA"``). Anything else is returned whole.
    * Without a comma, the whole region is returned with surrounding
      whitespace removed, so ``" TX"`` and ``"TX"`` share one group.

    Args:
        facility: Mapping for one facility row; only the ``'Region'`` entry
            is read.

    Returns:
        The state key as a string, or ``''`` when the region is missing,
        ``None`` or empty.
    """
    # `or ''` covers a key that is present but None (csv.DictReader fills
    # short rows with None), which a .get() default alone would not catch.
    region = (facility.get('Region') or '').strip()
    if ',' not in region:
        return region

    state = region.rsplit(',', 1)[-1].strip()
    if len(state) <= 3:
        return state

    # Longer text such as "California CA": keep only the trailing abbreviation.
    words = state.split()
    if len(words) > 1 and len(words[-1]) == 2:
        return words[-1]
    return state

# Bucket the Confirmed facilities under the state key derived by get_state().
confirmed_by_state = {}
for facility in confirmed:
    confirmed_by_state.setdefault(get_state(facility), []).append(facility)

# Same bucketing for the Estimated facilities.
estimated_by_state = {}
for facility in estimated:
    estimated_by_state.setdefault(get_state(facility), []).append(facility)

# Per-state counts, one section per confidence level, states in sorted order.
report_sections = (
    ("=== CONFIRMED FACILITIES BY STATE ===", confirmed_by_state),
    ("=== ESTIMATED FACILITIES BY STATE ===", estimated_by_state),
)
for section_index, (heading, groups) in enumerate(report_sections):
    if section_index:
        # Blank line between sections, none before the first.
        print()
    print(heading)
    for state_key in sorted(groups):
        print(f"{state_key}: {len(groups[state_key])} facilities")

print()
print("=== PRIORITY ORDER FOR VERIFICATION ===")

# One (state, total, confirmed_count, estimated_count) entry for every state
# that appears in either work queue.
all_states = set(confirmed_by_state) | set(estimated_by_state)
state_priority = []
for state in all_states:
    conf_count = len(confirmed_by_state.get(state, []))
    est_count = len(estimated_by_state.get(state, []))
    state_priority.append((state, conf_count + est_count, conf_count, est_count))

# Largest backlog first. Ties are broken by state name: all_states is a set,
# and the iteration order of a set of strings can change from one interpreter
# run to the next (hash randomization), so sorting on the total alone would
# make the printed ranking and work-queue-priority.json differ between runs.
state_priority.sort(key=lambda entry: (-entry[1], entry[0]))

for i, (state, total, conf, est) in enumerate(state_priority, 1):
    print(f"{i}. {state}: {total} total ({conf} Confirmed, {est} Estimated)")

# Persist each work queue as pretty-printed JSON, then list what was written.
queue_files = (
    ('work-queue-confirmed.json', confirmed_by_state),
    ('work-queue-estimated.json', estimated_by_state),
    ('work-queue-priority.json', state_priority),
)
for queue_path, payload in queue_files:
    with open(queue_path, 'w') as out:
        json.dump(payload, out, indent=2)

print()
print("Work queues saved:")
for queue_path, _ in queue_files:
    print(f"- {queue_path}")