#!/usr/bin/env python3
"""
Apply verified enhancements to the master facility list
"""

import csv
import json
from pathlib import Path

# Verified enhancements data.
#
# Maps a company name to the fields that should overwrite that company's row
# in the facilities CSV. update_facility_csv() looks entries up by exact match
# against the row's 'Company' value and translates each snake_case key to its
# CSV column (e.g. 'ca_ma_storage' -> 'CA/MA', 'square_footage' ->
# 'Square Footage'). Keys an entry omits leave the existing cell untouched.
# 'score' is an int here and is converted to a string when applied;
# 'total_rooms' is only applied when its value is non-empty.
ENHANCEMENTS = {
    "Rainier Fruit Company": {
        "total_rooms": "100",
        "square_footage": "1,500,000",
        "size_classification": "XXLarge",
        "premium_varieties": "Organic apples, pears, cherries, blueberries",
        "organic": "Yes",
        "ca_ma_storage": "Yes",
        "score": 135,
        "verification_source": "The Packer, Terra Firma Magazine",
        "confidence_level": "Verified",
        "notes": "Zirkle facility: 100 CA rooms, 1.5M sq ft, 6 packing lines, 29 shipping bays. Nation's largest organic grower."
    },
    "Washington Fruit & Produce Company": {
        "square_footage": "840,000",
        "size_classification": "XXLarge",
        "premium_varieties": "Modern apple varieties",
        "ca_ma_storage": "Yes",
        "score": 115,
        "verification_source": "Capital Press, Yakima Herald",
        "confidence_level": "Verified",
        "notes": "Founded 1916. 840K sq ft total (300K new facility 2017). Most modern in WA."
    },
    "FirstFruits Farms": {
        "square_footage": "500,000",
        "size_classification": "XXLarge",
        "premium_varieties": "23 varieties: Honeycrisp, Gala, Fuji, club varieties, Cosmic Crisp",
        "organic": "Yes",
        "ca_ma_storage": "Yes",
        "score": 125,
        "verification_source": "FirstFruits.com",
        "confidence_level": "Verified",
        "notes": "Multi-regional WA+MI. Acquired Applewood Fresh 2023. 500K+ boxes pears/year. Gem Drops cherries."
    },
    "Stemilt Growers LLC": {
        "square_footage": "400,000",
        "size_classification": "XXLarge",
        "premium_varieties": "Cosmic Crisp (promoted), sweet cherries, organic tree fruit",
        "organic": "Yes",
        "ca_ma_storage": "Yes",
        "score": 125,
        "verification_source": "Stemilt.com, Wikipedia",
        "confidence_level": "Verified",
        "notes": "Leading sweet cherry shipper. Largest organic tree fruit supplier. Solar-powered CA storage."
    },
    "CMI Orchards Headquarters": {
        "square_footage": "300,000",
        "size_classification": "XXLarge",
        "premium_varieties": "Conventional & organic apples, pears, cherries",
        "organic": "Yes",
        "ca_ma_storage": "Yes",
        "score": 120,
        "verification_source": "CMIOrchards.com",
        "confidence_level": "Verified",
        "notes": "Family cooperative. Multiple facilities: Wenatchee, Wapato, Chelan, Quincy, Hood River OR."
    },
    "Domex Superfresh Growers": {
        "square_footage": "350,000",
        "size_classification": "XXLarge",
        "premium_varieties": "Apples, pears, cherries, blueberries, apricots",
        "organic": "Yes",
        "ca_ma_storage": "Yes",
        "score": 120,
        "verification_source": "IGSO, Produce Market Guide",
        "confidence_level": "Verified",
        "notes": "5th generation. First solar CA storage. Hydroelectric/wind/solar powered. ISO 22000."
    },
    "Diamond Fruit Growers": {
        "square_footage": "300,000",
        "size_classification": "XXLarge",
        "premium_varieties": "Conventional & organic pears (primary), apples",
        "organic": "Yes",
        "ca_ma_storage": "Yes",
        "score": 115,
        "verification_source": "DiamondFruit.com, Produce News",
        "confidence_level": "Verified",
        "notes": "Founded 1913. Hood River cooperative. 650K box capacity (1937 world's largest)."
    },
    "Allan Brothers Fruit Inc.": {
        "premium_varieties": "Apples, cherries",
        "ca_ma_storage": "Yes",
        "score": 95,
        "verification_source": "AllanBrosFruit.com",
        "confidence_level": "Verified",
        "notes": "100+ years. Pioneers in automation/technology/R&D. CA storage."
    },
    "Chelan Fruit Cooperative": {
        "premium_varieties": "Ambrosia, Jazz, apples, cherries",
        "organic": "Unknown",
        "ca_ma_storage": "Yes",
        "score": 95,
        "verification_source": "Good Fruit Grower",
        "confidence_level": "Verified",
        "notes": "1,100 acres orchards. 5M packages/year capacity. Exclusive Ambrosia & Jazz varieties."
    },
    "Lineage Tacoma Port": {
        "square_footage": "132,000",
        "size_classification": "XLarge",
        "score": 90,
        "verification_source": "NW Seaport Alliance, Lineage website",
        "confidence_level": "Verified",
        "notes": "CTPAT certified. 93M cubic ft total in PNW. 1.5M sq ft across 5 regional warehouses."
    },
    "Nash Produce": {
        "square_footage": "500,000",
        "size_classification": "XXLarge",
        "premium_varieties": "Sweet potatoes (specialist)",
        "ca_ma_storage": "Yes",
        "score": 105,
        "verification_source": "NashProduce.com, The Packer",
        "confidence_level": "Verified",
        "notes": "92+ acres facility. 4M bushels CA storage. 12,000 acres sweet potato production."
    },
    "Sage Fruit Company": {
        "premium_varieties": "Organic apples, pears, cherries",
        "organic": "Yes",
        "ca_ma_storage": "Yes",
        "score": 95,
        "verification_source": "Wikipedia, Organic Produce Network",
        "confidence_level": "Verified",
        "notes": "Sales/marketing for 4 grower/packer/shippers. Organic facilities in Chelan & Yakima."
    },
    "Gebbers Farms": {
        "premium_varieties": "Apples, cherries",
        "ca_ma_storage": "Yes",
        "score": 90,
        "verification_source": "GebbersFarms.com",
        "confidence_level": "Verified",
        "notes": "Owns Brewster Heights Packing facility. Full ownership of warehouse & farming operations."
    }
}

# Default directory holding the input CSV and receiving the enhanced CSV.
_DEFAULT_WORKSPACE = Path('/Users/max/.openclaw/workspace/postharvest')

# Enhancement key -> CSV column it overwrites. Keys not listed are ignored.
_ENHANCEMENT_COLUMNS = {
    'total_rooms': 'Total Rooms',
    'square_footage': 'Square Footage',
    'size_classification': 'Size Classification',
    'premium_varieties': 'Premium Varieties',
    'organic': 'Organic',
    'ca_ma_storage': 'CA/MA',
    'score': 'Score',
    'verification_source': 'Verification Source',
    'confidence_level': 'Confidence Level',
    'notes': 'Notes',
}

# Column order of the enhanced output file.
_OUTPUT_COLUMNS = [
    'Company', 'Region', 'Website', 'Size Classification',
    'Total Rooms', 'Square Footage', 'Primary Produce',
    'Premium Varieties', 'Organic', 'CA/MA', 'Score',
    'Verification Source', 'Confidence Level', 'Notes',
]


def _parse_score(raw):
    """Return *raw* as an int, or 0 when it is missing, blank or non-numeric."""
    try:
        return int(raw)
    except (TypeError, ValueError):
        pass
    try:
        # Accept decimal text such as "95.0"; the fraction is truncated.
        return int(float(raw))
    except (TypeError, ValueError, OverflowError):
        return 0


def _cell(facility, column, default):
    """Return the text of *column*, or *default* when the cell is absent or None."""
    value = facility.get(column, default)
    return default if value is None else str(value)


def update_facility_csv(workspace=None, enhancements=None):
    """Update the verified facilities CSV with enhancements.

    Reads ``verified-scored-facilities.csv`` from the workspace directory,
    overwrites the listed fields of every row whose ``Company`` matches an
    enhancement entry, sorts all rows by score (highest first), writes them to
    ``verified-scored-facilities-ENHANCED.csv`` in the same directory and
    prints summary statistics.

    Args:
        workspace: Directory containing the input file and receiving the
            output file. Defaults to the original hard-coded workspace path.
        enhancements: Mapping of company name to a dict of enhancement
            fields. Defaults to the module-level ``ENHANCEMENTS`` table.

    Returns:
        Path of the enhanced CSV file that was written.

    Raises:
        FileNotFoundError: If the input CSV does not exist.
        KeyError: If the input CSV has rows but no ``Company`` column.
    """
    workspace = _DEFAULT_WORKSPACE if workspace is None else Path(workspace)
    if enhancements is None:
        enhancements = ENHANCEMENTS

    input_file = workspace / 'verified-scored-facilities.csv'
    output_file = workspace / 'verified-scored-facilities-ENHANCED.csv'

    # Read existing data. newline='' lets the csv module handle line endings
    # itself, which is required for quoted fields containing newlines.
    with open(input_file, 'r', newline='', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        facilities = list(reader)
        input_columns = reader.fieldnames or []

    print(f"Loaded {len(facilities)} facilities from {input_file}")

    dropped = [name for name in input_columns if name not in _OUTPUT_COLUMNS]
    if dropped:
        print(f"  Note: input columns not written to output: {', '.join(dropped)}")

    # Apply enhancements
    enhanced_count = 0
    for facility in facilities:
        company = facility['Company']
        updates = enhancements.get(company)
        if updates is None:
            continue

        for key, value in updates.items():
            column = _ENHANCEMENT_COLUMNS.get(key)
            if column is None:
                continue
            # An empty room count must not wipe out an existing value.
            if key == 'total_rooms' and not value:
                continue
            # CSV cells are text; scores are kept as ints in the table.
            facility[column] = str(value) if key == 'score' else value

        enhanced_count += 1
        print(f"  Enhanced: {company} -> Score: {facility.get('Score', 'n/a')}")

    # Sort by score (highest first). Blank or malformed scores count as 0
    # instead of aborting the run; the sort is stable for equal scores.
    facilities.sort(key=lambda row: _parse_score(row.get('Score')), reverse=True)

    # Write enhanced data. extrasaction='ignore' keeps unexpected input
    # columns (or over-long rows) from raising ValueError in DictWriter.
    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=_OUTPUT_COLUMNS, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(facilities)

    print(f"\nSaved {len(facilities)} facilities to {output_file}")
    print(f"Enhanced {enhanced_count} facilities with verified data")

    # Statistics
    print("\n" + "="*70)
    print("ENHANCED DATASET STATISTICS")
    print("="*70)

    print("\nTop 20 Facilities (Enhanced Data):")
    for i, facility in enumerate(facilities[:20], 1):
        # _cell() guarantees strings, so the 's' format specs cannot fail on
        # None cells produced by short input rows.
        company = _cell(facility, 'Company', '')
        sqft = _cell(facility, 'Square Footage', 'Unknown')
        rooms = _cell(facility, 'Total Rooms', 'Unknown')
        score = _cell(facility, 'Score', '0')
        size = _cell(facility, 'Size Classification', 'Unknown')
        conf = _cell(facility, 'Confidence Level', 'Unknown')

        print(f"  {i:2d}. {company[:48]:48s} | {score:>3s} pts | " +
              f"{size:8s} | {rooms:>5s} rooms | {sqft:>10s} sf | {conf}")

    verified_count = sum(1 for row in facilities if row.get('Confidence Level') == 'Verified')
    xxlarge_count = sum(1 for row in facilities if row.get('Size Classification') == 'XXLarge')
    xlarge_count = sum(1 for row in facilities if row.get('Size Classification') == 'XLarge')
    high_score_count = sum(1 for row in facilities if _parse_score(row.get('Score')) >= 100)
    organic_count = sum(1 for row in facilities if row.get('Organic') == 'Yes')

    print(f"\nVerified facilities: {verified_count}")
    print(f"XXLarge facilities: {xxlarge_count}")
    print(f"XLarge facilities: {xlarge_count}")
    print(f"Facilities with 100+ score: {high_score_count}")
    print(f"Facilities with organic: {organic_count}")

    return output_file

if __name__ == '__main__':
    # Script entry point: run the enhancement pass, then announce the file it
    # produced between two 70-character rules (preceded by a blank line).
    output = update_facility_csv()
    print(
        "\n" + "=" * 70,
        f"ENHANCED DATASET READY: {output}",
        "=" * 70,
        sep="\n",
    )
