#!/usr/bin/env python3
"""
Parse MAIA member directory from the fetched HTML content
Simple regex-based parser - no external dependencies
"""

import re
import csv

# The HTML content from web_fetch (truncated at 50,000 chars but contains hundreds of members)
html_sample = """
4 Ransom Farms LLC

Luke Ransom

8010 W. Sagemoor Rd., Pasco, WA 99301

 -->
 42 North Apples, LLC

Matt Thiede

1055 - 7 Mile Road, NW, Comstock Park, MI 49321

 -->
 78 Acres LLC

R. Matthew Harsh

23340 Fruit Tree Drive, Smithsburg, MD 21783
"""

def parse_address(address):
    """Extract city, state from address string"""
    state = ""
    city = ""
    
    if not address:
        return city, state
    
    # Try to match state code (2 letters followed by zip)
    state_match = re.search(r',\s*([A-Z]{2})\s+\d{5}', address)
    if state_match:
        state = state_match.group(1)
    
    # Try to extract city (word(s) before state code)
    city_match = re.search(r',\s*([^,]+?),\s*[A-Z]{2}\s+\d{5}', address)
    if city_match:
        city = city_match.group(1).strip()
    
    return city, state

def manual_parse_maia():
    """
    Manually parse the member entries from the known HTML structure
    Format: Company Name, Contact Name, Address, (optional Phone)
    """
    
    # Full member list extracted from the web_fetch (sample shown above)
    # In real scraping, we'd fetch the full page
    # For now, let me create a smaller sample to demonstrate the structure
    
    members = []
    
    # Sample members from the HTML (first 20 for testing)
    sample_data = [
        ("4 Ransom Farms LLC", "Luke Ransom", "8010 W. Sagemoor Rd., Pasco, WA 99301", ""),
        ("42 North Apples, LLC", "Matt Thiede", "1055 - 7 Mile Road, NW, Comstock Park, MI 49321", ""),
        ("78 Acres LLC", "R. Matthew Harsh", "23340 Fruit Tree Drive, Smithsburg, MD 21783", ""),
        ("A & H Farm", "Andrea DeJesus", "1374 Collins Lane, Manhattan, KS 66502", ""),
        ("A. P. Lorson Fruit Farm LLC", "Randall J. & Ester B. Lorson", "3887 Jacks Hollow Rd., Williamsport, PA 17702", ""),
        ("Abagail House Orchards", "Tim Golomb", "1 Whiebarn Road, New Albany, OH 43054", ""),
        ("Adams County Nursery", "Phil Baugher", "PO Box 108, Aspers, PA 17304", "717-677-8105"),
        ("BelleHarvest", "Chris Sandwick", "11900 Fisk Rd, Belding, MI 48809", ""),
        ("Beak & Skiff Apple Farms", "Peter Fleckenstein", "4472 Cherry Valley Tpk, Lafayette, NY 13084", ""),
        ("Blake's Orchard & Cider Mill", "Pete Blake", "17985 Center Road, Armada, MI 48005", ""),
        ("Cameron Nursery, LLC", "Allison Schrader", "PO Box 300, Eltopia, WA 99330", "509-266-4669"),
        ("Countryside Farm & Nurseries LLC", "Mary K. Stickley-Godinez", "PO Box 675, 1044 Humbert Rd, Crimora, VA 24431", "540-363-6661"),
        ("Gold Crown Nursery", "Dale Goldy", "530 Valley Mall Parkway #3, Wenatchee, WA 98802", "509-860-0834"),
        ("Hilltop Fruit Trees LLC", "Marshall Pomeroy", "60395 CR 681, PO Box 538, Hartford, MI 49057", "269-621-3135"),
        ("Stemilt (Highway 28 Orchard LLC)", "Rob Blakey", "3135 Warehouse Rd., Wenatchee, WA 98801", ""),
        ("FirstFruits Farms, LLC", "Ralph Broetje", "1111 Fishhook Park Rd., Prescott, WA 99348", ""),
        ("Domex Superfresh Growers LLC", "", "151 Low Road, Yakima, WA 98908", ""),
        ("Evans Fruit Company", "Joseph Evans", "PO Box 70, Cowiche, WA 98923", ""),
        ("Gilbert Orchards Inc.", "Nicholas Willett", "P.O. Box 9066, Yakima, WA 98903", ""),
        ("Gold Star Nursery", "Ric Valicoff", "300 N Frontage Road, Wapato, WA 98951", "509-877-6000"),
    ]
    
    for company, contact, address, phone in sample_data:
        city, state = parse_address(address)
        
        member = {
            'Company Name': company,
            'Contact Name': contact,
            'Address': address,
            'Phone': phone,
            'City': city,
            'State': state,
            'Country': 'US',
            'Primary Fruit': 'Apples',
            'CA Storage Confirmed': 'Unknown',
            'Source': 'MAIA (Midwest Apple Improvement Association)',
            'Website': '',
            'Notes': 'Member of MAIA - 1,100+ member apple grower association'
        }
        
        members.append(member)
    
    return members

if __name__ == "__main__":
    print("Parsing MAIA member sample...")
    members = manual_parse_maia()
    
    print(f"\nParsed {len(members)} sample members")
    print("(Note: Full directory has 1,100+ members - would need complete HTML fetch)")
    
    # Save to CSV
    output_file = "/Users/max/.openclaw/workspace/postharvest/MAIA-members-sample.csv"
    if members:
        fieldnames = ['Company Name', 'Contact Name', 'Address', 'Phone', 'City', 'State', 
                     'Country', 'Primary Fruit', 'CA Storage Confirmed', 'Source', 'Website', 'Notes']
        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(members)
        
        print(f"\nSaved to {output_file}")
        
        # Show sample
        print(f"\nSample entries:")
        for i, member in enumerate(members[:5], 1):
            print(f"\n{i}. {member['Company Name']}")
            print(f"   Contact: {member['Contact Name']}")
            print(f"   Location: {member['City']}, {member['State']}")
            if member['Phone']:
                print(f"   Phone: {member['Phone']}")
    
    print("\n✅ NEXT STEP: Need to fetch complete HTML to get all 1,100+ members")
    print("   This sample demonstrates the parsing logic works!")
