#!/usr/bin/env python3
"""
Scrape MAIA (Midwest Apple Improvement Association) member directory
1,100+ apple growers with contact info
"""

import re
import csv
import requests
from bs4 import BeautifulSoup

def scrape_maia(url="https://maiaapples.com/member-directory/", html=None):
    """Scrape the MAIA member directory into a list of member dicts.

    Args:
        url: Directory page to fetch when *html* is not supplied.
        html: Optional pre-fetched page text. When given, no network
            request is made (useful for tests and offline re-parsing).

    Returns:
        List of dicts with keys 'Company Name', 'Contact Name',
        'Address', 'Phone', 'City', 'State', 'Country', 'Source',
        'Website', 'Primary Fruit', 'CA Storage Confirmed', 'Notes'.

    Raises:
        requests.HTTPError: if the directory page returns 4xx/5xx.
        requests.Timeout: if the server does not respond in time.
    """
    if html is None:
        print(f"Fetching {url}...")
        # timeout prevents hanging forever; raise_for_status surfaces
        # HTTP errors instead of silently parsing an error page.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        html = response.text

    members = []
    # Member entries in the page source are separated by HTML comment
    # terminators ('-->'); each entry is a newline-separated record of
    # company / contact / address / optional phone.
    for entry in html.split('-->'):
        member = _parse_entry(entry)
        if member is not None:
            members.append(member)
    return members


def _parse_entry(entry):
    """Parse one '-->'-delimited directory entry.

    Returns a member dict, or None when the entry does not look like a
    real company listing (fewer than 3 lines, or too-short name).
    """
    lines = [line.strip() for line in entry.strip().split('\n') if line.strip()]
    if len(lines) < 3:
        return None

    company_name = lines[0]
    contact_name = lines[1]
    address = lines[2]

    # Only treat the 4th line as a phone number when the WHOLE line is
    # phone-like. (A prefix match would wrongly accept any line that
    # merely starts with a digit, e.g. a second address line.)
    phone = ""
    if len(lines) > 3 and re.fullmatch(r'[\d\-\(\)\. ]+', lines[3]):
        phone = lines[3]

    # Skip entries that don't look like real companies
    if not company_name or len(company_name) < 3:
        return None

    city, state = _parse_city_state(address)
    return {
        'Company Name': company_name,
        'Contact Name': contact_name,
        'Address': address,
        'Phone': phone,
        'City': city,
        'State': state,
        'Country': 'US',
        'Source': 'MAIA',
        'Website': '',  # Not in directory
        'Primary Fruit': 'Apples',
        'CA Storage Confirmed': 'Unknown',
        'Notes': 'MAIA member - Midwest apple grower'
    }


def _parse_city_state(address):
    """Extract (city, state) from '1234 Street, City, ST 12345'.

    Returns ("", "") when the address lacks enough comma-separated
    parts to identify a city/state.
    """
    city = ""
    state = ""
    if address:
        parts = address.split(',')
        if len(parts) >= 2:
            city = parts[-2].strip()
            state_zip = parts[-1].strip().split()
            if state_zip:
                state = state_zip[0]
    return city, state

if __name__ == "__main__":
    print("Scraping MAIA member directory...")
    members = scrape_maia()

    print(f"\nFound {len(members)} members")

    # Persist the scraped rows to CSV, then preview a handful on stdout.
    output_file = "MAIA-members-scraped.csv"
    if not members:
        print("No members found!")
    else:
        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=list(members[0].keys()))
            writer.writeheader()
            writer.writerows(members)

        print(f"\nSaved to {output_file}")

        # Show sample
        print(f"\nSample of first 5 members:")
        for idx, row in enumerate(members[:5], start=1):
            print(f"\n{idx}. {row['Company Name']}")
            print(f"   Contact: {row['Contact Name']}")
            print(f"   Location: {row['City']}, {row['State']}")
            print(f"   Phone: {row['Phone']}")