#!/usr/bin/env python3
"""Scrape all Ontario Apple Grower profiles."""
import requests
import time
import csv
from bs4 import BeautifulSoup

# Opaque profile tokens harvested from the Ontario Apple Growers
# "meet the grower" listing; each one is passed verbatim as the ?id=
# query parameter of the profile URL built in the loop below.
# NOTE(review): the first three tokens are 32 chars while the rest are
# 12 — presumably two generations of URL-token format; confirm both
# styles still resolve on the live site.
profile_ids = [
    "ylK9B0n86j1EhbsX3g5YmJ3n6S262a22",
    "I6TAr7Px1viTXzklyUv41z1rG6ybFI6u",
    "DL92qWLtXg388LDy3sF6GLT9lnwcpAnA",
    "M2Aw18f2yW7s",
    "iaOMaUTEiptQ",
    "7aG89mIvlWXr",
    "i391qs0tn5i4",
    "rjoohd379132",
    "s134hmk60t7o",
    "83yw69e50yig",
    "fttg08067f5h",
    "ta017tj17th3",
    "13991nrm6ybv",
    "6i13m5y56bea",
    "ur84c5tg847m",
    "9o2xf76cl7q9",
    "p9bee6104fy2",
    "791x4gm0uit5",
    "i5rak2751o6j",
    "8wtdk66j245i"
]

# One dict per successfully scraped profile, in profile_ids order.
results = []

for profile_id in profile_ids:
    url = f"https://www.onapples.com/meet-grower.php?id={profile_id}"
    print(f"Scraping: {url}")

    try:
        response = requests.get(url, timeout=10)
        # Fail fast on 4xx/5xx: the original silently parsed error pages
        # and appended empty rows for dead profile IDs.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Grower name: prefer the page <h1>; fall back to <title> with
        # the site-wide suffix stripped.
        title = soup.find('title')
        h1 = soup.find('h1')
        name = ""
        if h1:
            name = h1.get_text(strip=True)
        elif title:
            name = title.get_text(strip=True).replace(" - Ontario Apple Farms", "").strip()

        # Main profile body — try the most specific container first.
        content = soup.find('div', class_='content') or soup.find('article') or soup.find('main')

        company = ""
        location = ""

        if content:
            # Heuristic extraction of farm name and Ontario location from
            # the first few paragraphs; keep the FIRST match of each so a
            # later paragraph can't clobber a good hit.
            for p in content.find_all('p')[:3]:
                p_text = p.get_text(strip=True)

                # Company: up to two words preceding (and including) the
                # first word containing "Farm"/"Orchard".
                if not company and ('Farm' in p_text or 'Orchard' in p_text):
                    words = p_text.split()
                    for i, word in enumerate(words):
                        if 'Farm' in word or 'Orchard' in word:
                            # max(0, i-2) already handles a match in the
                            # leading words; the original's `if i > 0` guard
                            # wrongly skipped paragraphs starting with the
                            # farm name.
                            company = ' '.join(words[max(0, i - 2):i + 1])
                            break

                # Location: first comma-separated segment mentioning
                # Ontario or a County.
                if not location and ('Ontario' in p_text or 'County' in p_text):
                    for part in p_text.split(','):
                        if 'Ontario' in part or 'County' in part:
                            location = part.strip()
                            break

        results.append({
            'Name': name,
            'Company': company,
            'Location': location,
            'URL': url,
            'Country': 'Canada',
            'Region': 'Ontario',
            'Produce': 'Apples',
            'Source': 'Ontario Apple Growers'
        })

    except Exception as e:
        # Best-effort scrape: log and move on so one bad profile
        # doesn't abort the whole run.
        print(f"Error scraping {url}: {e}")
    finally:
        time.sleep(0.5)  # Be polite — throttle even after an error.

# Persist every collected row to CSV.
# NOTE(review): output path is hard-coded to one developer's machine —
# consider an argument or environment variable before sharing this script.
output_file = '/Users/max/.openclaw/workspace/postharvest/ontario-growers-scraped.csv'
fieldnames = ['Name', 'Company', 'Location', 'URL', 'Country', 'Region', 'Produce', 'Source']
with open(output_file, 'w', newline='', encoding='utf-8') as out:
    csv_writer = csv.DictWriter(out, fieldnames=fieldnames)
    csv_writer.writeheader()
    for row in results:
        csv_writer.writerow(row)

print(f"\n✅ Scraped {len(results)} Ontario apple grower profiles")
print(f"📁 Saved to: {output_file}")
