#!/usr/bin/env python3
"""
4-State Apple Facility Scraper - Uses WORKING compass actor
"""
import json
import os
import re
import time

import requests

# SECURITY: an API token was previously hard-coded here.  Prefer the
# APIFY_TOKEN environment variable; the literal fallback preserves existing
# behaviour, but the exposed token should be rotated and then removed.
APIFY_TOKEN = os.environ.get("APIFY_TOKEN", "apify_api_s5UWN0W1FkB0cmjtawlkMLIFof2vfu3faGOR")
# Apify's REST API addresses actors as "username~actor-name" (tilde, not
# slash): a "/" would split the URL path and miss the /runs endpoint.
ACTOR_ID = "compass~crawler-google-places"  # This one WORKS (used for NZ)

# All 9 searches combined (NY, PA, MI, WA)
# One combined job: nine Google-Maps queries spanning NY, PA, MI and WA.
# Grouped per state for readability; concatenation preserves the run order.
_NY_QUERIES = [
    "apple orchard New York USA",
    "apple grower New York USA",
    "cold storage apples New York USA",
]
_PA_QUERIES = [
    "apple orchard Pennsylvania USA",
    "apple grower Pennsylvania USA",
]
_MI_QUERIES = [
    "apple orchard Michigan USA",
    "apple grower Michigan USA",
]
_WA_QUERIES = [
    "apple packer Washington State USA",
    "apple cold storage Washington State USA",
]
searches = {"4_states_apples": _NY_QUERIES + _PA_QUERIES + _MI_QUERIES + _WA_QUERIES}

all_results = []

# Per-HTTP-request network timeout (seconds).  This bounds a single request,
# not the actor run itself; without it a stalled connection hangs the script.
REQUEST_TIMEOUT = 60

for region, queries in searches.items():
    print(f"\n🍎 === Starting 4-State Apple Scrape ===")
    print(f"Queries: {len(queries)}")

    # Actor input (exact format that worked for the NZ run).
    input_data = {
        "searchStringsArray": queries,
        "maxCrawledPlacesPerSearch": 150,
        "language": "en",
        "exportPlaceUrls": False,
        "includeWebResults": False
    }

    print(f"\nInput config:")
    print(json.dumps(input_data, indent=2))

    # --- Start the actor run -----------------------------------------------
    print(f"\n🚀 Starting Apify run...")
    response = requests.post(
        f"https://api.apify.com/v2/acts/{ACTOR_ID}/runs",
        params={"token": APIFY_TOKEN},
        json=input_data,
        timeout=REQUEST_TIMEOUT,
    )

    if response.status_code != 201:
        print(f"❌ Error starting run: {response.status_code}")
        print(f"Response: {response.text}")
        raise SystemExit(1)

    run_data = response.json()
    run_id = run_data['data']['id']
    print(f"✅ Run started! ID: {run_id}")
    print(f"Monitor: https://console.apify.com/actors/runs/{run_id}")

    # --- Poll until the run reaches a terminal state ------------------------
    iteration = 0
    while True:
        iteration += 1
        status_response = requests.get(
            f"https://api.apify.com/v2/acts/{ACTOR_ID}/runs/{run_id}",
            params={"token": APIFY_TOKEN},
            timeout=REQUEST_TIMEOUT,
        )

        status_data = status_response.json()['data']
        status = status_data['status']

        if iteration % 6 == 0:  # progress line roughly once a minute
            stats = status_data.get('stats', {})
            scraped = stats.get('requestsFinished', 0)
            print(f"⏳ [{iteration*10}s] Status: {status} | Scraped: {scraped}")

        # Terminal states per the Apify run lifecycle.  'TIMED-OUT' is also
        # terminal; omitting it previously made this loop spin forever when a
        # run hit its time limit.
        if status in ['SUCCEEDED', 'FAILED', 'ABORTED', 'TIMED-OUT']:
            break

        time.sleep(10)

    print(f"\n🏁 Final status: {status}")

    if status != 'SUCCEEDED':
        print(f"❌ Run failed!")
        raise SystemExit(1)

    # --- Download the dataset ----------------------------------------------
    # NOTE(review): a single items request is assumed sufficient here (9
    # searches x 150 places max stays well under the API's per-request item
    # cap) — revisit with pagination if maxCrawledPlacesPerSearch grows.
    dataset_id = run_data['data']['defaultDatasetId']
    print(f"\n📥 Downloading results from dataset {dataset_id}...")

    results_response = requests.get(
        f"https://api.apify.com/v2/datasets/{dataset_id}/items",
        params={"token": APIFY_TOKEN},
        timeout=REQUEST_TIMEOUT,
    )
    # Previously the body was parsed unconditionally; an error payload would
    # have crashed json() or been silently mis-counted as results.
    if results_response.status_code != 200:
        print(f"❌ Error downloading dataset: {results_response.status_code}")
        print(f"Response: {results_response.text}")
        raise SystemExit(1)

    results = results_response.json()
    print(f"✅ Found {len(results)} total results!")

    all_results.extend(results)

# Save results
# Persist the combined results.  The parent directory is created on demand so
# a fresh machine/checkout does not crash with FileNotFoundError on open().
output_file = '/Users/max/.openclaw/workspace/postharvest/apify-results/4-states-apples-FINAL.json'
os.makedirs(os.path.dirname(output_file), exist_ok=True)
with open(output_file, 'w') as f:
    json.dump(all_results, f, indent=2)

print(f"\n🎉 ✅ COMPLETE!")
print(f"Total facilities: {len(all_results)}")
print(f"Saved to: {output_file}")

# Quick per-state breakdown of the scraped addresses.
# Word-boundary regex matching replaces the earlier plain substring test,
# which produced false positives (e.g. "NY" inside an upper-cased street or
# town name).  An address like "Albany, New York, NY" still counts toward
# both the full name and the abbreviation, matching the original semantics.
STATE_TOKENS = ['New York', 'NY', 'Pennsylvania', 'PA', 'Michigan', 'MI', 'Washington', 'WA']

states_found = {}
for r in all_results:
    # The address may sit at the top level or nested under "location".
    # Either key may also be present with a None value, which the previous
    # r.get('location', {}).get(...) chain crashed on.
    addr = r.get('address') or (r.get('location') or {}).get('address') or ''
    for state in STATE_TOKENS:
        if re.search(rf'\b{state}\b', addr):
            states_found[state] = states_found.get(state, 0) + 1

print(f"\n📊 Breakdown:")
for state, count in sorted(states_found.items()):
    print(f"  {state}: {count}")
