#!/usr/bin/env python3
import json
import os
import subprocess
import sys
import time

import requests

# Resolve the Apify API token: prefer the APIFY_TOKEN environment variable,
# fall back to the macOS keychain, and abort the script if neither is set.
APIFY_TOKEN = os.environ.get('APIFY_TOKEN', '')

if not APIFY_TOKEN:
    print("❌ No APIFY_TOKEN found. Getting from keychain...")
    # Query the macOS keychain without going through a shell: an argument
    # list avoids the quoting/injection pitfalls of os.popen, and lets us
    # check the exit code instead of only sniffing stdout.
    try:
        proc = subprocess.run(
            ["security", "find-generic-password", "-s", "APIFY_TOKEN", "-w"],
            capture_output=True,  # swallow stderr, same as the old 2>/dev/null
            text=True,
        )
        result = proc.stdout.strip() if proc.returncode == 0 else ''
    except OSError:
        # `security` is macOS-only; treat a missing binary as "no token".
        result = ''
    if result:
        APIFY_TOKEN = result
    else:
        print("❌ APIFY_TOKEN not found. Please set it first.")
        # sys.exit over the builtin exit(): the builtin is a site.py
        # convenience not guaranteed to exist in all run modes.
        sys.exit(1)

# Apify actor that performs the Google Places crawls for every state below.
ACTOR_ID = "compass/crawler-google-places"

# Map each target state code to the Google Places search strings we run for
# it. Built from an ordered pair list so adding a state is a one-line change.
searches = {
    state_code: query_list
    for state_code, query_list in [
        ("NY", [
            "apple orchard New York USA",
            "apple grower New York USA",
            "cold storage apples New York USA",
        ]),
        ("PA", [
            "apple orchard Pennsylvania USA",
            "apple grower Pennsylvania USA",
        ]),
        ("MI", [
            "apple orchard Michigan USA",
            "apple grower Michigan USA",
        ]),
        ("WA", [
            "apple packer Washington State USA",
            "apple cold storage Washington State USA",
        ]),
    ]
}

all_results = []
total_estimate = 0

# Per-HTTP-request timeout (seconds). Without an explicit timeout, requests
# waits forever on a dead connection and the whole script hangs.
REQUEST_TIMEOUT = 30
# Maximum wall-clock time to wait for one actor run before giving up, so a
# stuck run cannot trap the script in an infinite polling loop.
MAX_RUN_WAIT = 60 * 60  # 1 hour

for state, queries in searches.items():
    print(f"\n🍎 === Scraping {state} ===")

    # Actor input: one run covers all of this state's search strings.
    input_data = {
        "searchStringsArray": queries,
        "maxCrawledPlacesPerSearch": 150,  # Get plenty per search
        "language": "en",
        "exportPlaceUrls": False,
        "includeWebResults": False
    }

    # Kick off the actor run. Apify answers 201 Created on success.
    response = requests.post(
        f"https://api.apify.com/v2/acts/{ACTOR_ID}/runs",
        params={"token": APIFY_TOKEN},
        json=input_data,
        timeout=REQUEST_TIMEOUT,
    )

    if response.status_code != 201:
        print(f"❌ Error starting run: {response.text}")
        continue

    run_data = response.json()
    run_id = run_data['data']['id']
    print(f"✅ Started run {run_id}")

    # Poll the run until it reaches a terminal state or we hit the deadline.
    deadline = time.monotonic() + MAX_RUN_WAIT
    status = 'TIMED-OUT'  # reported if the deadline expires first
    while time.monotonic() < deadline:
        status_response = requests.get(
            f"https://api.apify.com/v2/acts/{ACTOR_ID}/runs/{run_id}",
            params={"token": APIFY_TOKEN},
            timeout=REQUEST_TIMEOUT,
        )

        status = status_response.json()['data']['status']
        print(f"⏳ Status: {status}")

        if status in ['SUCCEEDED', 'FAILED', 'ABORTED']:
            break

        time.sleep(10)

    if status != 'SUCCEEDED':
        print(f"❌ Run failed with status: {status}")
        continue

    # Fetch the run's dataset items.
    # NOTE(review): no pagination params are passed; presumably the default
    # page size covers ~150 places/search — confirm against the Apify docs
    # if result counts ever look truncated.
    dataset_id = run_data['data']['defaultDatasetId']
    results_response = requests.get(
        f"https://api.apify.com/v2/datasets/{dataset_id}/items",
        params={"token": APIFY_TOKEN},
        timeout=REQUEST_TIMEOUT,
    )

    results = results_response.json()
    # Guard: on an API error this endpoint can return an error object
    # instead of a list; don't let that poison the aggregate.
    if not isinstance(results, list):
        print(f"❌ Unexpected dataset response for {state}: {results}")
        continue
    print(f"✅ Found {len(results)} results for {state}")

    # Tag every item with its originating state for the final breakdown.
    for item in results:
        item['state'] = state

    all_results.extend(results)
    total_estimate += len(results)

# Persist the combined results and print a per-state summary.
output_file = '/Users/max/.openclaw/workspace/postharvest/apify-results/4-states-apples-raw.json'
# Create the target directory if it doesn't exist yet; open() alone would
# raise FileNotFoundError on a fresh workspace.
os.makedirs(os.path.dirname(output_file), exist_ok=True)
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(all_results, f, indent=2)

print(f"\n🎉 ✅ COMPLETE!")
print(f"Total results: {len(all_results)}")
print(f"Saved to: {output_file}")
print(f"\nBreakdown by state:")
for state in ['NY', 'PA', 'MI', 'WA']:
    # sum() over a generator: counts without materializing a throwaway list.
    count = sum(1 for r in all_results if r.get('state') == state)
    print(f"  {state}: {count}")
