#!/bin/bash
# Apify API scraper for 4-state apple facilities
set -e

# SECURITY: the Apify API token must come from the environment and must never
# be committed to this file. Any token that was previously hardcoded here
# should be treated as leaked and rotated in the Apify console.
# The script aborts immediately with the message below when the variable is
# unset or empty.
APIFY_TOKEN="${APIFY_TOKEN:?set APIFY_TOKEN to your Apify API token before running this script}"
ACTOR_ID="nwua9Gu5YrADL7ZDj"  # Google Maps Scraper

# Directory that receives one JSON file per search. Defaults to the original
# location; set APIFY_OUTPUT_DIR to write somewhere else.
OUTPUT_DIR="${APIFY_OUTPUT_DIR:-/Users/max/.openclaw/workspace/postharvest/apify-results}"

mkdir -p "$OUTPUT_DIR"

echo "🚀 Starting 4-state Apify scrape via API..."
echo "Output directory: $OUTPUT_DIR"
echo ""

# Escape a string so it can be placed between double quotes in a JSON document.
# Arguments: $1 - raw text
# Outputs:   the escaped text on stdout (no trailing newline)
_apify_json_escape() {
    local text="$1"
    # Backslashes first, otherwise the escapes added below would be doubled.
    text=${text//\\/\\\\}
    text=${text//\"/\\\"}
    text=${text//$'\n'/\\n}
    text=${text//$'\r'/\\r}
    text=${text//$'\t'/\\t}
    printf '%s' "$text"
}

# Print the value of the first "key": "value" string pair found in JSON text.
# Whitespace around the colon is tolerated so both compact and pretty-printed
# responses are handled. Prints an empty line when the key is not present.
# Arguments: $1 - key name, $2 - JSON text
_apify_json_string_field() {
    local key="$1"
    local json="$2"
    printf '%s\n' "$json" \
        | grep -o "\"$key\"[[:space:]]*:[[:space:]]*\"[^\"]*\"" \
        | head -1 \
        | cut -d'"' -f4
}

#######################################
# Run a single Apify actor search, wait for it to finish and save the
# resulting dataset as JSON.
# Globals:
#   ACTOR_ID    (read) actor to start
#   APIFY_TOKEN (read) bearer token sent with every request
#   OUTPUT_DIR  (read) directory that receives <state>-<type>.json
# Arguments:
#   $1 - search string
#   $2 - location query
#   $3 - maximum number of places to crawl (non-negative integer)
#   $4 - state label, first part of the output file name
#   $5 - facility type label, second part of the output file name
# Outputs:
#   Progress messages on stdout; writes $OUTPUT_DIR/<state>-<type>.json
# Returns:
#   0 on success; 1 when the arguments are invalid, the run cannot be
#   started, the run ends in a failed terminal state, its status cannot be
#   read, or the results cannot be downloaded.
#######################################
run_apify_search() {
    local query="$1"
    local location="$2"
    local max_results="$3"
    local state="$4"
    local type="$5"

    local runs_url="https://api.apify.com/v2/acts/$ACTOR_ID/runs"
    local query_json location_json input_json
    local run_response run_id status_response status
    local output_file partial_file count
    local poll_errors=0
    local max_poll_errors=5

    echo "📍 Search: \"$query\" in $location (max $max_results)"

    # max_results is inserted into the JSON as a bare number, so anything
    # other than digits would produce an invalid request body.
    if ! [[ "$max_results" =~ ^[0-9]+$ ]]; then
        echo "   ❌ Invalid max results (expected a non-negative integer): $max_results"
        return 1
    fi

    # Create input JSON (string values are escaped so quotes or backslashes
    # in the query cannot break the document).
    query_json=$(_apify_json_escape "$query")
    location_json=$(_apify_json_escape "$location")
    input_json=$(cat <<EOF
{
  "searchStringsArray": ["$query_json"],
  "locationQuery": "$location_json",
  "maxCrawledPlacesPerSearch": $max_results,
  "language": "en",
  "maxReviews": 0,
  "maxImages": 0,
  "exportPlaceUrls": false,
  "includeWebResults": false
}
EOF
)

    # Start the run
    echo "   Starting run..."
    if ! run_response=$(curl -sS -X POST "$runs_url" \
        -H "Authorization: Bearer $APIFY_TOKEN" \
        -H "Content-Type: application/json" \
        -d "$input_json"); then
        echo "   ❌ Failed to start run (request error)"
        return 1
    fi

    run_id=$(_apify_json_string_field "id" "$run_response") || true

    if [ -z "$run_id" ]; then
        echo "   ❌ Failed to start run"
        echo "   Response: $run_response"
        return 1
    fi

    echo "   Run ID: $run_id"
    echo "   Waiting for completion..."

    # Poll for completion
    while true; do
        status=""
        if status_response=$(curl -sS "$runs_url/$run_id" \
            -H "Authorization: Bearer $APIFY_TOKEN"); then
            status=$(_apify_json_string_field "status" "$status_response") || true
        fi

        case "$status" in
            SUCCEEDED)
                echo "   ✅ Complete!"
                break
                ;;
            FAILED | ABORTED | TIMED-OUT)
                # TIMED-OUT is a terminal run status as well; without it the
                # loop would never end for a run that hit its timeout.
                echo "   ❌ Failed with status: $status"
                return 1
                ;;
            "")
                # Request failed or the response had no status. Tolerate a
                # few transient errors, then give up instead of looping
                # forever.
                poll_errors=$((poll_errors + 1))
                if [ "$poll_errors" -ge "$max_poll_errors" ]; then
                    echo "   ❌ Could not read run status after $poll_errors attempts"
                    return 1
                fi
                ;;
            *)
                poll_errors=0
                ;;
        esac

        echo "   ⏳ Status: $status..."
        sleep 10
    done

    # Download results into a temporary file first so a failed download does
    # not truncate results saved by an earlier run.
    output_file="$OUTPUT_DIR/${state}-${type}.json"
    partial_file="$output_file.part"
    echo "   Downloading results..."
    if ! curl -sS "$runs_url/$run_id/dataset/items?format=json" \
        -H "Authorization: Bearer $APIFY_TOKEN" \
        > "$partial_file"; then
        rm -f -- "$partial_file"
        echo "   ❌ Failed to download results"
        return 1
    fi
    mv -- "$partial_file" "$output_file"

    # Count occurrences rather than lines: the dataset may arrive as a
    # single line of JSON.
    count=$(grep -o '"title"' "$output_file" | wc -l | tr -d ' ') || true
    echo "   💾 Saved $count results to: ${state}-${type}.json"
    echo ""

    sleep 2  # Rate limit
}

# Search plan: one "query|location|max results|state|type" entry per Apify run.
# Entries are executed in order; there are 9 of them.
search_plan=(
    "apple orchard|New York, USA|200|NY|orchard"
    "apple grower|New York, USA|200|NY|grower"
    "cold storage apples|New York, USA|100|NY|storage"
    "apple orchard|Pennsylvania, USA|150|PA|orchard"
    "apple grower|Pennsylvania, USA|150|PA|grower"
    "apple orchard|Michigan, USA|150|MI|orchard"
    "apple grower|Michigan, USA|150|MI|grower"
    "apple packer|Washington State, USA|100|WA|packer"
    "apple cold storage|Washington State, USA|100|WA|storage"
)

for plan_entry in "${search_plan[@]}"; do
    # Split the entry on '|' into the five positional arguments of the search.
    IFS='|' read -r plan_query plan_location plan_max plan_state plan_type <<< "$plan_entry"
    run_apify_search "$plan_query" "$plan_location" "$plan_max" "$plan_state" "$plan_type"
done

printf '%s\n' "🎉 All 9 searches complete!" "" "📊 Summary:"

# Report, for every JSON file in the output directory, how many times the
# "title" key occurs in it.
for result_path in "$OUTPUT_DIR"/*.json; do
    # Skips the literal pattern when the glob matched nothing.
    [ -f "$result_path" ] || continue
    title_hits=$(grep -o '"title"' "$result_path" | wc -l | tr -d ' ')
    echo "   ${result_path##*/}: $title_hits results"
done

printf '\n%s\n' "Next step: Merge and convert to CSV"
