#!/bin/bash

###############################################################################
# Master Batch Processor - Enriches all 200 facilities
# Usage: ./process-all-batches.sh [start_batch] [end_batch]
#
# Requires: node (with csv-parse), jq, curl.
# The Hunter.io key can be supplied via the HUNTER_API_KEY environment
# variable; the inline fallback below is kept for backward compatibility.
###############################################################################

# -u: fail fast on unset variables; pipefail: surface failures inside
# pipelines. -e is deliberately NOT set: a single failed curl/jq call for one
# facility must not abort the whole multi-batch run.
set -uo pipefail

START_BATCH=${1:-2}  # Default start at batch 2 (batch 1 already done)
END_BATCH=${2:-20}   # Default end at batch 20 (200 facilities / 10 per batch)

# SECURITY: prefer the key from the environment. The hardcoded fallback is a
# committed secret — rotate it and remove it from source control.
HUNTER_API_KEY="${HUNTER_API_KEY:-fda8536970076bc3228c5b5fa6e19fdc407c43c9}"
readonly DATA_DIR="/Users/max/.openclaw/workspace/postharvest"
readonly OUTPUT_DIR="$DATA_DIR/batch-results"
readonly FINAL_OUTPUT="$DATA_DIR/enriched-top-200-contacts.csv"

# Create output directory
mkdir -p "$OUTPUT_DIR"

echo "🚀 Master Batch Contact Enrichment"
echo "=================================="
echo "Processing batches $START_BATCH to $END_BATCH"
echo "Output directory: $OUTPUT_DIR"
echo ""

#######################################
# Strip the http(s) scheme, path, and a leading "www." from a URL,
# leaving the bare domain.
# Arguments: $1 - URL (e.g. "https://www.example.com/path")
# Outputs:   domain on stdout; input is passed through unchanged (minus a
#            leading "www.") when it does not look like an http(s) URL.
#######################################
extract_domain() {
    # printf instead of echo: safe for values starting with "-" or containing
    # backslashes. Both substitutions run in a single sed process.
    printf '%s\n' "$1" | sed -E -e 's|https?://([^/]+).*|\1|' -e 's/^www\.//'
}

#######################################
# Generate the JSON work list for one batch of 10 facilities.
# Globals:   DATA_DIR (read), OUTPUT_DIR (read)
# Arguments: $1 - 1-based batch number
# Outputs:   writes $OUTPUT_DIR/batch-<n>-data.json, a JSON array of
#            {id, company, website, domain, region, products, rooms, phone}
# NOTE(review): the embedded Node program requires the csv-parse package and
# assumes top-200-prioritized.csv has columns Company, Website, Region,
# "Primary Fruit", Rooms, Phone — confirm against that file if it changes.
#######################################
generate_batch_data() {
    local batch_num=$1
    # Batches are 10 rows each; batch 1 starts at row index 0.
    local start_idx=$(( (batch_num - 1) * 10 ))
    
    # $DATA_DIR and $start_idx are interpolated into the JS source by the
    # shell (double-quoted string) before node ever sees it. extractDomain
    # is referenced before its definition; that is fine because JS function
    # declarations are hoisted. `id` is the facility's global 1-based rank,
    # not its position within the batch.
    node -e "
    const fs = require('fs');
    const { parse } = require('csv-parse/sync');
    
    const csv = fs.readFileSync('$DATA_DIR/top-200-prioritized.csv', 'utf-8');
    const facilities = parse(csv, { columns: true });
    const batch = facilities.slice($start_idx, $start_idx + 10);
    
    const data = batch.map((f, idx) => ({
        id: $start_idx + idx + 1,
        company: f.Company,
        website: f.Website,
        domain: extractDomain(f.Website),
        region: f.Region,
        products: f['Primary Fruit'],
        rooms: f.Rooms,
        phone: f.Phone
    }));
    
    function extractDomain(url) {
        if (!url) return null;
        try {
            const match = url.match(/https?:\/\/(?:www\.)?([^\/]+)/);
            return match ? match[1] : null;
        } catch (e) {
            return null;
        }
    }
    
    console.log(JSON.stringify(data, null, 2));
    " > "$OUTPUT_DIR/batch-$batch_num-data.json"
}

#######################################
# Enrich one batch of facilities with Hunter.io contact data.
# Globals:   OUTPUT_DIR, HUNTER_API_KEY (read)
# Arguments: $1 - batch number
# Outputs:   progress lines on stdout; contact rows written to
#            $OUTPUT_DIR/batch-<n>-results.txt
#######################################
enrich_batch() {
    local batch_num=$1
    local batch_file="$OUTPUT_DIR/batch-$batch_num-data.json"
    local result_file="$OUTPUT_DIR/batch-$batch_num-results.txt"

    echo "[$batch_num] Generating batch data..."
    generate_batch_data "$batch_num"

    echo "[$batch_num] Enriching with Hunter.io..."

    # Start from an empty results file: the loop below only ever appends, so
    # without this a re-run of the same batch would duplicate every row.
    : > "$result_file"

    # The while body runs in a pipeline subshell; that is fine here because
    # it only appends to $result_file and prints progress — no variables
    # need to survive past the loop.
    jq -c '.[]' "$batch_file" | while IFS= read -r facility; do
        company=$(jq -r '.company' <<<"$facility")
        domain=$(jq -r '.domain // empty' <<<"$facility")
        id=$(jq -r '.id' <<<"$facility")

        echo "  [$id] $company"

        # '.domain // empty' already maps null to "", but keep the literal
        # "null" guard as a defensive second check.
        if [[ -n "$domain" && "$domain" != "null" ]]; then
            echo "    Domain: $domain" >> "$result_file"

            # -G + --data-urlencode builds a safely encoded query string so
            # stray characters in the domain cannot mangle the request URL.
            curl -s -G "https://api.hunter.io/v2/domain-search" \
                --data-urlencode "domain=$domain" \
                --data-urlencode "api_key=$HUNTER_API_KEY" \
                --data-urlencode "limit=10" \
                | jq -r '.data.emails[]? | "    \(.first_name // "N/A") \(.last_name // "N/A")|\(.position // "N/A")|\(.value)|\(.confidence)"' \
                >> "$result_file"

            echo "" >> "$result_file"

            # Rate limit - 2 seconds between requests
            sleep 2
        else
            echo "    No domain available" >> "$result_file"
        fi
    done

    echo "[$batch_num] ✅ Complete - Results: $result_file"
}

# Main processing loop: walk batches START_BATCH..END_BATCH inclusive,
# pausing between batches to stay under the API rate limit.
total_batches=$(( END_BATCH - START_BATCH + 1 ))
current=0

# C-style counting loop instead of spawning seq; output is identical.
for (( batch = START_BATCH; batch <= END_BATCH; batch++ )); do
    current=$(( current + 1 ))
    echo ""
    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    echo "Processing Batch $batch ($current/$total_batches)"
    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

    enrich_batch "$batch"

    # Delay between batches (prevent API throttling); skipped after the last.
    if (( batch < END_BATCH )); then
        echo ""
        echo "⏳ Waiting 10 seconds before next batch..."
        sleep 10
    fi
done

# Final run summary and follow-up checklist. A single expanding here-doc
# replaces the echo sequence; the printed output is byte-identical.
cat <<EOF

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
✅ ALL BATCHES COMPLETE
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

📊 Results Summary:
   - Batches processed: $total_batches
   - Facilities enriched: $(( total_batches * 10 ))
   - Output directory: $OUTPUT_DIR

📋 Next Steps:
   1. Review results in $OUTPUT_DIR/
   2. Compile into enriched CSV
   3. Manual LinkedIn research for gaps
   4. Verify contacts and update final deliverable

🎯 Run compilation script:
   node compile-enriched-csv.js

EOF