#!/usr/bin/env python3
"""
PHT Enrichment Pipeline
Finds contacts at target companies and verifies emails

Created: March 11, 2026
"""

import csv
import json
import os
import time
from datetime import datetime
from pathlib import Path

import requests

# Paths
# NOTE: WORKSPACE is an absolute, machine-specific location; every other path
# in this module is derived from it.
WORKSPACE = Path("/Users/max/.openclaw/workspace/postharvest")
DATA_DIR = WORKSPACE / "automation" / "data"

# API Keys
# SECURITY: a live credential should not be committed to source control.
# The HUNTER_API_KEY environment variable takes precedence when it is set and
# non-empty; the literal below is kept only so existing runs keep working.
# TODO: rotate this key and drop the hard-coded fallback.
HUNTER_KEY = (
    os.environ.get("HUNTER_API_KEY")
    or "fda8536970076bc3228c5b5fa6e19fdc407c43c9"
)
APOLLO_KEY_FILE = WORKSPACE / ".apollo-key"

# Target roles (in priority order)
# Matched as lowercase substrings of a contact's job title; the list index is
# used as the contact's priority rank (0 = highest).
TARGET_ROLES = [
    "quality manager",
    "operations manager",
    "production manager",
    "technical manager",
    "facility manager",
    "general manager"
]

def search_contacts_hunter(domain):
    """
    Search for contacts at a domain using Hunter.io

    Calls the Hunter domain-search endpoint (limit 10) and keeps only the
    entries whose position contains one of TARGET_ROLES (case-insensitive
    substring match).

    Args:
        domain: Company domain to search.

    Returns:
        List of contact dicts with the keys 'name', 'title', 'email',
        'verified', 'confidence' and 'source'. An empty list is returned when
        nothing matches or when the request/parsing fails (the error is
        printed, not raised).
    """
    url = "https://api.hunter.io/v2/domain-search"
    params = {
        "domain": domain,
        "api_key": HUNTER_KEY,
        "limit": 10
    }

    try:
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        payload = response.json().get('data') or {}

        contacts = []
        for entry in payload.get('emails') or []:
            # dict.get() only falls back to its default when the key is
            # absent; a JSON null comes back as None. Coalesce explicitly so
            # one contact with a null field cannot raise and wipe out every
            # result for the domain.
            position = entry.get('position') or ''
            if not any(role in position.lower() for role in TARGET_ROLES):
                continue

            first_name = entry.get('first_name') or ''
            last_name = entry.get('last_name') or ''
            verification = entry.get('verification') or {}

            contacts.append({
                'name': f"{first_name} {last_name}".strip(),
                'title': position,
                'email': entry.get('value') or '',
                # NOTE(review): the domain-search payload may report this
                # under 'status' rather than 'result' — confirm against the
                # Hunter API docs; if so this flag is always False and every
                # contact gets re-verified downstream.
                'verified': verification.get('result') == 'deliverable',
                # Always numeric so callers can sort on it.
                'confidence': entry.get('confidence') or 0,
                'source': 'hunter'
            })

        return contacts

    except Exception as e:
        # Deliberately broad: this is a per-domain best-effort boundary, and a
        # single failing lookup must not abort the whole batch.
        print(f"   ⚠ Hunter error for {domain}: {e}")
        return []

def verify_email_hunter(email):
    """
    Verify an email address using Hunter.io

    Args:
        email: Address to check.

    Returns:
        Dict with the keys:
            'valid'  - True only when Hunter's result is 'deliverable'
            'score'  - Hunter's score, 0 when missing or null
            'result' - Hunter's result string, 'unknown' when missing or
                       null, 'error' when the request or parsing failed
        Errors are printed, never raised.
    """
    url = "https://api.hunter.io/v2/email-verifier"
    params = {
        "email": email,
        "api_key": HUNTER_KEY
    }

    try:
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        payload = response.json().get('data')

        if not payload:
            return {'valid': False, 'score': 0, 'result': 'unknown'}

        # A JSON null is returned by .get() as None even when a default is
        # given; normalise so callers can compare the score numerically and
        # always receive a printable result string.
        result = payload.get('result') or 'unknown'
        score = payload.get('score') or 0

        return {
            'valid': result == 'deliverable',
            'score': score,
            'result': result
        }

    except Exception as e:
        # Deliberately broad: verification is best-effort and a failure for
        # one address must not stop the rest of the run.
        print(f"   ⚠ Verification error for {email}: {e}")
        return {'valid': False, 'score': 0, 'result': 'error'}

def rank_contact_by_title(title, roles=None):
    """
    Rank contact priority based on title
    Lower score = higher priority

    Args:
        title: Job title to rank; None or empty is treated as "no match".
        roles: Optional sequence of role names in priority order. Defaults to
            the module-level TARGET_ROLES.

    Returns:
        Index of the first role that occurs (case-insensitively) as a
        substring of the title, or 999 when no role matches.
    """
    if roles is None:
        roles = TARGET_ROLES

    # Titles can be missing upstream; treat None like an empty title.
    title_lower = (title or '').lower()

    for priority, role in enumerate(roles):
        if role.lower() in title_lower:
            return priority

    return 999  # Not a target role

def enrich_company(company_name, domain, max_contacts=3, min_score=70):
    """
    Find and verify contacts for a company

    Candidates from search_contacts_hunter are sorted by role priority and
    then by descending confidence. Only the first `max_contacts` candidates
    are checked; a candidate that fails verification is dropped and is NOT
    replaced by a lower-ranked one, so fewer than `max_contacts` may be
    returned.

    Args:
        company_name: Display name, used for progress output only.
        domain: Company domain passed to the contact search.
        max_contacts: Number of top-ranked candidates to check (default 3).
        min_score: Minimum verification score for a candidate that was not
            already flagged as verified by the search (default 70).

    Returns:
        List of contact dicts whose email passed verification. Contacts
        verified here gain a 'verification_score' key; contacts that arrived
        already verified do not.
    """
    print(f"\n📧 Enriching: {company_name}")
    print(f"   Domain: {domain}")

    # Search for contacts
    contacts = search_contacts_hunter(domain)

    if not contacts:
        print("   ⚠ No contacts found")
        return []

    print(f"   Found {len(contacts)} potential contacts")

    # Sort by role priority and confidence. Missing/null values are coalesced
    # so a single incomplete record cannot raise TypeError inside the sort.
    contacts.sort(
        key=lambda c: (
            rank_contact_by_title(c.get('title') or ''),
            -(c.get('confidence') or 0),
        )
    )

    verified_contacts = []
    for contact in contacts[:max_contacts]:
        if contact['verified']:
            # Already verified by Hunter
            verified_contacts.append(contact)
            print(f"   ✓ {contact['name']} ({contact['title']}) - {contact['email']} [verified]")
            continue

        # Verify now
        verification = verify_email_hunter(contact['email'])
        time.sleep(0.5)  # Rate limiting

        # A null score counts as 0 instead of breaking the comparison.
        score = verification.get('score') or 0

        if verification['valid'] and score >= min_score:
            contact['verified'] = True
            contact['verification_score'] = score
            verified_contacts.append(contact)
            print(f"   ✓ {contact['name']} ({contact['title']}) - {contact['email']} [verified: {score}%]")
        else:
            print(f"   ✗ {contact['name']} ({contact['title']}) - {contact['email']} [failed: {verification['result']}]")

    if not verified_contacts:
        print(f"   ⚠ No verified contacts for {company_name}")
    else:
        print(f"   ✅ {len(verified_contacts)} verified contacts")

    return verified_contacts

def run_enrichment(discovery_file, output_file=None):
    """
    Enrich all companies from discovery file

    Reads the discovery CSV, calls enrich_company for every row that has a
    domain, tags each returned contact with the company's context columns and
    writes all contacts to one CSV.

    Args:
        discovery_file: Path to discovered companies CSV. The 'company_name'
            and 'domain' columns are used; 'country' and 'fruit_type' are
            copied through when present.
        output_file: Path to save enriched contacts (optional). Defaults to a
            timestamped file inside DATA_DIR.

    Returns:
        The output path as a string, or None when no contacts were enriched
        (in which case no file is written).
    """
    print(f"\n🔧 Starting enrichment")
    print(f"   Input: {discovery_file}")

    # Load discovered companies.
    # 'utf-8-sig' strips a leading BOM (common in spreadsheet exports) that
    # would otherwise become part of the first header name; newline='' is the
    # csv module's recommended mode for reading.
    with open(discovery_file, 'r', newline='', encoding='utf-8-sig') as f:
        companies = list(csv.DictReader(f))

    print(f"   Companies to enrich: {len(companies)}")

    # Enrich each company
    all_contacts = []
    for i, company in enumerate(companies, 1):
        print(f"\n[{i}/{len(companies)}]")

        # DictReader yields None for cells missing from short rows.
        company_name = (company.get('company_name') or '').strip()
        domain = (company.get('domain') or '').strip()

        if not domain:
            # Nothing to look up: skip without spending an API call or the
            # rate-limit pause.
            print(f"   ⚠ Skipping {company_name or 'row'}: no domain")
            continue

        contacts = enrich_company(company_name, domain)

        # Add company context to each contact
        for contact in contacts:
            contact['company_name'] = company_name
            contact['domain'] = domain
            contact['country'] = company.get('country', '')
            contact['fruit_type'] = company.get('fruit_type', '')
            contact['enriched_date'] = datetime.now().isoformat()

        all_contacts.extend(contacts)

        # Rate limiting
        time.sleep(1)

    if not all_contacts:
        print("\n⚠ No contacts enriched")
        return None

    # Save results
    if not output_file:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_file = DATA_DIR / f"enriched_contacts_{timestamp}.csv"

    # Create the target directory up front so the results of the (slow,
    # metered) API calls are not lost to a missing folder.
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)

    fieldnames = ['company_name', 'domain', 'country', 'fruit_type',
                  'name', 'title', 'email', 'verified', 'confidence',
                  'verification_score', 'source', 'enriched_date']

    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        # Contacts without a 'verification_score' get an empty cell
        # (DictWriter's default restval).
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(all_contacts)

    print(f"\n✅ Enrichment complete!")
    print(f"   Total contacts: {len(all_contacts)}")
    print(f"   Companies with contacts: {len(set(c['company_name'] for c in all_contacts))}")
    print(f"   Saved: {output_file}")
    return str(output_file)

if __name__ == "__main__":
    # Manual run: enrich the discovery CSV whose filename sorts last in
    # DATA_DIR.
    candidates = sorted(DATA_DIR.glob("discovered_*.csv"))

    if not candidates:
        print("No discovery files found. Run 1_discovery.py first.")
    else:
        newest = candidates[-1]
        print(f"Using latest discovery: {newest.name}")
        output_path = run_enrichment(newest)

        if output_path:
            print(f"\n📁 Output: {output_path}")
