#!/usr/bin/env python3
"""
PHT Email Personalization
Generates custom email copy for each contact

Created: March 11, 2026
"""

import csv
import json
from pathlib import Path
from datetime import datetime

# Paths
# NOTE(review): WORKSPACE is an absolute, user-specific path, so the script
# only works as-is on a machine that has this directory.
WORKSPACE = Path("/Users/max/.openclaw/workspace/postharvest")
# Directory for the CSV files this script touches: the __main__ block globs it
# for enriched_contacts_*.csv, and personalize_contacts() writes its default
# personalized_<timestamp>.csv output here.
DATA_DIR = WORKSPACE / "automation" / "data"

# Case studies by fruit type.
# Keys are lowercase fruit-type names; generate_email_body() lowercases the
# contact's fruit_type before the lookup and falls back to the "mixed" entry
# when there is no matching key. Each entry provides:
#   company    - customer name quoted in the email
#   stat       - past-tense result phrase placed after the company name in
#                the case-study sentence
#   pain_point - complete sentence used as its own paragraph in the email
CASE_STUDIES = {
    "apples": {
        "company": "Stemilt",
        "stat": "reduced quality claims by 34%",
        "pain_point": "Ethylene buildup in CA rooms can cost you thousands in damaged fruit during peak storage season."
    },
    "citrus": {
        "company": "Wonderful Citrus",
        "stat": "eliminated $180K in annual losses",
        "pain_point": "Even small ethylene spikes during citrus storage can trigger early degreening and shorten shelf life."
    },
    "kiwis": {
        "company": "Zespri",
        "stat": "improved quality scores by 28%",
        "pain_point": "Kiwifruit is extremely sensitive to ethylene—undetected spikes mean shorter storage windows and lost export opportunities."
    },
    # Same company and stat as "apples"; only the pain point differs.
    "pears": {
        "company": "Stemilt",
        "stat": "reduced quality claims by 34%",
        "pain_point": "Pears are ethylene-sensitive, and CA storage issues often don't show up until it's too late."
    },
    "bananas": {
        "company": "Costa Group",
        "stat": "extended shelf life by 5-7 days",
        "pain_point": "Banana ripening is incredibly ethylene-dependent—real-time monitoring means you control the process, not the other way around."
    },
    # Fallback entry: used for fruit types that have no key of their own.
    "mixed": {
        "company": "USI Insurance",
        "stat": "helped clients prevent over $2M in claims",
        "pain_point": "Quality issues from undetected ethylene spikes are one of the top causes of storage claims."
    }
}

# ROI stats by room count
def get_roi_stat(rooms):
    """Get ROI stat based on estimated room count

    Args:
        rooms: Estimated number of storage rooms. May be an int, a float or a
            numeric string such as "20", " 35 " or "25.0" (CSV cells arrive
            as strings). None, empty, non-numeric and non-finite values are
            treated as 0 instead of raising.

    Returns:
        A one-sentence ROI claim without trailing punctuation; the caller
        adds the full stop.
    """
    try:
        # Go through float() so values like "25.0" parse; int() alone raises
        # ValueError on them. Fractions are truncated, as int() would do.
        rooms = int(float(rooms)) if rooms else 0
    except (TypeError, ValueError, OverflowError):
        # Unparseable ("n/a", "  "), wrong type, or nan/inf: fall back to the
        # generic message rather than aborting the whole personalization run.
        rooms = 0

    if rooms >= 50:
        return "At your scale (50+ rooms), we're seeing $200K+ annual impact"
    elif rooms >= 25:
        return "Facilities with 30+ rooms typically see $120K+ in savings annually"
    elif rooms >= 10:
        return "Even with 15-20 rooms, operators save $50K+ annually"
    else:
        return "Operators consistently see 10-15x ROI in the first year"

def generate_email_subject(first_name, company_name, variation=1):
    """Build one of the two A/B-test subject lines for a contact.

    Args:
        first_name: Name used to address the contact.
        company_name: Contact's company name.
        variation: 1 selects the "reducing ethylene damage" subject; any
            other value selects the "Quick question" subject.

    Returns:
        The subject line as a string.
    """
    # Everything except variation 1 maps to the second subject line.
    if variation != 1:
        return f"Quick question for {company_name}'s {first_name}"

    return f"{first_name}, reducing ethylene damage at {company_name}"

def generate_email_body(contact, company_data):
    """
    Generate personalized email body

    Builds the first-touch email from a fixed template, filling in the
    contact's first name, the company details, the case study matching the
    fruit type and the ROI line from get_roi_stat().

    Args:
        contact: Dict with name, title, email. Only 'name' is read here; a
            missing, empty or whitespace-only name gives the greeting
            "Hi there,".
        company_data: Dict with company_name, domain, country, fruit_type, rooms.
            A missing or blank fruit_type is treated as 'mixed'. The case
            study is looked up case-insensitively in CASE_STUDIES and falls
            back to the 'mixed' entry for unknown fruit types. A blank
            country drops the " in <country>" clause. 'domain' is not used.

    Returns:
        The email text as a single string, greeting through signature.
    """
    # split() on a whitespace-only name returns [], so test the parts rather
    # than the raw string before indexing.
    name_parts = (contact.get('name') or '').split()
    first_name = name_parts[0] if name_parts else "there"

    # "or" also covers None values, which .get(key, default) lets through.
    company_name = company_data.get('company_name') or ''
    country = (company_data.get('country') or '').strip()
    fruit_type = (company_data.get('fruit_type') or '').strip().lower() or 'mixed'
    rooms = company_data.get('rooms', 0)

    # Get case study
    case_study = CASE_STUDIES.get(fruit_type, CASE_STUDIES['mixed'])

    # Get ROI stat
    roi_stat = get_roi_stat(rooms)

    # Avoid "storage in ." when the country is unknown.
    location = f" in {country}" if country else ""

    # Build email. The stat phrases are past tense ("reduced ...",
    # "eliminated ..."), so they follow "who" rather than "helped <company>".
    email = f"""Hi {first_name},

I noticed {company_name} handles {fruit_type} storage{location}.

{case_study['pain_point']}

We recently worked with {case_study['company']}, who {case_study['stat']} using real-time ethylene monitoring across their CA rooms.

{roi_stat}.

Would a 2-month pilot (10 units, free) at {company_name} make sense?

Best,
Jonny Shannon
PostHarvest Technologies
calendly.com/jonny_shannon/30mins"""

    return email

def generate_follow_up_emails(first_name, company_name):
    """Generate follow-up email sequence

    Args:
        first_name: Name used in each greeting.
        company_name: Contact's company name. The third email refers to
            "another <last word of the name> facility"; when the name is
            None, empty or whitespace-only it says "another facility"
            instead of raising.

    Returns:
        Dict with keys "email_2", "email_3", "email_4" and "email_5", each
        mapping to the full text of that follow-up.
    """
    company_name = company_name or ''

    # ''.split() is [], so indexing [-1] directly would raise IndexError for
    # contacts that have no company name.
    name_words = company_name.split()
    peer_label = f"{name_words[-1]} facility" if name_words else "facility"

    email_2 = f"""Hi {first_name},

I sent over some info about ethylene monitoring for {company_name} earlier this week.

Thought this case study might be useful - it's from a similar operation:
[Case Study PDF Link]

No pressure, just wanted to make sure you had the resource.

Best,
Jonny"""

    email_3 = f"""Hi {first_name},

Quick update - we just closed a deal with another {peer_label} last week.

They're starting with a 2-month pilot (10 units, free) to see the ROI firsthand.

Worth a quick 15-minute chat to see if it'd work for you too?

Best,
Jonny
calendly.com/jonny_shannon/30mins"""

    email_4 = f"""Hi {first_name},

Last follow-up on this - totally understand if now's not the right time for {company_name}.

Quick yes/no question: Worth discussing ethylene monitoring for next season?

If not, no worries at all.

Best,
Jonny"""

    email_5 = f"""Hi {first_name},

Sounds like now isn't the right time for {company_name} - totally understand.

Feel free to reach out when storage season picks up or if ethylene issues come up.

All the best,
Jonny"""

    return {
        "email_2": email_2,
        "email_3": email_3,
        "email_4": email_4,
        "email_5": email_5
    }

def personalize_contacts(enriched_file, output_file=None):
    """
    Generate personalized emails for all contacts

    Reads the enriched contacts CSV, builds two subject lines, the first
    email body and four follow-ups for every row, scores each row with a few
    simple quality checks, and writes the original columns plus the generated
    ones to a new CSV. Progress and the first three previews are printed.

    Args:
        enriched_file: Path to enriched contacts CSV
        output_file: Path to save personalized data (optional). Defaults to
            DATA_DIR / "personalized_<YYYYmmdd_HHMMSS>.csv". Missing parent
            directories are created.

    Returns:
        The output path as a string, or None when the input has no rows (in
        which case nothing is written).
    """
    print("\n✍️  Starting personalization")
    print(f"   Input: {enriched_file}")

    # Load enriched contacts. newline='' is what the csv module asks for so
    # that newlines embedded in quoted fields are parsed correctly.
    with open(enriched_file, 'r', newline='', encoding='utf-8') as f:
        contacts = list(csv.DictReader(f))

    print(f"   Contacts to personalize: {len(contacts)}")

    # Generate emails for each contact
    personalized = []
    for contact in contacts:
        # Use "or" rather than .get(key, default): DictReader always supplies
        # the key when the column exists, so a blank cell ('') or a short row
        # (None) would otherwise bypass the defaults.
        company_data = {
            'company_name': (contact.get('company_name') or '').strip(),
            'domain': contact.get('domain') or '',
            'country': contact.get('country') or '',
            'fruit_type': contact.get('fruit_type') or 'mixed',
            'rooms': contact.get('rooms') or 20,  # Default estimate
        }

        # A whitespace-only name splits to [], so check the parts before
        # indexing instead of the raw string.
        full_name = (contact.get('name') or '').strip()
        name_parts = full_name.split()
        first_name = name_parts[0] if name_parts else "there"

        # Generate subject lines (2 variations)
        subject_1 = generate_email_subject(first_name, company_data['company_name'], 1)
        subject_2 = generate_email_subject(first_name, company_data['company_name'], 2)

        # Generate email body. Pass the cleaned name so the body's own
        # first-name extraction sees the same value as the subject lines.
        email_body = generate_email_body({**contact, 'name': full_name}, company_data)

        # Generate follow-ups
        follow_ups = generate_follow_up_emails(first_name, company_data['company_name'])

        # Quality checks
        # NOTE(review): body_length_ok counts characters, and the template
        # with even the shortest case-study and ROI text is well over 300
        # characters, so this check looks like it can never pass (capping the
        # score at 80%). Confirm the intended unit/range before changing it.
        checks = {
            'has_first_name': bool(first_name and first_name != "there"),
            'has_company': bool(company_data['company_name']),
            'has_fruit_type': bool(contact.get('fruit_type')),
            'body_length_ok': 150 <= len(email_body) <= 300,
            'subject_length_ok': len(subject_1) < 60
        }

        # Percentage of checks that passed.
        quality_score = sum(checks.values()) / len(checks) * 100

        personalized_contact = {
            **contact,
            'subject_a': subject_1,
            'subject_b': subject_2,
            'email_body': email_body,
            'email_2': follow_ups['email_2'],
            'email_3': follow_ups['email_3'],
            'email_4': follow_ups['email_4'],
            'email_5': follow_ups['email_5'],
            'quality_score': quality_score,
            'personalized_date': datetime.now().isoformat()
        }

        personalized.append(personalized_contact)

        # Preview the first three contacts only
        if len(personalized) <= 3:
            print(f"\n✉️  Preview #{len(personalized)}:")
            print(f"   To: {contact.get('email')}")
            print(f"   Subject: {subject_1}")
            print(f"   Quality: {quality_score:.0f}%")

    if not personalized:
        print("\n⚠ No emails generated")
        return None

    # Save results
    if not output_file:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_file = DATA_DIR / f"personalized_{timestamp}.csv"

    # Make sure the target directory exists before opening the file.
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)

    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        # Column order: original CSV columns first, then the generated ones.
        fieldnames = list(personalized[0].keys())
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(personalized)

    avg_quality = sum(p['quality_score'] for p in personalized) / len(personalized)

    print("\n✅ Personalization complete!")
    print(f"   Total emails: {len(personalized)}")
    print(f"   Average quality: {avg_quality:.1f}%")
    print(f"   Saved: {output_file}")
    return str(output_file)

if __name__ == "__main__":
    # Manual test run: personalize the enriched file whose path sorts last,
    # which is the newest one if the names carry a sortable timestamp.
    latest = max(DATA_DIR.glob("enriched_contacts_*.csv"), default=None)

    if latest is None:
        print("No enriched files found. Run 2_enrichment.py first.")
    else:
        print(f"Using latest enrichment: {latest.name}")
        result = personalize_contacts(latest)

        # personalize_contacts() returns None when it produced no rows.
        if result:
            print(f"\n📁 Output: {result}")