#!/usr/bin/env python3
import csv
import json
import subprocess

def normalize_website(url):
    """Normalize website URL for comparison"""
    if not url:
        return ''
    url = url.lower().strip()
    url = url.replace('https://', '').replace('http://', '').replace('www.', '')
    url = url.rstrip('/')
    return url

def normalize_country(country):
    """Normalize country codes"""
    if not country:
        return ''
    country = country.strip().upper()
    # Map full names to codes
    mapping = {
        'AUSTRALIA': 'AU',
        'NEW ZEALAND': 'NZ',
        'UNITED KINGDOM': 'UK',
        'CANADA': 'CA',
        'SOUTH AFRICA': 'ZA',
        'UNITED STATES': 'USA',
    }
    return mapping.get(country, country)

def make_key(row):
    """Create unique key for company"""
    if len(row) >= 2:
        name = (row[0] or '').strip().lower()
        website = normalize_website(row[1] or '')
        country = normalize_country(row[2] or '') if len(row) > 2 else ''
        return f"{name}|{website}|{country}"
    return None

def normalize_row(row):
    """Normalize a row for consistency"""
    normalized = list(row)
    # Normalize website (remove protocol, keep clean URL)
    if len(normalized) > 1 and normalized[1]:
        normalized[1] = normalize_website(normalized[1])
    # Normalize country to 2-letter code
    if len(normalized) > 2 and normalized[2]:
        normalized[2] = normalize_country(normalized[2])
    return normalized

# Load desktop CSV
print("Loading desktop CSV...")
desktop = []
with open('/Users/max/Desktop/PHT-Master-List.csv', 'r', encoding='utf-8') as f:
    reader = csv.reader(f)
    desktop = list(reader)

print(f"Desktop: {len(desktop)-1} companies")

# Load Google Sheet
print("Loading Google Sheet...")
with open('/Users/max/.openclaw/workspace/postharvest/google-master-list.json', 'r') as f:
    google_data = json.load(f)
    google_rows = google_data['values']

print(f"Google: {len(google_rows)-1} companies")

# Build Google Sheet index
google_keys = {}
for i, row in enumerate(google_rows[1:], 1):  # Skip header
    key = make_key(row)
    if key:
        google_keys[key] = i

# Find truly missing companies
missing = []
for row in desktop[1:]:  # Skip header
    key = make_key(row)
    if key and key not in google_keys:
        # Normalize this row before adding
        normalized = normalize_row(row)
        missing.append(normalized)

print(f"\nMissing companies: {len(missing)}")

if missing:
    # Save for upload
    with open('/Users/max/.openclaw/workspace/postharvest/to-append.json', 'w') as f:
        json.dump(missing, f)
    
    print(f"\nSample missing companies:")
    for row in missing[:10]:
        print(f"  - {row[0]} ({row[2] if len(row) > 2 else 'N/A'})")
    
    # Append to Google Sheet
    print(f"\nAppending {len(missing)} companies to Google Sheet...")
    result = subprocess.run([
        'gog', 'sheets', 'append',
        '1uVd-xZFF4TEQGqtvw9z6W8fffeaifPCoLsek83GmEoQ',
        'Master!A:Z',
        '--values-json', json.dumps(missing),
        '--input', 'USER_ENTERED',
        '--account', 'jonny@jonnyshannon.com'
    ], capture_output=True, text=True)
    
    if result.returncode == 0:
        print("✅ Successfully appended missing companies!")
    else:
        print(f"❌ Error: {result.stderr}")
else:
    print("✅ No missing companies - Google Sheet is already up to date!")

# Now extract ALL Australian companies from desktop for the Australia sheet
print("\n--- Australian Companies ---")
au_companies = [desktop[0]]  # Header
for row in desktop[1:]:
    if len(row) > 2:
        country = normalize_country(row[2] or '')
        if country == 'AU':
            au_companies.append(normalize_row(row))

print(f"Found {len(au_companies)-1} Australian companies")

# Clear and update Australia sheet
if len(au_companies) > 1:
    with open('/Users/max/.openclaw/workspace/postharvest/au-companies-full.json', 'w') as f:
        json.dump(au_companies, f)
    
    print("Clearing Australia sheet...")
    subprocess.run([
        'gog', 'sheets', 'clear',
        '1uVd-xZFF4TEQGqtvw9z6W8fffeaifPCoLsek83GmEoQ',
        'Australia!A:Z',
        '--account', 'jonny@jonnyshannon.com'
    ], capture_output=True)
    
    print(f"Adding {len(au_companies)-1} Australian companies to Australia sheet...")
    result = subprocess.run([
        'gog', 'sheets', 'update',
        '1uVd-xZFF4TEQGqtvw9z6W8fffeaifPCoLsek83GmEoQ',
        'Australia!A1',
        '--values-json', json.dumps(au_companies),
        '--input', 'USER_ENTERED',
        '--account', 'jonny@jonnyshannon.com'
    ], capture_output=True, text=True)
    
    if result.returncode == 0:
        print("✅ Australia sheet updated!")
    else:
        print(f"❌ Error: {result.stderr}")

print("\n🔗 https://docs.google.com/spreadsheets/d/1uVd-xZFF4TEQGqtvw9z6W8fffeaifPCoLsek83GmEoQ/edit")
