#!/usr/bin/env python3
"""
Upload PHT Top 1,000 Apple/Pear/Citrus to Google Sheets
Sheet ID: 14WPFM_wwPv7aq25_r3csudwoNBrYTT-Fz8NOb6by2i4
Account: jonny@jonnyshannon.com
"""

import gspread
from oauth2client.service_account import ServiceAccountCredentials
import csv
import sys
from collections import Counter

# Configuration
SPREADSHEET_ID = '14WPFM_wwPv7aq25_r3csudwoNBrYTT-Fz8NOb6by2i4'
CSV_FILE = 'pht_top_1000_apple_pear_citrus.csv'
CREDS_FILE = '/Users/max/.openclaw/workspace/postharvest/google-master-list.json'
SHEET_NAME = 'PHT Top 1000 Apple Pear Citrus'

print("=" * 80)
print("UPLOADING PHT TOP 1,000 TO GOOGLE SHEETS")
print("=" * 80)
print(f"Sheet ID: {SPREADSHEET_ID}")
print(f"CSV File: {CSV_FILE}")
print()

# Step 1: Read CSV data
print("[1/4] Reading CSV file...")
rows = []
with open(CSV_FILE, 'r', encoding='utf-8') as f:
    reader = csv.reader(f)
    for row in reader:
        rows.append(row)

print(f"✓ Read {len(rows)} rows (including header)")
print(f"  - Header: {rows[0]}")
print(f"  - Data rows: {len(rows) - 1}")

if len(rows) != 1001:  # 1 header + 1000 data rows
    print(f"⚠️  WARNING: Expected 1001 rows, got {len(rows)}")

# Step 2: Authenticate with Google Sheets
print("\n[2/4] Authenticating with Google Sheets...")
try:
    scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
    creds = ServiceAccountCredentials.from_json_keyfile_name(CREDS_FILE, scope)
    client = gspread.authorize(creds)
    print("✓ Successfully authenticated")
except Exception as e:
    print(f"✗ Authentication failed: {e}")
    sys.exit(1)

# Step 3: Open/create sheet
print(f"\n[3/4] Opening spreadsheet {SPREADSHEET_ID}...")
try:
    spreadsheet = client.open_by_key(SPREADSHEET_ID)
    print(f"✓ Opened spreadsheet: {spreadsheet.title}")
    
    # Try to open the sheet, or create if it doesn't exist
    try:
        sheet = spreadsheet.worksheet(SHEET_NAME)
        print(f"✓ Found existing sheet: {SHEET_NAME}")
        print("  Clearing existing data...")
        sheet.clear()
    except:
        print(f"✓ Creating new sheet: {SHEET_NAME}")
        sheet = spreadsheet.add_worksheet(title=SHEET_NAME, rows=1100, cols=10)
    
except Exception as e:
    print(f"✗ Failed to open spreadsheet: {e}")
    sys.exit(1)

# Step 4: Upload data
print(f"\n[4/4] Uploading {len(rows)} rows to Google Sheets...")
try:
    # Upload in batches to avoid timeout
    batch_size = 500
    for i in range(0, len(rows), batch_size):
        batch = rows[i:i+batch_size]
        start_row = i + 1
        end_row = start_row + len(batch) - 1
        print(f"  Uploading rows {start_row}-{end_row}...")
        
        # Determine range
        if i == 0:
            cell_range = f'A1:J{len(batch)}'
        else:
            cell_range = f'A{start_row}:J{end_row}'
        
        sheet.update(cell_range, batch)
    
    print("✓ Successfully uploaded all data")
    
    # Format header
    print("\n  Formatting header row...")
    sheet.format('A1:J1', {
        'textFormat': {'bold': True, 'fontSize': 11},
        'backgroundColor': {'red': 0.2, 'green': 0.6, 'blue': 0.9},
        'horizontalAlignment': 'CENTER'
    })
    
    # Freeze header row
    sheet.freeze(rows=1)
    
    # Auto-resize columns
    print("  Auto-resizing columns...")
    for col_idx in range(10):
        sheet.columns_auto_resize(col_idx, col_idx)
    
    print("✓ Formatting complete")
    
except Exception as e:
    print(f"✗ Upload failed: {e}")
    import traceback
    traceback.print_exc()
    sys.exit(1)

# Step 5: Generate statistics
print("\n" + "=" * 80)
print("UPLOAD COMPLETE - GENERATING STATISTICS")
print("=" * 80)

# Count by country
countries = Counter()
fruits = Counter()
ca_storage = Counter()
sizes = Counter()

for row in rows[1:]:  # Skip header
    if len(row) >= 8:
        countries[row[2]] += 1  # Country
        fruits[row[4]] += 1     # Fruit
        ca_storage[row[5]] += 1 # CA Storage
        sizes[row[7]] += 1      # Size

print("\n📊 COUNTRY DISTRIBUTION (Top 10):")
for country, count in countries.most_common(10):
    pct = (count / 1000) * 100
    print(f"  {country:25s} {count:4d} ({pct:5.1f}%)")

print("\n🍎 FRUIT TYPE DISTRIBUTION (Top 10):")
for fruit, count in fruits.most_common(10):
    pct = (count / 1000) * 100
    print(f"  {fruit:30s} {count:4d} ({pct:5.1f}%)")

print("\n❄️  CA STORAGE:")
for status, count in ca_storage.most_common():
    pct = (count / 1000) * 100
    print(f"  {status:10s} {count:4d} ({pct:5.1f}%)")

print("\n📦 SIZE DISTRIBUTION:")
for size, count in sorted(sizes.items(), key=lambda x: ['XXLarge', 'XLarge', 'Large', 'Medium'].index(x[0]) if x[0] in ['XXLarge', 'XLarge', 'Large', 'Medium'] else 999):
    pct = (count / 1000) * 100
    print(f"  {size:10s} {count:4d} ({pct:5.1f}%)")

print("\n" + "=" * 80)
print("✅ SUCCESS - 1,000 COMPANIES UPLOADED TO GOOGLE SHEETS")
print("=" * 80)
print(f"\n🔗 View at: https://docs.google.com/spreadsheets/d/{SPREADSHEET_ID}/edit")
print()
