#!/usr/bin/env python3
"""
Prepare CSV data for Google Sheets upload with HYPERLINK formulas for websites
"""

import csv
import json
import re

def format_website_hyperlink(website):
    """Format website as HYPERLINK formula for Google Sheets"""
    if not website or website.strip() == '':
        return ''
    
    website = website.strip()
    
    # Ensure website has protocol
    if not website.startswith(('http://', 'https://')):
        website = 'https://' + website
    
    # Extract display text (remove protocol and www)
    display_text = website
    display_text = re.sub(r'^https?://', '', display_text)
    display_text = re.sub(r'^www\.', '', display_text)
    display_text = display_text.rstrip('/')
    
    # Create HYPERLINK formula
    return f'=HYPERLINK("{website}","{display_text}")'

def prepare_sheets_data(csv_file_path):
    """Read CSV and prepare data array for Google Sheets"""
    
    target_columns = [
        'Company Name', 'Website', 'Country', 'State/Region', 'Primary Fruit',
        'CA Storage Confirmed', 'Employee Count', 'Est. Revenue ($M)', 
        'Contact Name', 'Contact Title', 'Contact Email', 'Contact Phone',
        'Notes', 'Source', 'Qualified'
    ]
    
    data_array = []
    
    # Add header row
    data_array.append(target_columns)
    
    # Read CSV data
    with open(csv_file_path, 'r', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        
        for row in reader:
            row_data = []
            for col in target_columns:
                value = row.get(col, '') or ''
                
                # Format website as HYPERLINK
                if col == 'Website':
                    value = format_website_hyperlink(value)
                
                row_data.append(value)
            
            data_array.append(row_data)
    
    return data_array

def main():
    csv_file = '/Users/max/.openclaw/workspace/postharvest/MASTER-LIST-FINAL.csv'
    
    print("Preparing data for Google Sheets...")
    data_array = prepare_sheets_data(csv_file)
    
    print(f"Prepared {len(data_array)} rows (including header)")
    print(f"Data companies: {len(data_array) - 1}")
    
    # Convert to JSON
    json_data = json.dumps(data_array, ensure_ascii=False)
    
    # Save JSON to file
    json_file = '/Users/max/.openclaw/workspace/postharvest/sheets_data.json'
    with open(json_file, 'w', encoding='utf-8') as f:
        f.write(json_data)
    
    print(f"JSON data saved to {json_file}")
    print(f"JSON length: {len(json_data)} characters")
    
    # Show first few rows as sample
    print("\nSample data (first 3 rows):")
    for i, row in enumerate(data_array[:3]):
        print(f"Row {i}: {row}")
    
    return json_data

if __name__ == "__main__":
    main()