#!/usr/bin/env python3
"""
Finalize the database by merging all sources
"""

import json
import csv

def _load_companies(path):
    """Return the parsed JSON content of *path*, read as UTF-8."""
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def main():
    """Merge the company sources, export the result, and print a summary.

    Reads ``top_1000_current.json`` and ``additional_companies.json`` from
    the current working directory (each must contain a JSON list of company
    dicts), concatenates them, sorts the result by ``Score`` in descending
    order, and writes ``pht_top_398_apple_pear_citrus.csv`` and
    ``pht_top_398_apple_pear_citrus.json``. Progress and statistics are
    printed to stdout.

    Keys missing from a company dict are written to the CSV as empty cells.

    Raises:
        FileNotFoundError: If either input file does not exist.
        KeyError: If a company lacks ``Score`` or ``Country`` (or the top
            company lacks ``Company``).
        ValueError: If a company has a key that is not one of the CSV
            columns (``csv.DictWriter`` rejects unknown fields by default).
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    from collections import Counter

    print("Finalizing PHT Top 1000 Database...")

    # Load current database
    current = _load_companies('top_1000_current.json')
    print(f"  Current database: {len(current)} companies")

    # Load additional researched companies
    additional = _load_companies('additional_companies.json')
    print(f"  Additional research: {len(additional)} companies")

    # Merge
    all_companies = current + additional
    total = len(all_companies)
    print(f"  Total: {total} companies")

    # Sort by score, highest first. list.sort is stable, so companies with
    # equal scores keep their input order (current before additional).
    all_companies.sort(key=lambda x: x['Score'], reverse=True)

    # Export to CSV. The output file names keep their original "398" label
    # regardless of the actual count so existing paths stay valid.
    fieldnames = ['Company', 'Website', 'Country', 'Region', 'Fruit', 'CA Storage', 'Score', 'Size', 'Notes', 'Contacts']

    with open('pht_top_398_apple_pear_citrus.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(all_companies)

    print(f"✓ Exported to pht_top_398_apple_pear_citrus.csv")

    # Statistics
    countries = Counter(comp['Country'] for comp in all_companies)

    print(f"\nCountry breakdown ({len(countries)} countries):")
    # most_common() orders by count descending; ties keep first-seen order.
    for country, count in countries.most_common():
        print(f"  {country}: {count}")

    if all_companies:
        average = sum(c['Score'] for c in all_companies) / total
        print(f"\nAverage score: {average:.1f}")
        print(f"Top score: {all_companies[0]['Score']} - {all_companies[0]['Company']}")
    else:
        # Avoid ZeroDivisionError / IndexError on an empty database so the
        # JSON export below still runs.
        print("\nNo companies to summarize.")

    # Save JSON
    with open('pht_top_398_apple_pear_citrus.json', 'w', encoding='utf-8') as f:
        json.dump(all_companies, f, indent=2)

    # Report the real merged size rather than a hard-coded figure.
    print(f"\nFinal database: {total} companies")
    print(f"Gap to 1,000: {1000 - total}")

# Run the merge only when this file is executed as a script, so importing
# the module does not trigger any file I/O.
if __name__ == '__main__':
    main()
