#!/usr/bin/env python3
"""
Script to generate a static JSON file listing all export files with product counts.
This file is used by the WordPress plugin when access_type is 'http'.
"""

import os
import json
import csv
import glob
from datetime import datetime
from pathlib import Path

# Get the root directory (parent of the scripts directory).
ROOT_DIR = Path(__file__).parent.parent
EXPORTS_DIR = ROOT_DIR / 'data' / 'exports'
API_DIR = ROOT_DIR / 'api' / 'exports'
OUTPUT_FILE = API_DIR / 'list.json'


def count_products_in_file(file_path):
    """Count products in a CSV file (excluding header)."""
    count = 0
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            reader = csv.reader(f)
            # Skip header row.
            next(reader, None)
            # Count non-empty rows.
            for row in reader:
                if row and any(cell.strip() for cell in row):
                    count += 1
    except Exception as e:
        print(f"Error counting products in {file_path}: {e}")
    return count


def analyze_csv_file(file_path):
    """Analyze CSV file for product statistics (variable products + variations)."""
    stats = {
        'total_rows': 0,
        'parent_count': 0,
        'variation_count': 0,
        'simple_count': 0,
        'avg_variations': 0,
    }
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            reader = csv.reader(f)
            # Read header row.
            header = next(reader, None)
            if not header:
                return stats

            # Find the Type column index.
            try:
                type_index = header.index('Type')
            except ValueError:
                # No Type column: treat all rows as simple products.
                stats['total_rows'] = count_products_in_file(file_path)
                stats['simple_count'] = stats['total_rows']
                stats['parent_count'] = stats['total_rows']
                return stats

            # Count product types.
            variable_count = 0
            for row in reader:
                if not row or len(row) <= type_index:
                    continue
                type_val = row[type_index].strip().lower() if row[type_index] else ''
                if type_val == 'variable':
                    variable_count += 1
                    stats['parent_count'] += 1
                    stats['total_rows'] += 1
                elif type_val == 'variation':
                    stats['variation_count'] += 1
                    stats['total_rows'] += 1
                elif type_val == 'simple' or type_val:
                    # Any other non-empty type is counted as a simple product.
                    stats['simple_count'] += 1
                    stats['total_rows'] += 1

            # Calculate total parent products (variable + simple).
            stats['parent_count'] += stats['simple_count']

            # Calculate average variations per variable product.
            if variable_count > 0 and stats['variation_count'] > 0:
                stats['avg_variations'] = round(stats['variation_count'] / variable_count, 1)
    except Exception as e:
        print(f"Error analyzing CSV file {file_path}: {e}")
        # Fall back to a simple row count.
        stats['total_rows'] = count_products_in_file(file_path)
        stats['simple_count'] = stats['total_rows']
        stats['parent_count'] = stats['total_rows']
    return stats


def main():
    """Generate the exports list JSON file."""
    # Ensure the API directory exists.
    API_DIR.mkdir(parents=True, exist_ok=True)

    # Check if the exports directory exists.
    if not EXPORTS_DIR.exists():
        print(f"Exports directory not found: {EXPORTS_DIR}")
        # Create an empty list.
        output_data = {
            'files': [],
            'generated_at': datetime.now().isoformat(),
        }
        with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
            json.dump(output_data, f, indent=2)
        print(f"Created empty list at {OUTPUT_FILE}")
        return

    # Find all CSV export files.
    csv_files = glob.glob(str(EXPORTS_DIR / '*-wc.csv'))

    files = []
    for file_path in csv_files:
        file_name = os.path.basename(file_path)
        file_stat = os.stat(file_path)

        # Analyze the CSV file for detailed stats.
        stats = analyze_csv_file(file_path)

        files.append({
            'name': file_name,
            'path': f'/data/exports/{file_name}',
            # Use the /ss-crawler-api/ prefix so the URL is reachable through Caddy (port 80/443).
            # The plugin uses this URL directly, so it needs the full path including the prefix.
            'url': f'/ss-crawler-api/data/exports/{file_name}',
            'size': file_stat.st_size,
            'date': datetime.fromtimestamp(file_stat.st_mtime).strftime('%Y-%m-%d %H:%M:%S'),
            'product_count': stats['total_rows'],
            'parent_count': stats['parent_count'],
            'variation_count': stats['variation_count'],
            'avg_variations': stats['avg_variations'],
        })

    # Sort by date (newest first).
    files.sort(key=lambda x: x['date'], reverse=True)

    # Create output data.
    output_data = {
        'files': files,
        'generated_at': datetime.now().isoformat(),
    }

    # Write the JSON file.
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
        json.dump(output_data, f, indent=2)

    print(f"Generated exports list with {len(files)} files at {OUTPUT_FILE}")
    for file_info in files:
        print(
            f" - {file_info['name']}: {file_info['product_count']} products "
            f"(parents: {file_info['parent_count']}, variations: {file_info['variation_count']})"
        )


if __name__ == '__main__':
    main()
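
# Illustrative example (not executed): the generated api/exports/list.json has the
# shape below. The field names match exactly what main() writes; the file name,
# size, date, and counts are made-up placeholder values for demonstration only.
#
# {
#   "files": [
#     {
#       "name": "example-shop-wc.csv",
#       "path": "/data/exports/example-shop-wc.csv",
#       "url": "/ss-crawler-api/data/exports/example-shop-wc.csv",
#       "size": 123456,
#       "date": "2024-01-01 12:00:00",
#       "product_count": 150,
#       "parent_count": 40,
#       "variation_count": 110,
#       "avg_variations": 3.7
#     }
#   ],
#   "generated_at": "2024-01-01T12:00:05.000000"
# }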