#!/usr/bin/env python3
"""
Script to generate a static JSON file listing all export files with product counts.
This file is used by the WordPress plugin when access_type is 'http'.
"""

import os
import json
import csv
import glob
from datetime import datetime
from pathlib import Path

# Get the root directory (parent of the scripts directory).
ROOT_DIR = Path(__file__).parent.parent
EXPORTS_DIR = ROOT_DIR / 'data' / 'exports'
API_DIR = ROOT_DIR / 'api' / 'exports'
OUTPUT_FILE = API_DIR / 'list.json'


def count_products_in_file(file_path):
    """Count products in a CSV file (excluding header)."""
    count = 0
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            reader = csv.reader(f)
            # Skip header row.
            next(reader, None)
            # Count non-empty rows.
            for row in reader:
                if row and any(cell.strip() for cell in row):
                    count += 1
    except Exception as e:
        print(f"Error counting products in {file_path}: {e}")
    return count


def analyze_csv_file(file_path):
    """Analyze CSV file for product statistics (variable products + variations)."""
    stats = {
        'total_rows': 0,
        'parent_count': 0,
        'variation_count': 0,
        'simple_count': 0,
        'avg_variations': 0,
    }
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            reader = csv.reader(f)
            # Read header row.
            header = next(reader, None)
            if not header:
                return stats

            # Find the Type column index.
            try:
                type_index = header.index('Type')
            except ValueError:
                # No Type column: treat all rows as simple products.
                stats['total_rows'] = count_products_in_file(file_path)
                stats['simple_count'] = stats['total_rows']
                stats['parent_count'] = stats['total_rows']
                return stats

            # Count product types.
            variable_count = 0
            for row in reader:
                if not row or len(row) <= type_index:
                    continue
                type_val = row[type_index].strip().lower() if row[type_index] else ''
                if type_val == 'variable':
                    variable_count += 1
                    stats['parent_count'] += 1
                    stats['total_rows'] += 1
                elif type_val == 'variation':
                    stats['variation_count'] += 1
                    stats['total_rows'] += 1
                elif type_val == 'simple' or type_val:
                    # Any other non-empty type is counted as a simple product.
                    stats['simple_count'] += 1
                    stats['total_rows'] += 1

            # Calculate total parent products (variable + simple).
            stats['parent_count'] += stats['simple_count']

            # Calculate average variations per variable product.
            if variable_count > 0 and stats['variation_count'] > 0:
                stats['avg_variations'] = round(stats['variation_count'] / variable_count, 1)
    except Exception as e:
        print(f"Error analyzing CSV file {file_path}: {e}")
        # Fall back to a simple row count.
        stats['total_rows'] = count_products_in_file(file_path)
        stats['simple_count'] = stats['total_rows']
        stats['parent_count'] = stats['total_rows']
    return stats


def main():
    """Generate the exports list JSON file."""
    # Ensure the API directory exists.
    API_DIR.mkdir(parents=True, exist_ok=True)

    # Check if the exports directory exists.
    if not EXPORTS_DIR.exists():
        print(f"Exports directory not found: {EXPORTS_DIR}")
        # Create an empty list.
        output_data = {
            'files': [],
            'generated_at': datetime.now().isoformat(),
        }
        with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
            json.dump(output_data, f, indent=2)
        print(f"Created empty list at {OUTPUT_FILE}")
        return

    # Find all CSV export files.
    csv_files = glob.glob(str(EXPORTS_DIR / '*-wc.csv'))

    files = []
    for file_path in csv_files:
        file_name = os.path.basename(file_path)
        file_stat = os.stat(file_path)

        # Analyze the CSV file for detailed stats.
        stats = analyze_csv_file(file_path)

        files.append({
            'name': file_name,
            'path': f'/data/exports/{file_name}',
            # Use the /ss-crawler-api/ prefix so the URL is reachable through Caddy (port 80/443).
            # The plugin uses this URL directly, so it needs the full path including the prefix.
            'url': f'/ss-crawler-api/data/exports/{file_name}',
            'size': file_stat.st_size,
            'date': datetime.fromtimestamp(file_stat.st_mtime).strftime('%Y-%m-%d %H:%M:%S'),
            'product_count': stats['total_rows'],
            'parent_count': stats['parent_count'],
            'variation_count': stats['variation_count'],
            'avg_variations': stats['avg_variations'],
        })

    # Sort by date (newest first).
    files.sort(key=lambda x: x['date'], reverse=True)

    # Create output data.
    output_data = {
        'files': files,
        'generated_at': datetime.now().isoformat(),
    }

    # Write the JSON file.
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
        json.dump(output_data, f, indent=2)

    print(f"Generated exports list with {len(files)} files at {OUTPUT_FILE}")
    for file_info in files:
        print(
            f" - {file_info['name']}: {file_info['product_count']} products "
            f"(parents: {file_info['parent_count']}, variations: {file_info['variation_count']})"
        )


if __name__ == '__main__':
    main()
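
# Illustrative example (not executed): the generated api/exports/list.json has the
# shape below. The field names match exactly what main() writes; the file name,
# size, date, and counts are made-up placeholder values for demonstration only.
#
# {
#   "files": [
#     {
#       "name": "example-shop-wc.csv",
#       "path": "/data/exports/example-shop-wc.csv",
#       "url": "/ss-crawler-api/data/exports/example-shop-wc.csv",
#       "size": 123456,
#       "date": "2024-01-01 12:00:00",
#       "product_count": 150,
#       "parent_count": 40,
#       "variation_count": 110,
#       "avg_variations": 3.7
#     }
#   ],
#   "generated_at": "2024-01-01T12:00:05.000000"
# }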